From 82eedde3dd74b5c50026c102ca3cc5cb825116a8 Mon Sep 17 00:00:00 2001
From: hyungkishin
Date: Sat, 18 Oct 2025 06:51:17 +0900
Subject: [PATCH 1/2] feat: rework transfer-relay from multiple instances to a single multi-threaded instance
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
 .../transfer/application/build.gradle.kts     | 22 +-
 .../application/TransactionService.kt         | 70 ++-
 .../handler/TransferOutboxEventHandler.kt     | 25 +-
 .../application/mapper/OutboxEventMapper.kt   | 41 ++
 .../port/TransferEventPublisher.kt            | 15 +
 .../TransferEventsOutboxRepository.kt         | 27 +-
 .../adapter/KafkaTransferEventPublisher.kt    | 63 +++
 .../adapter/HybridFdsEventPublisherAdapter.kt | 124 ------
 .../TransferEventsOutboxJdbcRepository.kt     | 74 +--
 .../resources/db/migration/V1__init_table.sql | 384 +++++++---------
 .../transentia/relay/TransferOutboxRelay.kt   | 420 +++---------
 .../relay/component/EventBatchProcessor.kt    | 144 ------
 .../relay/component/OutboxEventProcessor.kt   | 38 ++
 ...EventPublisher.kt => OutboxEventWriter.kt} | 23 +-
 .../relay/component/OutboxItemReader.kt       | 65 +++
 .../relay/component/OutboxWorker.kt           | 56 +++
 .../relay/component/RetryPolicyHandler.kt     | 42 --
 .../relay/config/ExecutorServiceConfig.kt     | 7 +-
 .../relay/config/OutboxRelayConfig.kt         | 126 +-----
 .../src/main/resources/application-peak.yml   | 6 +-
 .../src/main/resources/application.yml        | 23 +-
 .../TransferOutboxRelayIntegrationTest.kt     | 281 ------------
 .../TransferOutboxRelayPerformanceTest.kt     | 271 -----------
 .../relay/TransferOutboxRelayTest.kt          | 284 ------------
 .../src/test/resources/application-test.yml   | 39 +-
 25 files changed, 641 insertions(+), 2029 deletions(-)
 create mode 100644 services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/mapper/OutboxEventMapper.kt
 create mode 100644 services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/port/TransferEventPublisher.kt
 create mode 100644 services/transfer/infra/src/main/kotlin/io/github/hyungkishin/transentia/infra/adapter/KafkaTransferEventPublisher.kt
 delete mode 100644 services/transfer/infra/src/main/kotlin/io/github/hyungkishin/transentia/infra/rdb/adapter/HybridFdsEventPublisherAdapter.kt
 delete mode 100644 services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/EventBatchProcessor.kt
 create mode 100644 services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/OutboxEventProcessor.kt
 rename services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/{KafkaEventPublisher.kt => OutboxEventWriter.kt} (50%)
 create mode 100644 services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/OutboxItemReader.kt
 create mode 100644 services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/OutboxWorker.kt
 delete mode 100644 services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/RetryPolicyHandler.kt
 delete mode 100644
services/transfer/instances/transfer-relay/src/test/kotlin/io/github/hyungkishin/transentia/relay/TransferOutboxRelayPerformanceTest.kt diff --git a/services/transfer/application/build.gradle.kts b/services/transfer/application/build.gradle.kts index d223143..828c149 100644 --- a/services/transfer/application/build.gradle.kts +++ b/services/transfer/application/build.gradle.kts @@ -1,30 +1,10 @@ -//plugins { -// kotlin("jvm") -// kotlin("plugin.spring") -//} -// -//dependencies { -// implementation(project(":transfer-domain")) -// implementation(project(":common-application")) -// implementation(project(":common-domain")) -// -// implementation("org.springframework:spring-context") -// implementation("org.springframework:spring-tx") -// -// testImplementation("io.kotest:kotest-runner-junit5") -// testImplementation("io.kotest:kotest-assertions-core") -//} - plugins { id("transentia.spring-library") } dependencies { - // 프로젝트 의존성 implementation(project(":transfer-domain")) implementation(project(":common-application")) implementation(project(":common-domain")) - - // 특화된 의존성 (있다면 추가) - // 예: implementation("org.springframework.retry:spring-retry") + implementation(project(":kafka-model")) } diff --git a/services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/TransactionService.kt b/services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/TransactionService.kt index 3044c56..a0fc6cf 100644 --- a/services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/TransactionService.kt +++ b/services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/TransactionService.kt @@ -1,66 +1,97 @@ package io.github.hyungkishin.transentia.application +import io.github.hyungkishin.transentia.application.mapper.OutboxEventMapper import io.github.hyungkishin.transentia.application.provided.TransactionRegister import io.github.hyungkishin.transentia.application.provided.command.TransferRequestCommand import io.github.hyungkishin.transentia.application.required.TransactionRepository +import io.github.hyungkishin.transentia.application.required.TransferEventsOutboxRepository import io.github.hyungkishin.transentia.application.required.UserRepository import io.github.hyungkishin.transentia.application.required.command.TransferResponseCommand import io.github.hyungkishin.transentia.common.error.CommonError import io.github.hyungkishin.transentia.common.error.DomainException +import io.github.hyungkishin.transentia.common.message.transfer.TransferCompleted import io.github.hyungkishin.transentia.common.snowflake.IdGenerator import io.github.hyungkishin.transentia.common.snowflake.SnowFlakeId import io.github.hyungkishin.transentia.container.model.transaction.Transaction import io.github.hyungkishin.transentia.container.validator.transfer.TransferValidator +import org.slf4j.LoggerFactory import org.springframework.context.ApplicationEventPublisher import org.springframework.stereotype.Service import org.springframework.transaction.annotation.Transactional +import java.time.Instant @Service class TransactionService( private val transactionRepository: TransactionRepository, private val userRepository: UserRepository, - private val transactionHistoryService: TransactionHistoryService, + private val outboxRepository: TransferEventsOutboxRepository, + private val outboxEventMapper: OutboxEventMapper, private val idGenerator: IdGenerator, private val eventPublisher: ApplicationEventPublisher, ) : 
TransactionRegister { + private val log = LoggerFactory.getLogger(javaClass) + @Transactional override fun createTransfer(command: TransferRequestCommand): TransferResponseCommand { - val sender = userRepository.findById(command.senderId) ?: throw DomainException( - CommonError.NotFound("account_balance", command.senderId.toString()), - "송신자 정보를 찾을 수 없습니다. senderId=${command.senderId}" - ) - - val receiver = userRepository.findByAccountNumber(command.receiverAccountNumber) ?: throw DomainException( - CommonError.NotFound("account_balance", command.receiverAccountNumber.toString()), - "수신자 계좌 정보를 찾을 수 없습니다. snowFlakeId=${command.receiverAccountNumber}" - ) + val (sender, receiver) = loadUsers(command) + val amount = command.amount() - // TODO: - 테스트의 용이성과 확장성 / 재사용성 검증하기 - TransferValidator.validate(sender, receiver, command.amount()) + TransferValidator.validate(sender, receiver, amount) val transaction = Transaction.of( SnowFlakeId(idGenerator.nextId()), sender.id, receiver.id, - command.amount() + amount ) - sender.accountBalance.withdrawOrThrow(command.amount()) - receiver.accountBalance.deposit(command.amount()) - + sender.accountBalance.withdrawOrThrow(amount) + receiver.accountBalance.deposit(amount) userRepository.save(sender) userRepository.save(receiver) - val completeEvent = transaction.complete() val savedTransaction = transactionRepository.save(transaction) - // TODO: outbox ( kafka publish ) + relay 서버를 fadeout 하고, CDC 방식으로 전환. + val completeEvent = transaction.complete() + + // outbox 먼저 저장 + saveToOutbox(completeEvent, savedTransaction.id.value) + + // 이벤트 발행 (커밋 후 별도 스레드에서 Kafka 전송) eventPublisher.publishEvent(completeEvent) return TransferResponseCommand.from(savedTransaction) } + private fun saveToOutbox(event: TransferCompleted, transactionId: Long) { + try { + val outboxEvent = outboxEventMapper.toOutboxEvent(event, transactionId) + outboxRepository.save(outboxEvent, Instant.now()) + } catch (e: Exception) { + throw DomainException( + CommonError.Conflict("outbox_save_failed"), + "송금 처리 중 시스템 오류가 발생했습니다.", + e + ) + } + } + + private fun loadUsers(command: TransferRequestCommand) = + Pair( + userRepository.findById(command.senderId) + ?: throw DomainException( + CommonError.NotFound("account_balance", command.senderId.toString()), + "송신자 정보를 찾을 수 없습니다." + ), + userRepository.findByAccountNumber(command.receiverAccountNumber) + ?: throw DomainException( + CommonError.NotFound("account_balance", command.receiverAccountNumber), + "수신자 계좌 정보를 찾을 수 없습니다." 
+ ) + ) + + @Transactional(readOnly = true) override fun findTransfer(transactionId: Long): TransferResponseCommand { val tx = transactionRepository.findById(transactionId) ?: throw DomainException( @@ -69,5 +100,4 @@ class TransactionService( ) return TransferResponseCommand.from(tx) } - -} \ No newline at end of file +} diff --git a/services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/handler/TransferOutboxEventHandler.kt b/services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/handler/TransferOutboxEventHandler.kt index 641d4fc..4caa316 100644 --- a/services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/handler/TransferOutboxEventHandler.kt +++ b/services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/handler/TransferOutboxEventHandler.kt @@ -1,6 +1,6 @@ package io.github.hyungkishin.transentia.application.handler -import io.github.hyungkishin.transentia.application.required.HybridFdsEventPublisher +import io.github.hyungkishin.transentia.application.port.TransferEventPublisher import io.github.hyungkishin.transentia.common.message.transfer.TransferCompleted import org.slf4j.LoggerFactory import org.springframework.scheduling.annotation.Async @@ -10,31 +10,16 @@ import org.springframework.transaction.event.TransactionalEventListener @Component class TransferOutboxEventHandler( - private val hybridFdsEventPublisher: HybridFdsEventPublisher + private val eventPublisher: TransferEventPublisher ) { - private val log = LoggerFactory.getLogger(javaClass) @Async("outboxEventExecutor") @TransactionalEventListener(phase = TransactionPhase.AFTER_COMMIT) fun handle(event: TransferCompleted) { + log.debug("비동기 Kafka 전송 시도: transactionId={}, eventId={}", event.transactionId, event.eventId) - val currentThread = Thread.currentThread() - - val threadConfigData = mapOf( - "threadName" to currentThread.name, - "threadGroup" to (currentThread.threadGroup?.name ?: "N/A"), - "threadId" to currentThread.id.toString(), - "isDaemon" to currentThread.isDaemon.toString() - ) - - println("threadConfigData: $threadConfigData") - - val kafkaSuccess = hybridFdsEventPublisher.publish(event) - - if (!kafkaSuccess) { - log.warn("Kafka 즉시 전송 실패, Outbox에 저장됨: transactionId={}", event.transactionId) - } + eventPublisher.publish(event) } -} \ No newline at end of file +} diff --git a/services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/mapper/OutboxEventMapper.kt b/services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/mapper/OutboxEventMapper.kt new file mode 100644 index 0000000..01fefb0 --- /dev/null +++ b/services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/mapper/OutboxEventMapper.kt @@ -0,0 +1,41 @@ +package io.github.hyungkishin.transentia.application.mapper + +import com.fasterxml.jackson.databind.ObjectMapper +import io.github.hyungkishin.transentia.common.message.transfer.TransferCompleted +import io.github.hyungkishin.transentia.container.event.TransferEvent +import org.slf4j.MDC +import org.springframework.stereotype.Component +import java.util.* + +@Component +class OutboxEventMapper( + private val objectMapper: ObjectMapper +) { + fun toOutboxEvent(event: TransferCompleted, transactionId: Long): TransferEvent { + return TransferEvent( + eventId = event.eventId, + aggregateType = "Transaction", + aggregateId = transactionId.toString(), + eventType = 
"TRANSFER_COMPLETED", + payload = objectMapper.writeValueAsString( + mapOf( + "transactionId" to event.transactionId, + "senderId" to event.senderUserId, + "receiverId" to event.receiverUserId, + "amount" to event.amount, + "status" to "COMPLETED", + "occurredAt" to event.occurredAt.toEpochMilli() + ) + ), + headers = objectMapper.writeValueAsString( + mapOf( + "eventType" to "TRANSFER_COMPLETED", + "eventVersion" to "v1", + "traceId" to (MDC.get("traceId") ?: UUID.randomUUID().toString()), + "producer" to "transfer-api", + "contentType" to "application/json" + ) + ) + ) + } +} diff --git a/services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/port/TransferEventPublisher.kt b/services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/port/TransferEventPublisher.kt new file mode 100644 index 0000000..73c75d7 --- /dev/null +++ b/services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/port/TransferEventPublisher.kt @@ -0,0 +1,15 @@ +package io.github.hyungkishin.transentia.application.port + +import io.github.hyungkishin.transentia.common.message.transfer.TransferCompleted + +/** + * 송금 이벤트 발행 Port + */ +interface TransferEventPublisher { + /** + * 송금 완료 이벤트 발행 + * + * 호출자가 이미 비동기 스레드에서 실행 중 + */ + fun publish(event: TransferCompleted) +} diff --git a/services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/required/TransferEventsOutboxRepository.kt b/services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/required/TransferEventsOutboxRepository.kt index 736e49d..72d730c 100644 --- a/services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/required/TransferEventsOutboxRepository.kt +++ b/services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/required/TransferEventsOutboxRepository.kt @@ -9,33 +9,18 @@ interface TransferEventsOutboxRepository { fun save(row: TransferEvent, now: Instant) /** - * 처리할 이벤트들을 배치로 조회하고 SENDING 상태로 변경한다. + * 처리 대기 중인 이벤트를 조회하고 SENDING 상태로 변경 * - * 여러 스레드나 프로세스가 동시에 실행되어도 안전하도록 SKIP LOCKED를 사용한다. - * Stuck SENDING 상태(stuckThresholdSeconds 이상 진행 중)인 이벤트도 자동으로 복구하여 처리한다. - * 우선순위는 PENDING > SENDING(Stuck) > FAILED 순으로 처리한다. 
- * - * @param limit 한 번에 처리할 최대 이벤트 수 - * @param now 기준 시간 (기본값: 현재 시간, 테스트 시 고정 시간 주입 가능) - * @param stuckThresholdSeconds Stuck SENDING 판단 기준 시간 (초) - * @return 처리할 이벤트 목록 + * SKIP LOCKED로 동시성 제어 + * 우선순위: PENDING > SENDING(Stuck) > FAILED */ fun claimBatch( limit: Int, now: Instant, - stuckThresholdSeconds: Long = 600 + sendingTimeoutSeconds: Long = 120 ): List - fun markAsPublished( - ids: List, - now: Instant, - ) - - fun markFailedWithBackoff( - id: Long, - cause: String?, - backoffMillis: Long, - now: Instant, - ) + fun markAsPublished(ids: List, now: Instant) + fun markFailedWithBackoff(id: Long, cause: String?, backoffMillis: Long, now: Instant) } diff --git a/services/transfer/infra/src/main/kotlin/io/github/hyungkishin/transentia/infra/adapter/KafkaTransferEventPublisher.kt b/services/transfer/infra/src/main/kotlin/io/github/hyungkishin/transentia/infra/adapter/KafkaTransferEventPublisher.kt new file mode 100644 index 0000000..f32d940 --- /dev/null +++ b/services/transfer/infra/src/main/kotlin/io/github/hyungkishin/transentia/infra/adapter/KafkaTransferEventPublisher.kt @@ -0,0 +1,63 @@ +package io.github.hyungkishin.transentia.infra.adapter + +import com.fasterxml.jackson.databind.ObjectMapper +import io.github.hyungkishin.transentia.application.port.TransferEventPublisher +import io.github.hyungkishin.transentia.application.required.TransferEventsOutboxRepository +import io.github.hyungkishin.transentia.common.message.transfer.TransferCompleted +import io.github.hyungkishin.transentia.infrastructure.kafka.model.TransferEventAvroModel +import io.github.hyungkishin.transentia.infrastructure.kafka.model.TransferEventType +import io.github.hyungkishin.transentia.infrastructure.kafka.model.TransferStatus +import io.github.hyungkishin.transentia.infrastructure.kafka.producer.service.KafkaProducer +import org.slf4j.LoggerFactory +import org.slf4j.MDC +import org.springframework.beans.factory.annotation.Value +import org.springframework.stereotype.Component +import java.time.Instant +import java.util.* + +@Component +class KafkaTransferEventPublisher( + private val kafkaProducer: KafkaProducer, + private val outboxRepository: TransferEventsOutboxRepository, + private val objectMapper: ObjectMapper, + @Value("\${app.kafka.topics.transfer-events}") private val topicName: String +) : TransferEventPublisher { + + private val log = LoggerFactory.getLogger(javaClass) + + override fun publish(event: TransferCompleted) { + try { + val avroModel = TransferEventAvroModel.newBuilder() + .setEventId(event.eventId) + .setEventType(TransferEventType.TRANSFER_COMPLETED) + .setAggregateId(event.transactionId.toString()) + .setTransactionId(event.transactionId) + .setSenderId(event.senderUserId) + .setReceiverId(event.receiverUserId) + .setAmount(event.amount.toString()) + .setStatus(TransferStatus.COMPLETED) + .setOccurredAt(event.occurredAt.toEpochMilli()) + .setHeaders( + objectMapper.writeValueAsString( + mapOf( + "eventType" to "TRANSFER_COMPLETED", + "eventVersion" to "v1", + "traceId" to (MDC.get("traceId") ?: UUID.randomUUID().toString()), + "producer" to "transfer-api", + "contentType" to "application/json" + ) + ) + ) + .setCreatedAt(System.currentTimeMillis()) + .build() + + kafkaProducer.sendSync(topicName, avroModel) + + outboxRepository.markAsPublished(listOf(event.eventId), Instant.now()) + log.debug("Kafka 전송 및 outbox PUBLISHED 완료: eventId={}", event.eventId) + + } catch (e: Exception) { + log.warn("Kafka 전송 실패 (relay 재시도): eventId={}, error={}", event.eventId, e.message) + } + } +} 
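The publisher above is invoked from TransferOutboxEventHandler on the `@Async("outboxEventExecutor")` executor, but that bean's definition is not part of this excerpt. The following is only a minimal sketch of what such a bean could look like; the pool sizes, queue capacity, and the CallerRunsPolicy fallback are assumptions, not the project's actual settings. Note that even a dropped async publish is safe here: the event row is already in the outbox, so the relay delivers it on a later poll.

```
// Hypothetical sketch; the real bean definition is not shown in this patch excerpt.
import org.springframework.context.annotation.Bean
import org.springframework.context.annotation.Configuration
import org.springframework.scheduling.annotation.EnableAsync
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor
import java.util.concurrent.Executor
import java.util.concurrent.ThreadPoolExecutor

@Configuration
@EnableAsync
class OutboxEventExecutorConfigSketch {

    @Bean("outboxEventExecutor")
    fun outboxEventExecutor(): Executor {
        val executor = ThreadPoolTaskExecutor()
        executor.corePoolSize = 4          // assumed size
        executor.maxPoolSize = 8           // assumed size
        executor.setQueueCapacity(1000)    // assumed capacity
        executor.setThreadNamePrefix("outbox-event-")
        // On saturation, run on the caller thread instead of discarding the task.
        // A lost publish is still recoverable: the row is already in the outbox.
        executor.setRejectedExecutionHandler(ThreadPoolExecutor.CallerRunsPolicy())
        executor.initialize()
        return executor
    }
}
```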
diff --git a/services/transfer/infra/src/main/kotlin/io/github/hyungkishin/transentia/infra/rdb/adapter/HybridFdsEventPublisherAdapter.kt b/services/transfer/infra/src/main/kotlin/io/github/hyungkishin/transentia/infra/rdb/adapter/HybridFdsEventPublisherAdapter.kt deleted file mode 100644 index 53d1dd0..0000000 --- a/services/transfer/infra/src/main/kotlin/io/github/hyungkishin/transentia/infra/rdb/adapter/HybridFdsEventPublisherAdapter.kt +++ /dev/null @@ -1,124 +0,0 @@ -package io.github.hyungkishin.transentia.infra.rdb.adapter - -import com.fasterxml.jackson.databind.ObjectMapper -import io.github.hyungkishin.transentia.application.required.HybridFdsEventPublisher -import io.github.hyungkishin.transentia.application.required.TransferEventsOutboxRepository -import io.github.hyungkishin.transentia.common.message.transfer.TransferCompleted -import io.github.hyungkishin.transentia.common.snowflake.IdGenerator -import io.github.hyungkishin.transentia.container.event.TransferEvent -import io.github.hyungkishin.transentia.infrastructure.kafka.model.TransferEventAvroModel -import io.github.hyungkishin.transentia.infrastructure.kafka.model.TransferEventType -import io.github.hyungkishin.transentia.infrastructure.kafka.model.TransferStatus -import io.github.hyungkishin.transentia.infrastructure.kafka.producer.service.KafkaProducer -import org.slf4j.LoggerFactory -import org.slf4j.MDC -import org.springframework.beans.factory.annotation.Value -import org.springframework.stereotype.Component -import java.time.Instant -import java.util.* - -@Component -class HybridFdsEventPublisherAdapter( - private val kafkaProducer: KafkaProducer, - private val outboxRepository: TransferEventsOutboxRepository, - private val idGenerator: IdGenerator, - private val objectMapper: ObjectMapper, - @Value("\${app.kafka.topics.transfer-events}") private val topicName: String -) : HybridFdsEventPublisher { - - private val log = LoggerFactory.getLogger(javaClass) - - override fun publish(event: TransferCompleted): Boolean { - - return try { - val transferModel = TransferEventAvroModel.newBuilder() - .setEventId(idGenerator.nextId()) - .setEventType(TransferEventType.TRANSFER_COMPLETED) - .setAggregateId(event.transactionId.toString()) - .setTransactionId(event.transactionId) - .setSenderId(event.senderUserId) - .setReceiverId(event.receiverUserId) - .setAmount(event.amount.toString()) - .setStatus(TransferStatus.COMPLETED) - .setOccurredAt(event.occurredAt.toEpochMilli()) - .setHeaders( - objectMapper.writeValueAsString( - mapOf( - "eventType" to "TRANSFER_COMPLETED", - "eventVersion" to "v1", - "traceId" to currentTraceId(), - "producer" to "transfer-api", - "contentType" to "application/json" - ) - ) - ) - .setCreatedAt(System.currentTimeMillis()) - .build() - - kafkaProducer.sendSync(topicName, transferModel) - true - } catch (e: Exception) { - println(e.message) - // 실패시, outbox 테이블에 적재. ( 실패한 이벤트는 relay 서버에서 실행한다. ) - saveToOutTransferOutbox(event) - - // TODO 알림 ? 
- false - } - - } - - private fun saveToOutTransferOutbox(event: TransferCompleted) { - - try { - val outboxEvent = createOutboxEvent( - eventType = "TRANSFER_COMPLETED", - aggregateId = event.transactionId, - payload = mapOf( - "transactionId" to event.transactionId, - "senderId" to event.senderUserId, - "receiverUserId" to event.receiverUserId, - "amount" to event.amount, - "status" to "COMPLETED", - "occurredAt" to event.occurredAt.toEpochMilli() - ), - eventVersion = "TransferCompleted" - ) - - outboxRepository.save(outboxEvent, Instant.now()) - - log.info("Transfer completed event saved to outbox: transactionId={}", event.transactionId) - } catch (e: Exception) { - log.error("Failed to save transfer completed event to outbox: transactionId={}", event.transactionId, e) - // TODO: webhook + 모니터링 -> 알려주는거까지. - } - } - - private fun createOutboxEvent( - eventType: String, - aggregateId: Long, - payload: Map, - eventVersion: String - ): TransferEvent { - return TransferEvent( - eventId = idGenerator.nextId(), - aggregateType = "Transaction", - aggregateId = aggregateId.toString(), - eventType = eventType, - payload = objectMapper.writeValueAsString(payload), - headers = objectMapper.writeValueAsString( - mapOf( - "eventType" to eventVersion, - "eventVersion" to "v1", - "traceId" to currentTraceId(), - "producer" to "transfer-api", - "contentType" to "application/json" - ) - ) - ) - } - - private fun currentTraceId(): String = - MDC.get("traceId") ?: UUID.randomUUID().toString() - -} \ No newline at end of file diff --git a/services/transfer/infra/src/main/kotlin/io/github/hyungkishin/transentia/infra/rdb/adapter/TransferEventsOutboxJdbcRepository.kt b/services/transfer/infra/src/main/kotlin/io/github/hyungkishin/transentia/infra/rdb/adapter/TransferEventsOutboxJdbcRepository.kt index 8affbc4..2311d9c 100644 --- a/services/transfer/infra/src/main/kotlin/io/github/hyungkishin/transentia/infra/rdb/adapter/TransferEventsOutboxJdbcRepository.kt +++ b/services/transfer/infra/src/main/kotlin/io/github/hyungkishin/transentia/infra/rdb/adapter/TransferEventsOutboxJdbcRepository.kt @@ -9,31 +9,11 @@ import org.springframework.stereotype.Repository import java.sql.Timestamp import java.time.Instant -/** - * ## 배경 - * 송금 이벤트를 안정적으로 Kafka에 발행하기 위해 Outbox 패턴을 사용합니다. - * 송금 트랜잭션과 같은 DB 트랜잭션 내에서 이벤트를 저장하여 원자성을 보장하고, - * 별도의 Relay 서버가 이 테이블을 폴링하여 Kafka로 발행합니다. - * - * ## 기능 - * - SKIP LOCKED로 동시성 처리하여 여러 Relay 인스턴스 운영 가능 - * - 지수 백오프로 일시적 장애 시 자동 재시도 - * - Stuck SENDING 상태 자동 복구로 서버 재시작 시에도 안정성 보장 - * - 5회 실패 후 DEAD_LETTER 상태로 수동 개입 요구 - */ @Repository class TransferEventsOutboxJdbcRepository( private val jdbc: NamedParameterJdbcTemplate ) : TransferEventsOutboxRepository { - /** - * 송금 이벤트를 Outbox 테이블에 저장한다. - * - * 송금 트랜잭션과 동일한 DB 트랜잭션 내에서 실행되어 원자성을 보장한다. - * 중복 저장을 방지하기 위해 event_id를 기준으로 ON CONFLICT DO NOTHING 처리한다. - * - * @param row 저장할 송금 이벤트 정보 - */ override fun save(row: TransferEvent, now: Instant) { val timestamp = Timestamp.from(now) @@ -61,29 +41,17 @@ class TransferEventsOutboxJdbcRepository( } /** - * 처리할 이벤트들을 배치로 조회하고 SENDING 상태로 변경합니다. - * - * 여러 스레드나 프로세스가 동시에 실행되어도 안전하도록 SKIP LOCKED를 사용합니다. + * 처리 대기 중인 이벤트를 조회하고 SENDING 상태로 변경 * - * Stuck SENDING 상태(stuckThresholdSeconds 이상 진행 중)인 이벤트도 자동으로 복구하여 처리하며, - * 우선순위는 PENDING > SENDING(Stuck) > FAILED 순으로 처리합니다. - * - * 처리 흐름은 다음과 같습니다. - * 1. 처리 가능한 이벤트 목록을 조회하고 락을 획득 - * 2. 해당 이벤트들을 SENDING 상태로 변경하고 attempt_count (재시도 횟수) 를 증가 - * 3. 
Stuck SENDING의 경우 attempt_count는 유지 - * - * @param limit 한 번에 처리할 최대 이벤트 수 - * @param now 기준 시간 - * @param stuckThresholdSeconds Stuck SENDING 판단 기준 시간 (초) - * @return 처리할 이벤트 목록 + * SKIP LOCKED로 동시성 제어 + * 우선순위: PENDING > SENDING(Stuck) > FAILED */ override fun claimBatch( limit: Int, now: Instant, - stuckThresholdSeconds: Long + sendingTimeoutSeconds: Long ): List { - val stuckThreshold = Timestamp.from(now.minusSeconds(stuckThresholdSeconds)) + val stuckThreshold = Timestamp.from(now.minusSeconds(sendingTimeoutSeconds)) val currentTime = Timestamp.from(now) val sql = """ @@ -131,12 +99,7 @@ class TransferEventsOutboxJdbcRepository( } /** - * Kafka 발행에 성공한 이벤트들을 PUBLISHED 상태로 변경합니다. - * - * 이벤트 발행 이력을 추적하기 위해 삭제하지 않고 상태만 변경하며, - * FDS 분석이나 트러블슈팅 시 발행 이력을 확인할 수 있습니다. - * - * @param ids Kafka 발행에 성공한 이벤트 ID 목록 + * Kafka 발행 성공한 이벤트를 PUBLISHED로 변경 */ override fun markAsPublished( ids: List, @@ -162,23 +125,6 @@ class TransferEventsOutboxJdbcRepository( ) } - /** - * Kafka 발행에 실패한 이벤트에 백오프 전략을 적용한다. - * - * 지수 백오프로 재시도 간격을 늘려가며 일시적 장애에 대응한다. - * 5회 실패 시 DEAD_LETTER 상태로 변경하여 수동 개입을 요구한다. - * - * 백오프 전략: - * - 1회: 2초 후 재시도 - * - 2회: 4초 후 재시도 - * - 3회: 8초 후 재시도 - * - 4회: 16초 후 재시도 - * - 5회: DEAD_LETTER 상태로 변경 - * - * @param id 실패한 이벤트 ID - * @param cause 실패 원인 - * @param backoffMillis 다음 재시도까지 대기할 밀리초 - */ override fun markFailedWithBackoff( id: Long, cause: String?, @@ -210,11 +156,6 @@ class TransferEventsOutboxJdbcRepository( ) } - /** - * DB 조회 결과를 ClaimedRow 객체로 매핑하는 RowMapper - * - * JSONB 타입은 ::text로 캐스팅하여 String으로 변환한다. - */ private val claimedRowMapper = RowMapper { rs, _ -> ClaimedRow( eventId = rs.getLong("event_id"), @@ -224,5 +165,4 @@ class TransferEventsOutboxJdbcRepository( attemptCount = rs.getInt("attempt_count") ) } - -} \ No newline at end of file +} diff --git a/services/transfer/instances/api/src/main/resources/db/migration/V1__init_table.sql b/services/transfer/instances/api/src/main/resources/db/migration/V1__init_table.sql index e837266..1824d3a 100644 --- a/services/transfer/instances/api/src/main/resources/db/migration/V1__init_table.sql +++ b/services/transfer/instances/api/src/main/resources/db/migration/V1__init_table.sql @@ -1,35 +1,45 @@ --- ENUM 없으면 먼저 생성 -DO -$$ - BEGIN - IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'user_status') THEN - CREATE TYPE user_status AS ENUM ('ACTIVE','SUSPENDED','DEACTIVATED'); - END IF; - IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'user_role') THEN - CREATE TYPE user_role AS ENUM ('USER','ADMIN','AUDITOR'); - END IF; - END -$$; +-- ENUM 타입 정의 +DO $$ +BEGIN + IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'user_status') THEN + CREATE TYPE user_status AS ENUM ('ACTIVE','SUSPENDED','DEACTIVATED'); + END IF; + IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'user_role') THEN + CREATE TYPE user_role AS ENUM ('USER','ADMIN','AUDITOR'); + END IF; + IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'account_type') THEN + CREATE TYPE account_type AS ENUM ('CHECKING', 'SAVINGS', 'DEPOSIT'); + END IF; + IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'account_status') THEN + CREATE TYPE account_status AS ENUM ('ACTIVE', 'SUSPENDED', 'CLOSED'); + END IF; + IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'transaction_status') THEN + CREATE TYPE transaction_status AS ENUM ('PENDING','COMPLETED','FAILED','CORRECTED'); + END IF; + IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'transaction_history_status') THEN + CREATE TYPE transaction_history_status AS ENUM ('SUCCESS','FAIL'); + END IF; + IF NOT 
EXISTS (SELECT 1 FROM pg_type WHERE typname = 'transfer_outbox_status') THEN
+        CREATE TYPE transfer_outbox_status AS ENUM ('PENDING','SENDING','PUBLISHED','FAILED','DEAD_LETTER');
+    END IF;
+END $$;
 
 -- users 테이블
-CREATE TABLE IF NOT EXISTS users
-(
-    id                   BIGINT PRIMARY KEY,                 -- 사용자 고유 ID (Snowflake)
-    name                 TEXT NOT NULL,                      -- 사용자 이름
-    email                TEXT UNIQUE NOT NULL,               -- 사용자 이메일 (UNIQUE)
-    status               user_status NOT NULL DEFAULT 'ACTIVE', -- 계정 상태
-    is_transfer_locked   BOOLEAN NOT NULL DEFAULT false,     -- 송금 잠금 여부
-    transfer_lock_reason TEXT NULL,
-    daily_transfer_limit BIGINT NOT NULL DEFAULT 5000000,    -- 1일 최대 송금 가능 금액 (500만원)
-    role                 user_role NOT NULL DEFAULT 'USER',  -- 사용자 역할
-    created_at           TIMESTAMPTZ NOT NULL DEFAULT now(), -- 계정 생성 일시
-    updated_at           TIMESTAMPTZ NOT NULL DEFAULT now()  -- 마지막 정보 갱신 일시
+CREATE TABLE IF NOT EXISTS users (
+    id                   BIGINT PRIMARY KEY, -- SnowFlakeId
+    name                 VARCHAR(100) NOT NULL,
+    email                VARCHAR(255) UNIQUE NOT NULL,
+    status               user_status NOT NULL DEFAULT 'ACTIVE',
+    is_transfer_locked   BOOLEAN NOT NULL DEFAULT false,
+    transfer_lock_reason VARCHAR(500),
+    daily_transfer_limit BIGINT NOT NULL DEFAULT 5000000,
+    role                 user_role NOT NULL DEFAULT 'USER',
+    created_at           TIMESTAMPTZ NOT NULL DEFAULT now(),
+    updated_at           TIMESTAMPTZ NOT NULL DEFAULT now()
 );
 
--- 인덱스
 CREATE INDEX IF NOT EXISTS idx_users_status_locked ON users (status, is_transfer_locked);
 
--- 코멘트
 COMMENT ON TABLE users IS '송금 시스템 사용자 정보';
 COMMENT ON COLUMN users.id IS '사용자 고유 ID (Snowflake)';
 COMMENT ON COLUMN users.name IS '사용자 이름';
@@ -41,89 +51,60 @@ COMMENT ON COLUMN users.role IS '사용자 역할: USER, ADMIN, AUDITOR';
 COMMENT ON COLUMN users.created_at IS '계정 생성 일시';
 COMMENT ON COLUMN users.updated_at IS '마지막 정보 갱신 일시';
 
----
-DO
-$$
-    BEGIN
-        IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'account_type') THEN
-            CREATE TYPE account_type AS ENUM ('CHECKING', 'SAVINGS', 'DEPOSIT');
-        END IF;
-        IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'account_status') THEN
-            CREATE TYPE account_status AS ENUM ('ACTIVE', 'SUSPENDED', 'CLOSED');
-        END IF;
-    END
-$$;
-
-CREATE TABLE IF NOT EXISTS account_balances
-(
-    id             BIGINT PRIMARY KEY,                       -- ID (Snowflake)
-    user_id        BIGINT NOT NULL,                          -- 사용자 ID (1:1 관계, 현재) -- TODO : REFERENCES users (id)
-    account_number VARCHAR(20) NOT NULL,                     -- 계좌번호
-    balance        BIGINT NOT NULL DEFAULT 0,                -- 잔액
-    account_type   account_type NOT NULL DEFAULT 'CHECKING', -- 계좌 유형
-    status         account_status NOT NULL DEFAULT 'ACTIVE', -- 계좌 상태
-    version        BIGINT NOT NULL DEFAULT 0,                -- 낙관적 락
-    created_at     TIMESTAMPTZ NOT NULL DEFAULT now(),
-    updated_at     TIMESTAMPTZ NOT NULL DEFAULT now()
+-- account_balances 테이블
+CREATE TABLE IF NOT EXISTS account_balances (
+    id             BIGINT PRIMARY KEY, -- SnowFlakeId
+    user_id        BIGINT NOT NULL REFERENCES users(id),
+    account_number VARCHAR(20) NOT NULL,
+    balance        BIGINT NOT NULL DEFAULT 0,
+    account_type   account_type NOT NULL DEFAULT 'CHECKING',
+    status         account_status NOT NULL DEFAULT 'ACTIVE',
+    version        BIGINT NOT NULL DEFAULT 0,
+    created_at     TIMESTAMPTZ NOT NULL DEFAULT now(),
+    updated_at     TIMESTAMPTZ NOT NULL DEFAULT now()
 );
 
--- 인덱스
-CREATE UNIQUE INDEX IF NOT EXISTS idx_account_balances_account_number ON account_balances (account_number);
-
--- updated_at 자동 갱신 트리거
-DROP TRIGGER IF EXISTS trg_ab_touch_updated_at ON account_balances;
-CREATE OR REPLACE FUNCTION trg_ab_touch_updated_at()
-    RETURNS trigger
-    LANGUAGE plpgsql AS
-
-$$
-BEGIN
-    NEW.updated_at := now();
-    RETURN NEW;
-END
-$$;
-
-DROP TRIGGER IF EXISTS ab_touch_updated_at ON account_balances;
-CREATE TRIGGER ab_touch_updated_at
-    BEFORE UPDATE
-    ON
account_balances - FOR EACH ROW -EXECUTE FUNCTION trg_ab_touch_updated_at(); - ---- -DO -$$ - BEGIN - IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'transaction_status') THEN - CREATE TYPE transaction_status AS ENUM ('PENDING','COMPLETED','FAILED','CORRECTED'); - END IF; - -- IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'currency_code') THEN --- CREATE TYPE currency_code AS ENUM ('KRW','USD','EUR','JPY'); -- 통화 --- END IF; - END -$$; - -CREATE TABLE IF NOT EXISTS transactions -( - id BIGINT PRIMARY KEY, -- 트랜잭션 ID (Snowflake) - sender_user_id BIGINT NOT NULL, -- FK: users(id) - receiver_user_id BIGINT NOT NULL, -- FK: users(id) - amount BIGINT NOT NULL CHECK (amount > 0), -- scale=8 --- currency currency_code NOT NULL DEFAULT 'KRW', +CREATE UNIQUE INDEX IF NOT EXISTS idx_account_balances_account_number +ON account_balances (account_number); + +CREATE INDEX IF NOT EXISTS idx_account_balances_user_id +ON account_balances (user_id); + +COMMENT ON TABLE account_balances IS '사용자 계좌 잔액'; +COMMENT ON COLUMN account_balances.id IS '계좌 ID (Snowflake)'; +COMMENT ON COLUMN account_balances.user_id IS '사용자 ID'; +COMMENT ON COLUMN account_balances.account_number IS '계좌번호'; +COMMENT ON COLUMN account_balances.balance IS '잔액'; +COMMENT ON COLUMN account_balances.account_type IS '계좌 유형'; +COMMENT ON COLUMN account_balances.status IS '계좌 상태'; +COMMENT ON COLUMN account_balances.version IS '낙관적 락 버전'; +COMMENT ON COLUMN account_balances.created_at IS '계좌 생성 일시'; +COMMENT ON COLUMN account_balances.updated_at IS '마지막 갱신 일시'; + +-- transactions 테이블 +CREATE TABLE IF NOT EXISTS transactions ( + id BIGINT PRIMARY KEY, -- SnowFlakeId + sender_user_id BIGINT NOT NULL REFERENCES users(id), + receiver_user_id BIGINT NOT NULL REFERENCES users(id), + amount BIGINT NOT NULL CHECK (amount > 0), status transaction_status NOT NULL DEFAULT 'PENDING', - received_at TIMESTAMPTZ NOT NULL DEFAULT now(), -- 수신/요청 시각 - status_updated_at TIMESTAMPTZ NOT NULL DEFAULT now(), - created_at TIMESTAMPTZ NOT NULL DEFAULT now(), - updated_at TIMESTAMPTZ NOT NULL DEFAULT now(), - version BIGINT NOT NULL DEFAULT 0, + received_at TIMESTAMPTZ NOT NULL DEFAULT now(), + status_updated_at TIMESTAMPTZ NOT NULL DEFAULT now(), + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT now(), + version BIGINT NOT NULL DEFAULT 0, + + CONSTRAINT ck_tx_sender_ne_receiver CHECK (sender_user_id <> receiver_user_id) +); --- TODO: users 도메인 작업 이후, 주석 해제 --- CONSTRAINT fk_tx_sender FOREIGN KEY (sender_user_id) REFERENCES users (id) ON UPDATE RESTRICT ON DELETE RESTRICT, --- CONSTRAINT fk_tx_receiver FOREIGN KEY (receiver_user_id) REFERENCES users (id) ON UPDATE RESTRICT ON DELETE RESTRICT, +CREATE INDEX IF NOT EXISTS idx_tx_sender_created +ON transactions (sender_user_id, created_at DESC); +CREATE INDEX IF NOT EXISTS idx_tx_receiver_created +ON transactions (receiver_user_id, created_at DESC); - CONSTRAINT ck_tx_sender_ne_receiver CHECK (sender_user_id <> receiver_user_id) -); +CREATE INDEX IF NOT EXISTS idx_tx_status_updated +ON transactions (status, status_updated_at DESC); COMMENT ON TABLE transactions IS '송금 트랜잭션'; COMMENT ON COLUMN transactions.id IS '트랜잭션 ID'; @@ -131,151 +112,100 @@ COMMENT ON COLUMN transactions.sender_user_id IS '보낸 사용자 ID'; COMMENT ON COLUMN transactions.receiver_user_id IS '받는 사용자 ID'; COMMENT ON COLUMN transactions.amount IS '송금 금액'; COMMENT ON COLUMN transactions.status IS '상태: PENDING, COMPLETED, FAILED, CORRECTED'; --- COMMENT ON COLUMN transactions.currency IS '통화 코드 (예: KRW, 
USD)'; --- COMMENT ON COLUMN transactions.exchange_rate_id IS '참조 환율 ID'; COMMENT ON COLUMN transactions.received_at IS '수신/요청 시각'; COMMENT ON COLUMN transactions.created_at IS '생성 시각'; COMMENT ON COLUMN transactions.status_updated_at IS '상태 최종 갱신 시각'; --- 발신자 타임라인 -CREATE INDEX IF NOT EXISTS idx_tx_sender_created ON transactions (sender_user_id, created_at DESC); - --- 수신자 타임라인 -CREATE INDEX IF NOT EXISTS idx_tx_receiver_created ON transactions (receiver_user_id, created_at DESC); - --- 상태 기반 조회/모니터링(운영) -CREATE INDEX IF NOT EXISTS idx_tx_status_updated ON transactions (status, status_updated_at DESC); - - --- 상태 변경 시 status_updated_at 자동 업데이트 트리거 -CREATE OR REPLACE FUNCTION trg_tx_touch_status_updated_at() - RETURNS trigger - LANGUAGE plpgsql AS -$$ -BEGIN - IF NEW.status IS DISTINCT FROM OLD.status THEN - NEW.status_updated_at := now(); - END IF; - RETURN NEW; -END -$$; - -DROP TRIGGER IF EXISTS tx_touch_status_updated_at ON transactions; -CREATE TRIGGER tx_touch_status_updated_at - BEFORE UPDATE - ON transactions - FOR EACH ROW -EXECUTE FUNCTION trg_tx_touch_status_updated_at(); - ---- - -DO -$$ - BEGIN - IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'transaction_history_status') THEN - CREATE TYPE transaction_history_status AS ENUM ('SUCCESS','FAIL'); - END IF; - END -$$; - -CREATE TABLE IF NOT EXISTS transaction_histories -( - id BIGINT PRIMARY KEY, -- 이력 자체 PK - transaction_id BIGINT NOT NULL, -- TODO: 논리적 FK 로 할지 고민필요. REFERENCES transactions (id), +-- transaction_histories 테이블 +CREATE TABLE IF NOT EXISTS transaction_histories ( + id BIGINT PRIMARY KEY, + transaction_id BIGINT NOT NULL REFERENCES transactions(id), status transaction_history_status NOT NULL, - created_at TIMESTAMPTZ NOT NULL DEFAULT now() + created_at TIMESTAMPTZ NOT NULL DEFAULT now() ); +CREATE INDEX IF NOT EXISTS idx_tx_histories_txid_created +ON transaction_histories (transaction_id, created_at); + COMMENT ON TABLE transaction_histories IS '트랜잭션 상태 변경 이력'; COMMENT ON COLUMN transaction_histories.status IS '최종 송금 상태'; COMMENT ON COLUMN transaction_histories.transaction_id IS '참조 트랜잭션 ID'; COMMENT ON COLUMN transaction_histories.created_at IS '생성 시각'; --- 인덱스 -CREATE INDEX IF NOT EXISTS idx_tx_histories_txid_created - ON transaction_histories (transaction_id, created_at); - ---- - --- ENUM 타입 정의 -DO -$$ - BEGIN - IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'transfer_outbox_status') THEN - CREATE TYPE transfer_outbox_status AS ENUM ('PENDING','SENDING','PUBLISHED','FAILED', 'DEAD_LETTER'); - END IF; - END -$$; - --- Outbox 테이블 -CREATE TABLE IF NOT EXISTS transfer_events -( +-- transfer_events (Outbox) +CREATE TABLE IF NOT EXISTS transfer_events ( event_id BIGINT PRIMARY KEY, - event_version INT NOT NULL DEFAULT 1, - aggregate_type VARCHAR(100) NOT NULL, - aggregate_id VARCHAR(100) NOT NULL, - event_type VARCHAR(100) NOT NULL, - payload JSONB NOT NULL, - headers JSONB NOT NULL DEFAULT '{}'::jsonb, - created_at TIMESTAMPTZ(6) NOT NULL DEFAULT now(), - updated_at TIMESTAMPTZ(6) NOT NULL DEFAULT now(), - published_at TIMESTAMPTZ(6), + transaction_id BIGINT NOT NULL REFERENCES transactions(id), + payload JSONB NOT NULL, status transfer_outbox_status NOT NULL DEFAULT 'PENDING', - attempt_count INT NOT NULL DEFAULT 0, - next_retry_at TIMESTAMPTZ(6) NOT NULL DEFAULT now(), - last_error TEXT, - - -- 제약조건 + attempt_count INT NOT NULL DEFAULT 0, + error_message TEXT, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + published_at TIMESTAMPTZ, + CONSTRAINT ck_transfer_events_payload_object CHECK 
(jsonb_typeof(payload) = 'object'), - CONSTRAINT ck_transfer_events_headers_object CHECK (jsonb_typeof(headers) = 'object'), - CONSTRAINT ck_transfer_events_nonempty CHECK ( - char_length(btrim(aggregate_type)) > 0 AND - char_length(btrim(aggregate_id)) > 0 AND - char_length(btrim(event_type)) > 0 - ), CONSTRAINT ck_published_requires_timestamp CHECK ( status <> 'PUBLISHED' OR published_at IS NOT NULL - ), - CONSTRAINT ck_attempt_count_positive CHECK (attempt_count >= 0), - CONSTRAINT ck_retry_after_created CHECK (next_retry_at >= created_at) + ), + CONSTRAINT ck_attempt_count_positive CHECK (attempt_count >= 0) ); --- 인덱스-- --- 배치 처리용 -CREATE INDEX IF NOT EXISTS ix_transfer_events_batch_processing - ON transfer_events (status, next_retry_at, created_at) - WHERE status IN ('PENDING', 'FAILED') AND attempt_count < 5; +CREATE INDEX IF NOT EXISTS idx_transfer_events_pending +ON transfer_events (created_at) +WHERE status = 'PENDING'; --- SENDING stuck 복구용 -CREATE INDEX IF NOT EXISTS ix_transfer_events_stuck_sending - ON transfer_events (updated_at) - WHERE status = 'SENDING'; +CREATE INDEX IF NOT EXISTS idx_transfer_events_dead_letter +ON transfer_events (created_at) +WHERE status = 'DEAD_LETTER'; --- 모니터링용 -CREATE INDEX IF NOT EXISTS ix_transfer_events_monitoring - ON transfer_events (status, created_at); +COMMENT ON TABLE transfer_events IS 'Outbox: Kafka 발행 실패 시 재시도용'; +COMMENT ON COLUMN transfer_events.event_id IS '이벤트 고유 ID (Snowflake)'; +COMMENT ON COLUMN transfer_events.transaction_id IS '참조 트랜잭션 ID'; +COMMENT ON COLUMN transfer_events.payload IS 'FDS 전송용 이벤트 데이터 (JSONB)'; +COMMENT ON COLUMN transfer_events.status IS '상태: PENDING, PUBLISHED, DEAD_LETTER'; +COMMENT ON COLUMN transfer_events.attempt_count IS '발행 재시도 누적 횟수'; +COMMENT ON COLUMN transfer_events.error_message IS '최근 실패 에러 메시지'; +COMMENT ON COLUMN transfer_events.created_at IS 'Outbox 레코드 생성 시각'; +COMMENT ON COLUMN transfer_events.published_at IS 'Kafka 발행 성공 시각'; --- Aggregate 조회용 (디버깅) -CREATE INDEX IF NOT EXISTS ix_transfer_events_aggregate - ON transfer_events (aggregate_type, aggregate_id, created_at); +-- updated_at 자동 갱신 함수 +CREATE OR REPLACE FUNCTION update_updated_at_column() +RETURNS TRIGGER AS $$ +BEGIN + NEW.updated_at = now(); + RETURN NEW; +END; +$$ LANGUAGE plpgsql; --- DEAD_LETTER 관리용 -CREATE INDEX IF NOT EXISTS ix_transfer_events_dead_letter - ON transfer_events (updated_at) - WHERE status = 'DEAD_LETTER'; +-- updated_at 트리거 +CREATE TRIGGER trg_users_updated_at + BEFORE UPDATE ON users + FOR EACH ROW + EXECUTE FUNCTION update_updated_at_column(); -COMMENT ON TABLE transfer_events IS 'Outbox: DB 커밋과 함께 기록되는 발행 보장 버퍼'; -COMMENT ON COLUMN transfer_events.event_id IS 'Snowflake(Long) 이벤트 고유 ID (idempotency/재생 기준)'; -COMMENT ON COLUMN transfer_events.event_version IS '이벤트 스키마 버전'; -COMMENT ON COLUMN transfer_events.aggregate_type IS '애그리거트 종류 (예: Transfer)'; -COMMENT ON COLUMN transfer_events.aggregate_id IS '애그리거트 식별자'; -COMMENT ON COLUMN transfer_events.event_type IS '이벤트 타입명 (예: TransferCompleted)'; -COMMENT ON COLUMN transfer_events.payload IS '이벤트 페이로드(JSONB)'; -COMMENT ON COLUMN transfer_events.headers IS '추적/전파 헤더(traceId, correlationId 등)'; -COMMENT ON COLUMN transfer_events.created_at IS 'Outbox 레코드 생성(커밋) 시각'; -COMMENT ON COLUMN transfer_events.updated_at IS '최근 상태 전이 시각'; -COMMENT ON COLUMN transfer_events.published_at IS '브로커 발행 성공 시각(null=미발행)'; -COMMENT ON COLUMN transfer_events.status IS '상태(PENDING|SENDING|PUBLISHED|FAILED)'; -COMMENT ON COLUMN transfer_events.attempt_count IS '발행 재시도 누적 횟수'; 
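The relay's repository claims rows from this table with FOR UPDATE SKIP LOCKED, which is what lets several worker threads poll concurrently without double-claiming a row. A simplified sketch of the claim statement's shape, written as the kind of query string TransferEventsOutboxJdbcRepository holds; the production statement also recovers stuck SENDING rows and FAILED rows whose backoff has elapsed, and its exact column set may differ:

```
// Simplified sketch; the real statement lives in TransferEventsOutboxJdbcRepository.claimBatch.
val claimSql = """
    WITH claimed AS (
        SELECT event_id
        FROM transfer_events
        WHERE status = 'PENDING'
        ORDER BY created_at
        LIMIT :limit
        FOR UPDATE SKIP LOCKED
    )
    UPDATE transfer_events t
    SET status = 'SENDING',
        attempt_count = t.attempt_count + 1
    FROM claimed c
    WHERE t.event_id = c.event_id
    RETURNING t.event_id, t.payload
""".trimIndent()
```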
-COMMENT ON COLUMN transfer_events.next_retry_at IS '재시도 가능 시각(백오프)'; -COMMENT ON COLUMN transfer_events.last_error IS '최근 실패 에러 메시지 요약'; +CREATE TRIGGER trg_account_balances_updated_at + BEFORE UPDATE ON account_balances + FOR EACH ROW + EXECUTE FUNCTION update_updated_at_column(); + +CREATE TRIGGER trg_transactions_updated_at + BEFORE UPDATE ON transactions + FOR EACH ROW + EXECUTE FUNCTION update_updated_at_column(); + +-- status_updated_at 자동 갱신 함수 +CREATE OR REPLACE FUNCTION update_status_updated_at_column() +RETURNS TRIGGER AS $$ +BEGIN + IF NEW.status IS DISTINCT FROM OLD.status THEN + NEW.status_updated_at = now(); + END IF; + RETURN NEW; +END; +$$ LANGUAGE plpgsql; + +-- status_updated_at 트리거 +CREATE TRIGGER trg_transactions_status_updated_at + BEFORE UPDATE ON transactions + FOR EACH ROW + EXECUTE FUNCTION update_status_updated_at_column(); diff --git a/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/TransferOutboxRelay.kt b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/TransferOutboxRelay.kt index faef612..f6b9f03 100644 --- a/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/TransferOutboxRelay.kt +++ b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/TransferOutboxRelay.kt @@ -1,8 +1,10 @@ package io.github.hyungkishin.transentia.relay import io.github.hyungkishin.transentia.application.required.TransferEventsOutboxRepository -import io.github.hyungkishin.transentia.relay.component.EventBatchProcessor -import io.github.hyungkishin.transentia.relay.component.RetryPolicyHandler +import io.github.hyungkishin.transentia.relay.component.OutboxEventWriter +import io.github.hyungkishin.transentia.relay.component.OutboxEventProcessor +import io.github.hyungkishin.transentia.relay.component.OutboxItemReader +import io.github.hyungkishin.transentia.relay.component.OutboxWorker import io.github.hyungkishin.transentia.relay.config.OutboxRelayConfig import io.github.hyungkishin.transentia.relay.model.ProcessingResult import jakarta.annotation.PreDestroy @@ -12,111 +14,31 @@ import org.springframework.beans.factory.annotation.Value import org.springframework.scheduling.annotation.Scheduled import org.springframework.stereotype.Component import java.time.Instant +import java.util.concurrent.ConcurrentLinkedQueue import java.util.concurrent.ExecutorService -import java.util.concurrent.atomic.AtomicInteger +import java.util.concurrent.TimeUnit /** - * Outbox 패턴 Relay 서버 (멀티 스레드 기반) + * Outbox 이벤트를 Kafka로 전송하는 Relay * - * ## 역할 - * Outbox 테이블에 저장된 이벤트를 주기적으로 폴링하여 Kafka로 전송한다. - * 이를 통해 송금 트랜잭션과 이벤트 발행의 원자성을 보장한다. - * - * ### 1. **단일 인스턴스 + 멀티 스레드** - * - MOD 파티셔닝 제거 (인스턴스 확장 시 복잡도 제거) - * - 멀티 스레드로 처리량 확보 - * - 단순하고 안정적인 아키텍처 - * - * ### 2. **동시성 제어** - * - **DB 레벨**: SKIP LOCKED (행 단위 락) - * - **애플리케이션 레벨**: @Scheduled fixedDelay (순차 실행) - * - **처리 레벨**: ExecutorService (병렬 Kafka 전송) - * - * ### 3. **장애 복구** - * - **Stuck SENDING**: 2분 후 자동 재시도 - * - **백오프 전략**: 지수 백오프로 일시적 장애 대응 - * - **재시도 로직**: markAsPublished 실패 시 3회 재시도 - * - * ### 4. **성능 목표** - * ``` - * 평시 (200 TPS): - * - 단일 인스턴스 - * - 멀티 스레드 (8개) - * - 배치 크기: 500 - * - 처리 시간: ~50ms - * - 여유도: 충분 - * - * 피크 (2000 TPS): - * - threadPoolSize 증가 (8 -> 16) - * - 또는 batchSize 증가 (500 -> 1000) - * ``` - * - * ## 엣지케이스 대응 - * 1. **Kafka 성공 + DB 실패**: Stuck SENDING 복구 (2분 후) - * 2. **서버 다운**: Stuck SENDING 복구 (2분 후) - * 3. 
TODO: **중복 발행**: FDS 컨슈머에서 멱등성 보장 (event_id 체크 할것.) - * - * @see EventBatchProcessor 멀티 스레드 Kafka 전송 - * @see RetryPolicyHandler 백오프 정책 계산 + * Spring Batch Multi-threaded Step 패턴: + * - ItemReader: Thread-safe하게 이벤트 읽기 + * - ItemProcessor: 데이터 변환 + * - ItemWriter: Kafka 전송 */ @Component class TransferOutboxRelay( private val outboxRepository: TransferEventsOutboxRepository, - private val eventBatchProcessor: EventBatchProcessor, - private val retryPolicyHandler: RetryPolicyHandler, + private val reader: OutboxItemReader, + private val processor: OutboxEventProcessor, + private val writer: OutboxEventWriter, private val config: OutboxRelayConfig, @Qualifier("outboxExecutorService") private val executorService: ExecutorService, @Value("\${app.kafka.topics.transfer-events}") private val topicName: String ) { private val log = LoggerFactory.getLogger(javaClass) - - /** - * 연속으로 빈 배치가 발생한 횟수 - * - * 이벤트가 없을 때 불필요한 DB 조회를 줄이기 위한 카운터 - * 3회 이상 연속으로 비면 3초 대기 (백오프) - */ private var consecutiveEmptyCount = 0 - /** - * 성능 테스트용: 이 인스턴스가 처리한 총 이벤트 수 - * - * 프로덕션에서는 사용하지 않으며, 성능 테스트에서만 사용됩니다. - * AtomicInteger로 thread-safety 보장 - */ - private val _processedEventCount = AtomicInteger(0) - - /** - * 처리한 이벤트 수 조회 (테스트용) - */ - fun getProcessedEventCount(): Int = _processedEventCount.get() - - /** - * 성능 테스트용: 카운터 리셋 - */ - fun resetCounter() { - _processedEventCount.set(0) - } - - /** - * Outbox 이벤트를 주기적으로 처리하는 메인 루프 - * - * ## 실행 주기 - * - fixedDelay: 이전 실행 완료 후 1초 대기 - * - initialDelay: 애플리케이션 시작 후 5초 대기 - * - 순차 실행 보장 (오버랩 없음) - * - * ## 처리 단계 - * 1. 배치 조회 (SKIP LOCKED, 500건) - * 2. 빈 배치면 백오프 처리 후 종료 - * 3. EventBatchProcessor로 멀티 스레드 병렬 처리 - * 4. 성공/실패 결과 처리 (재시도 로직 포함) - * 5. 성능 모니터링 - * - * ## 예외 처리 - * - 모든 예외 catch하여 다음 사이클 정상 실행 보장 - * - 로그만 남기고 애플리케이션 중단 방지 - */ @Scheduled( fixedDelayString = "\${app.outbox.relay.fixedDelayMs:1000}", initialDelayString = "\${app.outbox.relay.initialDelayMs:5000}" @@ -126,41 +48,22 @@ class TransferOutboxRelay( val startTime = System.currentTimeMillis() val now = Instant.now() - // 배치 조회 - val batch = outboxRepository.claimBatch( - limit = config.batchSize, - now = now, - stuckThresholdSeconds = config.stuckThresholdSeconds - ) + reader.reset() + + val result = processBatch() - // 빈 배치 처리 - if (batch.isEmpty()) { + if (result.totalProcessed == 0) { handleEmptyBatch() return } - // 카운터 리셋 (이벤트 발견) consecutiveEmptyCount = 0 - - log.debug("Processing {} events", batch.size) - - // 배치 처리 (멀티 스레드 병렬 Kafka 전송) - val result = eventBatchProcessor.processBatch( - batch = batch, - topicName = topicName, - timeoutSeconds = config.timeoutSeconds - ) - val processingTime = System.currentTimeMillis() - startTime - // 성공 이벤트 처리 (재시도 로직 포함) + // 성공 처리 if (result.successIds.isNotEmpty()) { - retryOperation(maxAttempts = 3, operationName = "markAsPublished") { - outboxRepository.markAsPublished(result.successIds, now) - } - - _processedEventCount.addAndGet(result.successIds.size) - + outboxRepository.markAsPublished(result.successIds, now) + log.info( "Published {} events ({}% success) in {}ms", result.successIds.size, @@ -169,278 +72,103 @@ class TransferOutboxRelay( ) } - // 실패 이벤트 처리 (백오프 적용, 재시도 로직 포함) + // 실패 처리 if (result.failedEvents.isNotEmpty()) { - retryOperation(maxAttempts = 3, operationName = "handleFailedEvents") { - handleFailedEvents(result.failedEvents, now) - } + handleFailedEvents(result.failedEvents, now) } // 성능 모니터링 - monitorPerformance(processingTime, result.totalProcessed) + if (processingTime > config.slowProcessingThresholdMs) { + log.warn("Slow batch 
processing: {}ms for {} events", processingTime, result.totalProcessed)
+        }
 
     } catch (e: Exception) {
         log.error("Relay batch processing failed", e)
     }
 }
 
-    /**
-     * 빈 배치 처리 (자원 절약 전략)
-     *
-     * ## 문제
-     * 이벤트가 없을 때도 매초 DB 조회하면:
-     * - 불필요한 DB 부하
-     * - CPU 낭비
-     * - 로그 증가
-     *
-     * ## 해결
-     * 연속으로 3회 이상 빈 배치 발생 시 3초 대기
-     *
-     * ## 효과
-     * ```
-     * Before (이벤트 없을 때):
-     * - 초당 1회 DB 조회
-     * - 시간당 3,600회 조회
-     *
-     * After (백오프 적용):
-     * - 3초마다 1회 DB 조회
-     * - 시간당 1,200회 조회
-     * - 67% 감소!
-     * ```
-     *
-     * ## 트레이드오프
-     * - 장점: DB 부하 감소, 리소스 절약
-     * - 단점: 최초 이벤트 처리 3초 지연 가능 (허용 가능)
-     */
-    private fun handleEmptyBatch() {
-        consecutiveEmptyCount++
-
-        if (consecutiveEmptyCount > 3) {
-            log.debug("No events for {} cycles, sleeping 3s...", consecutiveEmptyCount)
-            Thread.sleep(3000)
+    private fun processBatch(): ProcessingResult {
+        val successIds = ConcurrentLinkedQueue<Long>()
+        val failedEvents = ConcurrentLinkedQueue<ProcessingResult.FailedEvent>()
+
+        // Worker 생성 (Reader-Processor-Writer 조합)
+        val workers = (1..config.threadPoolSize).map { workerId ->
+            OutboxWorker(
+                workerId = workerId,
+                reader = reader,
+                processor = processor,
+                writer = writer,
+                topicName = topicName,
+                successIds = successIds,
+                failedEvents = failedEvents
+            )
+        }
+
+        // 병렬 실행
+        val futures = workers.map { executorService.submit(it) }
+
+        try {
+            futures.forEach { it.get(config.timeoutSeconds, TimeUnit.SECONDS) }
+        } catch (e: Exception) {
+            log.error("Worker execution failed", e)
+            futures.forEach { it.cancel(true) }
         }
+
+        return ProcessingResult(
+            successIds = successIds.toList(),
+            failedEvents = failedEvents.toList()
+        )
     }
 
-    /**
-     * 실패한 이벤트들에 백오프 전략 적용
-     *
-     * ## 백오프(Backoff)란?
-     * 실패한 작업을 점점 더 긴 간격으로 재시도하는 전략
-     *
-     * ## 왜 필요한가?
-     * ```
-     * Kafka가 5분간 다운된 상황:
-     *
-     * 백오프 없이:
-     * - 1초마다 재시도 (300회)
-     * - 모두 실패
-     * - 리소스 낭비
-     *
-     * 백오프 적용:
-     * - 1차: 5초 후 재시도
-     * - 2차: 10초 후 재시도
-     * - 3차: 20초 후 재시도
-     * - 4차: 40초 후 재시도
-     * - 5차: 80초 후 재시도
-     * - 총 5회만 시도
-     * - 효율적!
-     * ```
-     *
-     * ## 재시도 패턴 (지수 백오프)
-     * ```
-     * attempt_count | backoff | next_retry_at
-     * --------------|---------|------------------
-     * 1             | 5초     | now + 5초
-     * 2             | 10초    | now + 10초
-     * 3             | 20초    | now + 20초
-     * 4             | 40초    | now + 40초
-     * 5             | 80초    | now + 80초
-     * 6+            | 포기    | DEAD_LETTER 상태
-     * ```
-     *
-     * ## DEAD_LETTER 상태
-     * - 5회 재시도 후에도 실패하면 수동 개입 필요
-     * - 자동 재시도 중단
-     * - 관리자 알림 (추후 구현 예정)
-     * - 수동 재처리 or 삭제
-     *
-     * @param failedEvents 실패한 이벤트 목록
-     * @param now 현재 시간 (백오프 계산 기준)
-     */
     private fun handleFailedEvents(failedEvents: List<ProcessingResult.FailedEvent>, now: Instant) {
-        if (failedEvents.isEmpty()) return
-
         log.warn("Failed to publish {} events", failedEvents.size)
 
         failedEvents.forEach { failed ->
-            // 백오프 시간 계산 (지수 + Jitter)
-            val backoffMillis = retryPolicyHandler.calculateBackoff(failed.attemptCount)
-
-            // DB에 실패 기록 + 재시도 시간 설정
+            val backoffMillis = calculateBackoff(failed.attemptCount)
             outboxRepository.markFailedWithBackoff(
                 id = failed.eventId,
                 cause = failed.error,
                 backoffMillis = backoffMillis,
                 now = now
             )
-
-            log.debug(
-                "Event {} will retry in {}ms (attempt {})",
-                failed.eventId,
-                backoffMillis,
-                failed.attemptCount + 1
-            )
         }
     }
 
     /**
-     * 성능 모니터링 및 경고
-     *
-     * ## 목적
-     * 배치 처리가 비정상적으로 느릴 때 감지하여 병목 지점 파악
-     *
-     * ## 느린 처리의 원인
-     * 1. DB 성능 저하
-     *    - 커넥션 풀 부족
-     *    - 슬로우 쿼리
-     *    - 락 대기
+     * 지수 백오프 계산
      *
-     * 2. Kafka 성능 저하
-     *    - 브로커 과부하
-     *    - 네트워크 지연
-     *    - 파티션 불균형
-     *
-     * 3.
애플리케이션 문제 - * - GC 발생 - * - 스레드 풀 포화 - * - 메모리 부족 - * - * ## 대응 - * - 경고 로그 확인 - * - 메트릭 분석 (Grafana 등) - * - 원인 파악 후 조치 - * - * @param processingTime 배치 처리에 소요된 시간 (ms) - * @param totalProcessed 처리한 이벤트 수 + * 1회: 5초 + * 2회: 10초 + * 3회: 20초 + * 4회: 40초 + * 5회: DEAD_LETTER */ - private fun monitorPerformance(processingTime: Long, totalProcessed: Int) { - if (processingTime > config.slowProcessingThresholdMs) { - log.warn( - "Slow batch processing: {}ms for {} events (threshold: {}ms)", - processingTime, - totalProcessed, - config.slowProcessingThresholdMs - ) - } + private fun calculateBackoff(attemptCount: Int): Long { + return config.baseBackoffMs * (1L shl (attemptCount - 1)) } - /** - * DB 작업에 대한 재시도 로직 - * - * ## 배경 - * markAsPublished나 handleFailedEvents 실패 시: - * - Kafka는 이미 전송됨 - * - DB만 업데이트 실패 - * - Stuck SENDING 상태로 방치 - * - 2분 후 중복 발행 - * - * ## 대응 - * DB 작업 실패 시 즉시 재시도 (3회) - * - 1차 실패: 100ms 후 재시도 - * - 2차 실패: 200ms 후 재시도 - * - 3차 실패: 예외 발생 (Stuck SENDING 복구로 처리) - * - * ## 결과 - * 대부분의 일시적 DB 장애 자동 복구 이후, 중복 발행 감소 - * - * @param maxAttempts 최대 재시도 횟수 - * @param operationName 작업 이름 (로그용) - * @param operation 실행할 작업 - * @throws Exception 모든 재시도 실패 시 - */ - private fun retryOperation( - maxAttempts: Int = 3, - operationName: String, - operation: () -> T - ): T { - var lastException: Exception? = null - - repeat(maxAttempts) { attempt -> - try { - return operation() - } catch (e: Exception) { - lastException = e - - if (attempt < maxAttempts - 1) { - val delayMs = 100L * (attempt + 1) // 100ms, 200ms - log.warn( - "{} failed (attempt {}/{}): {}. Retrying in {}ms...", - operationName, - attempt + 1, - maxAttempts, - e.message, - delayMs - ) - Thread.sleep(delayMs) - } else { - log.error( - "{} failed after {} attempts. Will be recovered by Stuck SENDING mechanism.", - operationName, - maxAttempts, - e - ) - } - } + private fun handleEmptyBatch() { + consecutiveEmptyCount++ + if (consecutiveEmptyCount > 3) { + Thread.sleep(3000) } - - throw lastException!! } - /** - * 애플리케이션 종료 시 정리 작업 - * - * ## Graceful Shutdown - * 1. 새로운 작업 수락 중단 (shutdown) - * 2. 진행 중인 작업 완료 대기 (30초) - * 3. 
타임아웃 시 강제 종료 (shutdownNow) - * - * ## 필요한 이유 - * ``` - * Graceful Shutdown 없이: - * - 이벤트 처리 중 종료 - * - Kafka 전송은 했지만 DB 업데이트 안함 - * - 재시작 시 중복 발행 - * - * Graceful Shutdown 적용: - * - 진행 중인 이벤트 처리 완료 - * - DB 업데이트 완료 - * - 안전한 종료 - * ``` - * - * ## 타임아웃 - * - 30초: 정상 종료 대기 시간 - * - 1초: 강제 종료 후 재확인 시간 - */ @PreDestroy fun cleanup() { - log.info("Shutting down outbox relay executor service") + log.info("executor service 종료") executorService.shutdown() try { - // 30초 동안 정상 종료 대기 - if (!executorService.awaitTermination(30, java.util.concurrent.TimeUnit.SECONDS)) { - log.warn("Executor did not terminate gracefully, forcing shutdown") + if (!executorService.awaitTermination(30, TimeUnit.SECONDS)) { + log.warn("강제 종료") executorService.shutdownNow() - // 강제 종료 후 1초 대기 - if (!executorService.awaitTermination(1, java.util.concurrent.TimeUnit.SECONDS)) { - log.error("Executor did not terminate after forced shutdown") + if (!executorService.awaitTermination(1, TimeUnit.SECONDS)) { + log.error("Executor did not terminate") } } - - log.info("Executor service terminated successfully") } catch (e: InterruptedException) { - log.warn("Interrupted while waiting for executor termination") executorService.shutdownNow() Thread.currentThread().interrupt() } diff --git a/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/EventBatchProcessor.kt b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/EventBatchProcessor.kt deleted file mode 100644 index 3d0cd24..0000000 --- a/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/EventBatchProcessor.kt +++ /dev/null @@ -1,144 +0,0 @@ -package io.github.hyungkishin.transentia.relay.component - -import com.fasterxml.jackson.databind.ObjectMapper -import io.github.hyungkishin.transentia.common.outbox.transfer.ClaimedRow -import io.github.hyungkishin.transentia.infrastructure.kafka.model.TransferEventAvroModel -import io.github.hyungkishin.transentia.infrastructure.kafka.model.TransferEventType -import io.github.hyungkishin.transentia.infrastructure.kafka.model.TransferStatus -import io.github.hyungkishin.transentia.relay.model.ProcessingResult -import io.github.hyungkishin.transentia.relay.model.TransferPayload -import org.slf4j.LoggerFactory -import org.springframework.stereotype.Component -import java.util.Collections -import java.util.concurrent.CompletableFuture -import java.util.concurrent.ExecutorService -import java.util.concurrent.TimeUnit - -/** - * 이벤트 배치 처리 전담 클래스 - * - * 이벤트 목록을 받아서 병렬 처리하고 결과를 반환한다. 
- */ -@Component -class EventBatchProcessor( - private val kafkaEventPublisher: KafkaEventPublisher, - private val objectMapper: ObjectMapper, - private val retryPolicyHandler: RetryPolicyHandler, - private val executorService: ExecutorService -) { - private val log = LoggerFactory.getLogger(javaClass) - - /** - * 배치를 청크 단위로 나누어 병렬 처리 - */ - fun processBatch( - batch: List, - topicName: String, - chunkSize: Int = Runtime.getRuntime().availableProcessors() * 2, - timeoutSeconds: Long = 5 - ): ProcessingResult { - val successIds = Collections.synchronizedList(mutableListOf()) - val failedEvents = Collections.synchronizedList(mutableListOf()) - - batch.chunked(chunkSize).forEach { chunk -> - processChunk(chunk, topicName, timeoutSeconds, successIds, failedEvents) - } - - return ProcessingResult( - successIds = successIds.toList(), - failedEvents = failedEvents.toList() - ) - } - - private fun processChunk( - chunk: List, - topicName: String, - timeoutSeconds: Long, - successIds: MutableList, - failedEvents: MutableList - ) { - val futures = chunk.map { row -> - CompletableFuture.supplyAsync({ - processEvent(row, topicName) - }, executorService) - } - - // 청크별 완료 대기 - futures.forEach { future -> - try { - val result = future.get(timeoutSeconds, TimeUnit.SECONDS) - if (result.isSuccess) { - successIds.add(result.eventId) - } else { - failedEvents.add(result.toFailedEvent()) - } - } catch (e: Exception) { - log.warn("Future processing failed: ${e.message}") - } - } - } - - private fun processEvent(row: ClaimedRow, topicName: String): EventProcessingResult { - return try { - val eventModel = createKafkaEventModel(row) - kafkaEventPublisher.publish(topicName, eventModel) - return EventProcessingResult.success(row.eventId) - } catch (e: Exception) { - val shouldRetry = retryPolicyHandler.shouldRetry(e) - val errorMessage = if (shouldRetry) { - e.message ?: "Send failed" - } else { - "Non-retryable error: ${e.message ?: "Send failed"}" - } - return EventProcessingResult.failure(row.eventId, errorMessage, if (shouldRetry) 0 else 999) - } - } - - private fun createKafkaEventModel(row: ClaimedRow): TransferEventAvroModel { - val payload = objectMapper.readValue(row.payload, TransferPayload::class.java) - - return TransferEventAvroModel.newBuilder() - .setEventId(row.eventId) - .setEventType(determineEventType(payload)) - .setAggregateId(row.aggregateId) - .setTransactionId(payload.transactionId) - .setSenderId(payload.senderId) - .setReceiverId(payload.receiverUserId) - .setAmount(payload.amount.toString()) - .setStatus(TransferStatus.valueOf(payload.status)) - .setOccurredAt(payload.occurredAt) - .setHeaders(row.headers) - .setCreatedAt(System.currentTimeMillis()) - .build() - } - - private fun determineEventType(payload: TransferPayload): TransferEventType { - return when (payload.status) { - "COMPLETED" -> TransferEventType.TRANSFER_COMPLETED - "FAILED" -> TransferEventType.TRANSFER_FAILED - else -> TransferEventType.TRANSFER_COMPLETED - } - } -} - -/** - * 개별 이벤트 처리 결과 - */ -private data class EventProcessingResult( - val eventId: Long, - val isSuccess: Boolean, - val errorMessage: String? 
= null, - val attemptCount: Int = 0 -) { - companion object { - fun success(eventId: Long) = EventProcessingResult(eventId, true) - fun failure(eventId: Long, error: String, attemptCount: Int) = - EventProcessingResult(eventId, false, error, attemptCount) - } - - fun toFailedEvent() = ProcessingResult.FailedEvent( - eventId = eventId, - error = errorMessage ?: "Unknown error", - attemptCount = attemptCount - ) -} \ No newline at end of file diff --git a/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/OutboxEventProcessor.kt b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/OutboxEventProcessor.kt new file mode 100644 index 0000000..fc32ebe --- /dev/null +++ b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/OutboxEventProcessor.kt @@ -0,0 +1,38 @@ +package io.github.hyungkishin.transentia.relay.component + +import com.fasterxml.jackson.databind.ObjectMapper +import io.github.hyungkishin.transentia.common.outbox.transfer.ClaimedRow +import io.github.hyungkishin.transentia.infrastructure.kafka.model.TransferEventAvroModel +import io.github.hyungkishin.transentia.infrastructure.kafka.model.TransferEventType +import io.github.hyungkishin.transentia.infrastructure.kafka.model.TransferStatus +import io.github.hyungkishin.transentia.relay.model.TransferPayload +import org.springframework.stereotype.Component + +/** + * ClaimedRow 를 TransferEventAvroModel 로 변환한다. + */ +@Component +class OutboxEventProcessor( + private val objectMapper: ObjectMapper +) { + fun process(row: ClaimedRow): TransferEventAvroModel { + val payload = objectMapper.readValue(row.payload, TransferPayload::class.java) + + return TransferEventAvroModel.newBuilder() + .setEventId(row.eventId) + .setEventType( + if (payload.status == "COMPLETED") TransferEventType.TRANSFER_COMPLETED + else TransferEventType.TRANSFER_FAILED + ) + .setAggregateId(row.aggregateId) + .setTransactionId(payload.transactionId) + .setSenderId(payload.senderId) + .setReceiverId(payload.receiverUserId) + .setAmount(payload.amount.toString()) + .setStatus(TransferStatus.valueOf(payload.status)) + .setOccurredAt(payload.occurredAt) + .setHeaders(row.headers) + .setCreatedAt(System.currentTimeMillis()) + .build() + } +} diff --git a/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/KafkaEventPublisher.kt b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/OutboxEventWriter.kt similarity index 50% rename from services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/KafkaEventPublisher.kt rename to services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/OutboxEventWriter.kt index 344d96e..7746e39 100644 --- a/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/KafkaEventPublisher.kt +++ b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/OutboxEventWriter.kt @@ -6,31 +6,26 @@ import org.slf4j.LoggerFactory import org.springframework.stereotype.Component /** - * Kafka 이벤트 발행 전담 클래스 - * - * 단일 책임: Kafka로 메시지를 안전하게 전송 + * TransferEventAvroModel을 Kafka로 전송한다. 
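For context, the `kafkaProducer.sendSync` call used below is assumed to block until the broker acks. A minimal sketch of such a helper on top of Spring Kafka's KafkaTemplate — the class name and the 10-second timeout are illustrative, not the project's actual producer:

```kotlin
import org.springframework.kafka.core.KafkaTemplate
import java.util.concurrent.TimeUnit

// Hypothetical sketch of a blocking send; the real KafkaProducer
// wrapper in this repo may differ.
class SyncSender<K : Any, V : Any>(
    private val kafkaTemplate: KafkaTemplate<K, V>
) {
    fun sendSync(topic: String, value: V) {
        // In Spring Kafka 3.x, send() returns a CompletableFuture;
        // get() makes the call blocking and fail-fast, so the caller
        // (OutboxEventWriter) can rethrow and let backoff kick in.
        kafkaTemplate.send(topic, value).get(10, TimeUnit.SECONDS)
    }
}
```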
*/ @Component -class KafkaEventPublisher( +class OutboxEventWriter( private val kafkaProducer: KafkaProducer ) { private val log = LoggerFactory.getLogger(javaClass) /** - * 이벤트를 Kafka로 동기 전송 - * - * @throws Exception 전송 실패 시 예외 발생 + * 단일 아이템 쓰기 + * + * @throws Exception Kafka 전송 실패 시 예외 */ - fun publish(topicName: String, event: TransferEventAvroModel) { + fun write(topicName: String, event: TransferEventAvroModel) { try { kafkaProducer.sendSync(topicName, event) - log.debug("Successfully published event: eventId={}, type={}", - event.eventId, event.eventType) + log.debug("Successfully wrote event: eventId={}", event.eventId) } catch (e: Exception) { - log.error("Failed to publish event: eventId={}, error={}", - event.eventId, e.message, e) - // 재전송을 위해 예외를 다시 던짐 + log.error("Failed to write event: eventId={}, error={}", event.eventId, e.message) throw e } } -} \ No newline at end of file +} diff --git a/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/OutboxItemReader.kt b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/OutboxItemReader.kt new file mode 100644 index 0000000..f1b3bfd --- /dev/null +++ b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/OutboxItemReader.kt @@ -0,0 +1,65 @@ +package io.github.hyungkishin.transentia.relay.component + +import io.github.hyungkishin.transentia.application.required.TransferEventsOutboxRepository +import io.github.hyungkishin.transentia.common.outbox.transfer.ClaimedRow +import io.github.hyungkishin.transentia.relay.config.OutboxRelayConfig +import org.slf4j.LoggerFactory +import org.springframework.stereotype.Component +import java.time.Instant + +/** + * Thread-safe ItemReader (Spring Batch Multi-threaded Step) + * + * 배치 + 커서 방식: + * - DB에서 한번에 N건 조회 (Batch) + * - 메모리에서 순차 반환 (Cursor) + */ +@Component +class OutboxItemReader( + private val repository: TransferEventsOutboxRepository, + private val config: OutboxRelayConfig +) { + private val log = LoggerFactory.getLogger(javaClass) + + private var items: List = emptyList() + private var currentIndex: Int = 0 + + @Synchronized + fun read(): ClaimedRow? { + // 메모리에 읽지 않은 아이템이 남아있으면 반환한다. 
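+        // Note: @Synchronized makes claim-and-advance atomic across the
+        // multi-threaded step's workers — each call hands out exactly one
+        // row, and a new batch is fetched only when the cursor is drained.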
+ if (currentIndex < items.size) { + return items[currentIndex++] + } + + fetchItems() + + if (items.isEmpty()) { + return null + } + + return items[currentIndex++] + } + + private fun fetchItems() { + try { + items = repository.claimBatch( + limit = config.chunkSize, + now = Instant.now(), + sendingTimeoutSeconds = config.sendingTimeoutSeconds + ) + currentIndex = 0 + + log.debug("Fetched {} items from outbox", items.size) + + } catch (e: Exception) { + log.error("Failed to fetch items from outbox", e) + items = emptyList() + currentIndex = 0 + } + } + + fun reset() { + items = emptyList() + currentIndex = 0 + } +} diff --git a/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/OutboxWorker.kt b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/OutboxWorker.kt new file mode 100644 index 0000000..9073e17 --- /dev/null +++ b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/OutboxWorker.kt @@ -0,0 +1,56 @@ +package io.github.hyungkishin.transentia.relay.component + +import io.github.hyungkishin.transentia.relay.model.ProcessingResult +import org.slf4j.LoggerFactory +import java.util.concurrent.ConcurrentLinkedQueue + +class OutboxWorker( + private val workerId: Int, + private val reader: OutboxItemReader, + private val processor: OutboxEventProcessor, + private val writer: OutboxEventWriter, + private val topicName: String, + private val successIds: ConcurrentLinkedQueue, + private val failedEvents: ConcurrentLinkedQueue +) : Runnable { + + private val log = LoggerFactory.getLogger(javaClass) + private var processedCount = 0 + + override fun run() { + try { + while (true) { + val item = reader.read() ?: break + + processItem(item) + + processedCount++ + } + + log.debug("Worker-{} finished: {} items", workerId, processedCount) + } catch (e: Exception) { + log.error("Worker-{} failed", workerId, e) + } + } + + private fun processItem(row: io.github.hyungkishin.transentia.common.outbox.transfer.ClaimedRow) { + try { + val avroModel = processor.process(row) + + writer.write(topicName, avroModel) + + successIds.add(row.eventId) + + } catch (e: Exception) { + // 실패 + log.warn("Failed to process event: eventId={}, error={}", row.eventId, e.message) + failedEvents.add( + ProcessingResult.FailedEvent( + eventId = row.eventId, + error = e.message ?: "Unknown error", + attemptCount = row.attemptCount + ) + ) + } + } +} diff --git a/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/RetryPolicyHandler.kt b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/RetryPolicyHandler.kt deleted file mode 100644 index 6946978..0000000 --- a/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/RetryPolicyHandler.kt +++ /dev/null @@ -1,42 +0,0 @@ -package io.github.hyungkishin.transentia.relay.component - -import org.springframework.beans.factory.annotation.Value -import org.springframework.stereotype.Component - -/** - * 재시도 정책 전담 클래스 - * - * 단일 책임: 예외 분석 및 백오프 계산 - */ -@Component -class RetryPolicyHandler( - @Value("\${app.outbox.relay.baseBackoffMs:5000}") private val baseBackoffMs: Long, - @Value("\${app.outbox.relay.maxBackoffMs:600000}") private val maxBackoffMs: Long -) { - - /** - * 예외 유형에 따라 재시도 가능 여부 판단 - */ - fun shouldRetry(exception: Exception): Boolean { - return 
when (exception) { - // 네트워크/일시적 장애 - 재시도 가능 - is org.apache.kafka.common.errors.TimeoutException, - is org.apache.kafka.common.errors.NetworkException, - is org.apache.kafka.common.errors.RetriableException -> true - - // 데이터/설정 오류 - 재시도 불필요 - is org.apache.kafka.common.errors.SerializationException, - is org.apache.kafka.common.errors.InvalidTopicException -> false - - // 기타 예외는 재시도 시도 - else -> true - } - } - - /** - * 지수 백오프 계산 (5초 -> 10초 -> 20초 -> ... 최대 10분) - */ - fun calculateBackoff(attemptCount: Int): Long { - return minOf(baseBackoffMs * (1L shl (attemptCount - 1)), maxBackoffMs) - } -} \ No newline at end of file diff --git a/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/config/ExecutorServiceConfig.kt b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/config/ExecutorServiceConfig.kt index d8a74ea..7bb41d9 100644 --- a/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/config/ExecutorServiceConfig.kt +++ b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/config/ExecutorServiceConfig.kt @@ -5,15 +5,14 @@ import org.springframework.context.annotation.Configuration import java.util.concurrent.ExecutorService import java.util.concurrent.Executors -/** - * ExecutorService Bean 설정 - */ @Configuration class ExecutorServiceConfig( private val config: OutboxRelayConfig ) { + @Bean("outboxExecutorService") fun outboxExecutorService(): ExecutorService { return Executors.newFixedThreadPool(config.threadPoolSize) } -} \ No newline at end of file + +} diff --git a/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/config/OutboxRelayConfig.kt b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/config/OutboxRelayConfig.kt index 49e1f60..d609c4b 100644 --- a/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/config/OutboxRelayConfig.kt +++ b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/config/OutboxRelayConfig.kt @@ -4,133 +4,31 @@ import org.springframework.boot.context.properties.ConfigurationProperties /** * Outbox Relay 설정 - * - * Outbox 패턴을 구현하는 Relay 서버의 동작을 제어하는 설정값들을 정의한다. - * 이 설정들은 application.yml의 app.outbox.relay 하위에 정의되며, - * 환경변수를 통해 동적으로 변경 가능하다. - * - * 중요한 설정값은 다음과 같다: - * 1. 배치 처리 설정 (batchSize, fixedDelayMs) - * 2. 멀티 스레드 설정 (threadPoolSize) - * 3. 재시도 정책 (baseBackoffMs, maxBackoffMs, stuckThresholdSeconds) - * 4. 
성능 모니터링 (slowProcessingThresholdMs) */ @ConfigurationProperties(prefix = "app.outbox.relay") data class OutboxRelayConfig( - /** - * 한 번에 처리할 최대 이벤트 수 - * - * 값이 클수록 다음과 같은 장/단점을 갖는다: - * - 장점: DB 쿼리 횟수 감소, 처리량 증가 - * - 단점: 메모리 사용량 증가, 처리 시간 증가 - * - * ## 성능 계산 - * - 평시 200 TPS 기준 - * - 1초당 1회 실행 - * - batchSize 500 = 2.5초분 버퍼 - */ - val batchSize: Int = 500, - /** - * 배치 처리 간격 (밀리초) - * - * fixedDelay 방식으로 이전 배치 처리 완료 후 대기 시간 - * 처리 시간과 무관하게 일정 간격 유지 - * - * ## 예시 - * - 배치 처리: 50ms - * - fixedDelay: 1000ms - * - 총 주기: 1050ms - */ + /** DB 조회 배치 크기 */ + val chunkSize: Int = 100, + + /** 스케줄링 간격 (ms) */ val fixedDelayMs: Long = 1000, - /** - * 애플리케이션 시작 후 첫 실행까지 대기 시간 (밀리초) - * - * 애플리케이션 초기화 시간 확보 (Kafka 연결, DB 초기화 등) - * - * 권장값: 5000ms (5초) - */ + /** 첫 실행 지연 시간 (ms) */ val initialDelayMs: Long = 5000, - /** - * 멀티 스레드 풀 크기 - * - * EventBatchProcessor에서 병렬 Kafka 전송 시 사용 - * - * ## 계산식 - * - I/O 바운드 작업: CPU 코어 수 × 2 - * - 4코어: 8 스레드 - * - 8코어: 16 스레드 - * - * ## 성능 예측 (8 스레드 기준 입니다.) - * - 배치 크기: 500 - * - 청크 크기: 16 (Runtime.availableProcessors() * 2) - * - 청크 수: 500 / 16 = 32 (청크) - */ - val threadPoolSize: Int = 8, + /** Worker 스레드 개수 */ + val threadPoolSize: Int = Runtime.getRuntime().availableProcessors() * 2, - /** - * Kafka 전송 타임아웃 (초) - * - * 개별 이벤트 전송 시 최대 대기 시간 - * 타임아웃 초과 시 재시도 또는 실패 처리 - */ + /** Worker 타임아웃 (초) */ val timeoutSeconds: Long = 5, - /** - * 재시도 기본 백오프 시간 (밀리초) - * - * 실패한 이벤트의 첫 재시도 대기 시간 (지수 백오프 시작점) - * - * ## 재시도 패턴 (2배씩 증가) - * - 1차 실패: 5초 후 재시도 - * - 2차 실패: 10초 후 재시도 - * - 3차 실패: 20초 후 재시도 - * - 4차 실패: 40초 후 재시도 - * - 5차 실패: 80초 후 재시도 (maxBackoffMs로 제한) - */ + /** 첫 재시도 백오프 시간 (ms) - 지수 증가 */ val baseBackoffMs: Long = 5000, - /** - * 재시도 최대 백오프 시간 (밀리초) - * - * 지수 백오프의 상한선 (무한정 증가 방지) - */ - val maxBackoffMs: Long = 600000, - - /** - * Stuck SENDING 감지 임계값 (초) - * - * SENDING 상태로 이 시간 이상 경과한 이벤트를 Stuck으로 간주하여 재처리 - * - * ## 배경 - * - Kafka 전송 후 markAsPublished 실패 시 SENDING 상태로 방치 - * - 서버 다운 시 SENDING 상태로 남을 수 있음 - * - 이 임계값 후 자동 복구 - * - * ## 트레이드오프 - * - 짧게 설정: 빠른 복구, 중복 발행 위험 증가 - * - 길게 설정: 느린 복구, 중복 발행 위험 감소 - * - * ## 중복 발행 대응 - * - FDS 컨슈머에서 event_id 기반 멱등성 보장 필수 - * - * 권장값: 120초 (2분) - * 이전값: 600초 (10분) - 너무 길어서 단축 - */ - val stuckThresholdSeconds: Long = 120, + /** Stuck SENDING 판단 기준 (초) */ + val sendingTimeoutSeconds: Long = 120, - /** - * 느린 처리 경고 임계값 (밀리초) - * - * 배치 처리 시간이 이 값 초과 시 경고 로그 출력 - * 성능 모니터링 및 병목 지점 파악용 - * - * ## 느린 처리의 원인 - * - DB 성능 저하 (슬로우 쿼리, 락 대기) - * - Kafka 성능 저하 (브로커 과부하, 네트워크 지연) - * - 애플리케이션 문제 (GC, 스레드 풀 포화) - */ + /** 느린 처리 경고 임계값 (ms) */ val slowProcessingThresholdMs: Long = 3000 ) diff --git a/services/transfer/instances/transfer-relay/src/main/resources/application-peak.yml b/services/transfer/instances/transfer-relay/src/main/resources/application-peak.yml index 94b4f41..bac1077 100644 --- a/services/transfer/instances/transfer-relay/src/main/resources/application-peak.yml +++ b/services/transfer/instances/transfer-relay/src/main/resources/application-peak.yml @@ -7,7 +7,5 @@ spring: app: outbox.relay: - batchSize: 1000 # 피크시 배치 크기 증가 (500 → 1000) - fixedDelayMs: 200 # 더 자주 처리 (1000ms → 200ms) - threadPoolSize: 16 # 멀티 스레드 증가 (8 → 16) - stuckThresholdSeconds: 60 # Stuck 감지 시간 단축 (120초 → 60초) + fixedDelayMs: 200 # 더 자주 처리 (1000ms -> 200ms) + threadPoolSize: 16 # 멀티 스레드 증가 (8 -> 16) diff --git a/services/transfer/instances/transfer-relay/src/main/resources/application.yml b/services/transfer/instances/transfer-relay/src/main/resources/application.yml index 76c7aaa..4eb8eff 100644 --- 
a/services/transfer/instances/transfer-relay/src/main/resources/application.yml +++ b/services/transfer/instances/transfer-relay/src/main/resources/application.yml @@ -57,22 +57,13 @@ kafka-consumer-config: app: outbox: relay: - # 배치 처리 설정 - batchSize: 500 # 한 번에 처리할 최대 이벤트 수 - fixedDelayMs: 1000 # 배치 처리 간격 (1초) - initialDelayMs: 5000 # 애플리케이션 시작 후 첫 실행 대기 시간 (5초) - - # 멀티 스레드 설정 - threadPoolSize: 8 # 멀티 스레드 풀 크기 (CPU 코어 수 × 2 권장) - timeoutSeconds: 10 # Kafka 전송 타임아웃 (초) - - # 재시도 정책 - baseBackoffMs: 5000 # 재시도 기본 백오프 시간 (5초) - maxBackoffMs: 600000 # 재시도 최대 백오프 시간 (10분) - stuckThresholdSeconds: 120 # Stuck SENDING 감지 임계값 (2분) - - # 성능 모니터링 - slowProcessingThresholdMs: 3000 # 느린 처리 경고 임계값 (3초) + chunkSize: 100 + fixedDelayMs: 1000 + initialDelayMs: 5000 + timeoutSeconds: 10 + baseBackoffMs: 5000 + sendingTimeoutSeconds: 120 + slowProcessingThresholdMs: 3000 kafka: topics: diff --git a/services/transfer/instances/transfer-relay/src/test/kotlin/io/github/hyungkishin/transentia/relay/TransferOutboxRelayIntegrationTest.kt b/services/transfer/instances/transfer-relay/src/test/kotlin/io/github/hyungkishin/transentia/relay/TransferOutboxRelayIntegrationTest.kt index b4eccfc..e69de29 100644 --- a/services/transfer/instances/transfer-relay/src/test/kotlin/io/github/hyungkishin/transentia/relay/TransferOutboxRelayIntegrationTest.kt +++ b/services/transfer/instances/transfer-relay/src/test/kotlin/io/github/hyungkishin/transentia/relay/TransferOutboxRelayIntegrationTest.kt @@ -1,281 +0,0 @@ -package io.github.hyungkishin.transentia.relay - -import com.fasterxml.jackson.databind.ObjectMapper -import io.github.hyungkishin.transentia.application.required.TransferEventsOutboxRepository -import io.github.hyungkishin.transentia.common.outbox.transfer.ClaimedRow -import io.github.hyungkishin.transentia.relay.config.OutboxRelayConfig -import io.github.hyungkishin.transentia.relay.model.TransferPayload -import org.junit.jupiter.api.BeforeEach -import org.junit.jupiter.api.Test -import org.mockito.kotlin.* -import org.springframework.beans.factory.annotation.Autowired -import org.springframework.boot.test.context.SpringBootTest -import org.springframework.boot.test.mock.mockito.MockBean -import java.time.Instant - -/** - * 멀티 스레드 기반 단일 인스턴스 통합 테스트 - */ -@SpringBootTest(properties = ["spring.task.scheduling.enabled=false"]) -class TransferOutboxRelayIntegrationTest { - - @Autowired - private lateinit var relay: TransferOutboxRelay - - @MockBean - private lateinit var repository: TransferEventsOutboxRepository - - @Autowired - private lateinit var objectMapper: ObjectMapper - - @Autowired - private lateinit var config: OutboxRelayConfig - - @BeforeEach - fun setUp() { - reset(repository) - println("=== Relay 통합 테스트 설정 ===") - println("배치 크기: ${config.batchSize}") - println("스레드 풀: ${config.threadPoolSize}") - println("Stuck 임계값: ${config.stuckThresholdSeconds}초") - println("===========================") - } - - @Test - fun `빈 Outbox에서 relay 실행시 아무것도 처리하지 않음 - claimBatch만 호출되고 다른 메서드는 호출되지 않아야 한다`() { - // Given - whenever(repository.claimBatch(any(), any(), any())) - .thenReturn(emptyList()) - - // When - relay.run() - - // Then - verify(repository).claimBatch( - limit = eq(config.batchSize), - now = any(), - stuckThresholdSeconds = eq(config.stuckThresholdSeconds) - ) - - verify(repository, never()) - .markAsPublished(any(), any()) - - verify(repository, never()) - .markFailedWithBackoff(any(), any(), any(), any()) - } - - @Test - fun `단일 PENDING 이벤트를 성공적으로 처리 - 재시도 로직으로 최대 3회 시도되어야 한다`() { - // Given - val 
claimedEvent = createClaimedRow(1L) - whenever(repository.claimBatch(any(), any(), any())).thenReturn(listOf(claimedEvent)) - - // When - relay.run() - - // Then - verify(repository).claimBatch( - limit = eq(config.batchSize), - now = any(), - stuckThresholdSeconds = eq(config.stuckThresholdSeconds) - ) - - verify(repository, atLeast(1)).markAsPublished(eq(listOf(1L)), any()) - verify(repository, never()).markFailedWithBackoff(any(), any(), any(), any()) - } - - @Test - fun `다중 PENDING 이벤트를 배치로 처리 - 모든 이벤트가 성공적으로 처리되어야 한다`() { - // Given - val batchSize = 5 - val claimedEvents = (1L..batchSize).map { createClaimedRow(it) } - whenever(repository.claimBatch(any(), any(), any())).thenReturn(claimedEvents) - - // When - relay.run() - - // Then - verify(repository).claimBatch( - limit = eq(config.batchSize), - now = any(), - stuckThresholdSeconds = eq(config.stuckThresholdSeconds) - ) - verify(repository, atLeast(1)).markAsPublished(eq((1L..batchSize).toList()), any()) - verify(repository, never()).markFailedWithBackoff(any(), any(), any(), any()) - } - - @Test - fun `첫 번째 claimBatch에서 빈 결과 반환시 처리 종료 - claimBatch 한 번만 호출되고 종료되어야 한다`() { - // Given - whenever(repository.claimBatch(any(), any(), any())).thenReturn(emptyList()) - - // When - relay.run() - - // Then - verify(repository, times(1)).claimBatch( - limit = eq(config.batchSize), - now = any(), - stuckThresholdSeconds = eq(config.stuckThresholdSeconds) - ) - verify(repository, never()).markAsPublished(any(), any()) - } - - @Test - fun `대용량 배치를 단일 실행에서 처리 - claimBatch이 호출되고 모든 이벤트 처리되어야 한다`() { - // Given - val batchEvents = (1L..config.batchSize.toLong()).map { createClaimedRow(it) } - whenever(repository.claimBatch(any(), any(), any())).thenReturn(batchEvents) - - // When - relay.run() - - // Then - verify(repository, atLeast(1)).claimBatch( - limit = eq(config.batchSize), - now = any(), - stuckThresholdSeconds = eq(config.stuckThresholdSeconds) - ) - verify(repository, atLeast(1)).markAsPublished(eq((1L..config.batchSize.toLong()).toList()), any()) - } - - @Test - fun `Repository 메서드들의 호출 순서와 인자 검증`() { - // Given - val eventIds = listOf(100L, 101L, 102L) - val claimedEvents = eventIds.map { createClaimedRow(it) } - whenever(repository.claimBatch(any(), any(), any())).thenReturn(claimedEvents) - - // When - relay.run() - - // Then - verify(repository, atLeast(1)).claimBatch( - limit = eq(config.batchSize), - now = any(), - stuckThresholdSeconds = eq(config.stuckThresholdSeconds) - ) - verify(repository, atLeast(1)).markAsPublished(eq(eventIds), any()) - } - - @Test - fun `markAsPublished 실패 시 재시도 로직 - 재시도로 2번 호출 (1번 실패 + 1번 성공) 해야 한다`() { - // Given - val claimedEvent = createClaimedRow(1L) - whenever(repository.claimBatch(any(), any(), any())).thenReturn(listOf(claimedEvent)) - - // 첫 번째 실패, 두 번째 성공 - whenever(repository.markAsPublished(any(), any())) - .thenThrow(RuntimeException("Temporary DB connection failed")) - .thenAnswer { } - - // When - relay.run() - - // Then - verify(repository, times(2)).markAsPublished(eq(listOf(1L)), any()) - } - - @Test - fun `단일 실행에서 다중 이벤트 배치 처리 확인 - 한 번의 claimBatch으로 모든 이벤트 처리 되어야 한다`() { - // Given - val batch = listOf(createClaimedRow(1L), createClaimedRow(2L)) - whenever(repository.claimBatch(any(), any(), any())) - .thenReturn(batch) - - // When - relay.run() - - // Then - verify(repository, times(1)).claimBatch( - limit = eq(config.batchSize), - now = any(), - stuckThresholdSeconds = eq(config.stuckThresholdSeconds) - ) - verify(repository, atLeast(1)) - .markAsPublished(eq(listOf(1L, 2L)), 
any()) - } - - @Test - fun `attempt_count가 높은 이벤트도 정상 처리됨 - attempt_count와 상관없이 정상 처리됨`() { - // Given - val highAttemptEvent = createClaimedRow(1L, attemptCount = 4) - whenever(repository.claimBatch(any(), any(), any())).thenReturn(listOf(highAttemptEvent)) - - // When - relay.run() - - // Then - verify(repository, atLeast(1)).claimBatch( - limit = eq(config.batchSize), - now = any(), - stuckThresholdSeconds = eq(config.stuckThresholdSeconds) - ) - verify(repository, atLeast(1)) - .markAsPublished(eq(listOf(1L)), any()) - } - - @Test - fun `멀티 스레드 처리 확인 - 대량 이벤트 (멀티 스레드로 빠르게 처리되어야 한다)`() { - // Given - val largeBatch = (1L..100L).map { createClaimedRow(it) } - whenever(repository.claimBatch(any(), any(), any())) - .thenReturn(largeBatch) - - // When - val startTime = System.currentTimeMillis() - relay.run() - val endTime = System.currentTimeMillis() - - // Then - println("100개 이벤트 처리 시간: ${endTime - startTime}ms") - - verify(repository, atLeast(1)) - .claimBatch(any(), any(), any()) - - verify(repository, atLeast(1)) - .markAsPublished(eq(largeBatch.map { it.eventId }), any()) - } - - /** - * 테스트용 ClaimedRow 생성 헬퍼 메서드 입니다. - * Transfer 도메인에 의존하지 않고 필요한 데이터만 생성합니다. - */ - private fun createClaimedRow( - eventId: Long, - attemptCount: Int = 0 - ): ClaimedRow { - return ClaimedRow( - eventId = eventId, - aggregateId = "transaction-$eventId", - payload = createTransferPayload(eventId), - headers = createEventHeaders(eventId), - attemptCount = attemptCount - ) - } - - private fun createTransferPayload(transactionId: Long): String { - val payload = TransferPayload( - transactionId = transactionId, - senderId = 10000 + transactionId, - receiverUserId = 20000 + transactionId, - amount = (100000..10000000).random().toLong(), - status = "COMPLETED", - occurredAt = Instant.now().toEpochMilli() - ) - return objectMapper.writeValueAsString(payload) - } - - private fun createEventHeaders(eventId: Long): String { - return objectMapper.writeValueAsString( - mapOf( - "eventType" to "TRANSFER_COMPLETED", - "eventVersion" to "v1", - "traceId" to "test-trace-$eventId-${System.currentTimeMillis()}", - "producer" to "transfer-api", - "contentType" to "application/json" - ) - ) - } - -} diff --git a/services/transfer/instances/transfer-relay/src/test/kotlin/io/github/hyungkishin/transentia/relay/TransferOutboxRelayPerformanceTest.kt b/services/transfer/instances/transfer-relay/src/test/kotlin/io/github/hyungkishin/transentia/relay/TransferOutboxRelayPerformanceTest.kt deleted file mode 100644 index 23dc57a..0000000 --- a/services/transfer/instances/transfer-relay/src/test/kotlin/io/github/hyungkishin/transentia/relay/TransferOutboxRelayPerformanceTest.kt +++ /dev/null @@ -1,271 +0,0 @@ -package io.github.hyungkishin.transentia.relay - -import com.fasterxml.jackson.databind.ObjectMapper -import io.github.hyungkishin.transentia.relay.config.OutboxRelayConfig -import io.github.hyungkishin.transentia.relay.model.TransferPayload -import org.junit.jupiter.api.AfterEach -import org.junit.jupiter.api.Assertions.assertEquals -import org.junit.jupiter.api.BeforeEach -import org.junit.jupiter.api.Disabled -import org.junit.jupiter.api.Test -import org.springframework.beans.factory.annotation.Autowired -import org.springframework.boot.test.context.SpringBootTest -import org.springframework.jdbc.core.JdbcTemplate -import java.time.Instant - -@SpringBootTest -class TransferOutboxRelayPerformanceTest { - - @Autowired - private lateinit var relay: TransferOutboxRelay - - @Autowired - private lateinit var jdbcTemplate: 
JdbcTemplate - - @Autowired - private lateinit var objectMapper: ObjectMapper - - @Autowired - private lateinit var config: OutboxRelayConfig - - @BeforeEach - fun setUp() { - jdbcTemplate.execute("DELETE FROM transfer_events") - println("=== 성능 테스트 설정 ===") - println("배치 크기: ${config.batchSize}") - println("스레드 풀 크기: ${config.threadPoolSize}") - println("타임아웃: ${config.timeoutSeconds}초") - println("==================") - } - - @AfterEach - fun tearDown() { - jdbcTemplate.execute("DELETE FROM transfer_events") - } - - @Test - @Disabled("수동 실행용 - CI에서 제외") - fun `배치 크기별 처리 성능 측정`() { - val batchSizes = listOf(100, 300, 500) - - println("=== 배치 크기별 성능 측정 ===") - - batchSizes.forEach { batchSize -> - // 데이터 정리 후 테스트 데이터 생성 - jdbcTemplate.execute("DELETE FROM transfer_events") - val testEvents = createTestEvents(batchSize) - insertTestEvents(testEvents) - - // 처리 시간 측정 - val startTime = System.currentTimeMillis() - relay.run() - val processingTime = System.currentTimeMillis() - startTime - - val publishedCount = getEventCountByStatus("PUBLISHED") - val eventsPerSec = if (processingTime > 0) { - (batchSize * 1000.0 / processingTime).toInt() - } else { - Int.MAX_VALUE - } - - val within1Second = if (processingTime < 1000) "O" else "X" - val successRate = if (batchSize > 0) "%.1f%%".format(publishedCount * 100.0 / batchSize) else "0%" - - println("배치: $batchSize 개, 시간: ${processingTime}ms, 처리량: $eventsPerSec/sec, 성공률: $successRate $within1Second") - - // 검증 - assertEquals(batchSize, publishedCount, "모든 이벤트가 처리되어야 함") - } - } - - @Test - @Disabled("수동 실행용 - CI에서 제외") - fun `메모리 효율성 테스트`() { - val largeBatchSize = 500 - - println("=== 대용량 배치 메모리 효율성 테스트 ===") - - val testEvents = createTestEvents(largeBatchSize) - insertTestEvents(testEvents) - - // GC 실행으로 메모리 정리 - System.gc() - val beforeMemory = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory() - - val startTime = System.currentTimeMillis() - relay.run() - val processingTime = System.currentTimeMillis() - startTime - - System.gc() - val afterMemory = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory() - val memoryUsed = (afterMemory - beforeMemory) / (1024 * 1024) // MB - - val publishedCount = getEventCountByStatus("PUBLISHED") - val tps = (largeBatchSize * 1000.0 / processingTime).toInt() - - println("대용량 배치: $largeBatchSize 개") - println("처리 시간: ${processingTime}ms") - println("처리량: $tps TPS") - println("성공률: %.1f%%".format(publishedCount * 100.0 / largeBatchSize)) - println("메모리 사용량: ${memoryUsed}MB") - - // 메모리 사용량이 합리적인지 확인 (배치 크기 대비) - val memoryPerEvent = memoryUsed.toDouble() / largeBatchSize * 1024 // KB per event - println("이벤트당 메모리: %.2f KB".format(memoryPerEvent)) - - assertEquals(largeBatchSize, publishedCount, "모든 이벤트가 처리되어야 함") - assert(memoryPerEvent < 10) { "이벤트당 메모리 사용량이 10KB를 초과하면 안됨" } - } - - @Test - @Disabled("수동 실행용 - CI에서 제외") - fun `스레드 풀 효율성 테스트`() { - val batchSize = 500 - - println("=== 스레드 풀 효율성 테스트 ===") - - jdbcTemplate.execute("DELETE FROM transfer_events") - - val testEvents = createTestEvents(batchSize) - insertTestEvents(testEvents) - - // 처리 전 스레드 상태 확인 - val threadGroup = Thread.currentThread().threadGroup - val beforeThreadCount = threadGroup.activeCount() - - val startTime = System.currentTimeMillis() - relay.run() - val processingTime = System.currentTimeMillis() - startTime - - val afterThreadCount = threadGroup.activeCount() - val publishedCount = getEventCountByStatus("PUBLISHED") - - println("배치 크기: $batchSize") - println("처리 시간: ${processingTime}ms") - 
println("처리 전 스레드: $beforeThreadCount") - println("처리 후 스레드: $afterThreadCount") - println("스레드 증가: ${afterThreadCount - beforeThreadCount}") - println("성공률: %.1f%%".format(publishedCount * 100.0 / batchSize)) - println("실제 처리: $publishedCount / $batchSize") - - assertEquals(batchSize, publishedCount, "모든 이벤트가 처리되어야 함") - } - - @Test - fun `기본 성능 테스트 - Mock Kafka`() { - println("=== 기본 성능 테스트 (Mock Kafka) ===") - - // Kafka 연결 없이 DB 로직만 테스트 - val testSizes = listOf(5, 10) - - testSizes.forEach { size -> - jdbcTemplate.execute("DELETE FROM transfer_events") - - val events = createTestEvents(size) - insertTestEvents(events) - - val startTime = System.currentTimeMillis() - - // Kafka 전송은 실패하지만 DB 저장 로직은 확인 가능 - try { - relay.run() - val processingTime = System.currentTimeMillis() - startTime - println("배치: $size 개, 처리 시간: ${processingTime}ms (Kafka 전송 성공)") - } catch (e: Exception) { - val processingTime = System.currentTimeMillis() - startTime - println("배치: $size 개, 처리 시간: ${processingTime}ms (Kafka 연결 실패 - 예상됨)") - println("에러: ${e.message}") - } - } - - // DB 접근은 정상 동작하는지 확인 - val totalEvents = jdbcTemplate.queryForObject( - "SELECT COUNT(*) FROM transfer_events", - Int::class.java - ) ?: 0 - - assert(totalEvents >= 0) { "DB 접근이 정상적으로 동작해야 함" } - println("총 이벤트 수: $totalEvents") - println("=== 기본 성능 테스트 완료 ===") - } - - @Test - @Disabled("수동 실행용 - CI에서 제외") - fun `대용량 처리 시뮬레이션 - 다중 사이클`() { - val largeBatch = 10000 - val events = createTestEvents(largeBatch) - insertTestEvents(events) - - var totalProcessed = 0 - var cycles = 0 - val startTime = System.currentTimeMillis() - - // 운영에서 스케줄러가 여러 번 실행되는 것을 시뮬레이션 - while (totalProcessed < largeBatch && cycles < 20) { - relay.run() - totalProcessed = getEventCountByStatus("PUBLISHED") - cycles++ - } - - val totalTime = System.currentTimeMillis() - startTime - println("총 $cycles 사이클로 $totalProcessed 개 처리, 시간: ${totalTime}ms") - } - - private fun createTestEvents(count: Int): List { - return (1..count).map { i -> - TestEvent( - eventId = i.toLong(), - aggregateId = "transaction-$i", - payload = createTransferPayload(i.toLong()) - ) - } - } - - private fun createTransferPayload(transactionId: Long): String { - val payload = TransferPayload( - transactionId = transactionId, - senderId = 10000 + transactionId, - receiverUserId = 20000 + transactionId, - amount = (100000..10000000).random().toLong(), - status = "COMPLETED", - occurredAt = Instant.now().toEpochMilli() - ) - return objectMapper.writeValueAsString(payload) - } - - private fun insertTestEvents(events: List) { - val sql = """ - INSERT INTO transfer_events (event_id, aggregate_id, aggregate_type, event_type, payload, headers, status, created_at, updated_at) - VALUES (?, ?, ?, ?, ?::jsonb, ?::jsonb, ?::transfer_outbox_status, now(), now()) - """.trimIndent() - - val batchArgs = events.map { event -> - arrayOf( - event.eventId, - event.aggregateId, - "Transaction", - "TransferCompleted", - event.payload, - "{}", - "PENDING" - ) - } - - jdbcTemplate.batchUpdate(sql, batchArgs) - println("테스트 데이터 ${events.size}개 생성 완료") - } - - private fun getEventCountByStatus(status: String): Int { - return jdbcTemplate.queryForObject( - "SELECT COUNT(*) FROM transfer_events WHERE status::text = ?", - Int::class.java, - status - ) ?: 0 - } - - data class TestEvent( - val eventId: Long, - val aggregateId: String, - val payload: String - ) -} \ No newline at end of file diff --git a/services/transfer/instances/transfer-relay/src/test/kotlin/io/github/hyungkishin/transentia/relay/TransferOutboxRelayTest.kt 
b/services/transfer/instances/transfer-relay/src/test/kotlin/io/github/hyungkishin/transentia/relay/TransferOutboxRelayTest.kt index 2eb568a..e69de29 100644 --- a/services/transfer/instances/transfer-relay/src/test/kotlin/io/github/hyungkishin/transentia/relay/TransferOutboxRelayTest.kt +++ b/services/transfer/instances/transfer-relay/src/test/kotlin/io/github/hyungkishin/transentia/relay/TransferOutboxRelayTest.kt @@ -1,284 +0,0 @@ -package io.github.hyungkishin.transentia.relay - -import io.github.hyungkishin.transentia.common.outbox.transfer.ClaimedRow -import io.github.hyungkishin.transentia.infra.rdb.adapter.TransferEventsOutboxJdbcRepository -import io.github.hyungkishin.transentia.relay.component.EventBatchProcessor -import io.github.hyungkishin.transentia.relay.component.RetryPolicyHandler -import io.github.hyungkishin.transentia.relay.config.OutboxRelayConfig -import io.github.hyungkishin.transentia.relay.model.ProcessingResult -import org.junit.jupiter.api.BeforeEach -import org.junit.jupiter.api.Test -import org.junit.jupiter.api.extension.ExtendWith -import org.mockito.Mock -import org.mockito.Mockito.lenient -import org.mockito.junit.jupiter.MockitoExtension -import org.mockito.kotlin.* -import java.util.concurrent.ExecutorService - -/** - * 멀티 스레드 기반 단일 인스턴스의 단위 테스트 - */ -@ExtendWith(MockitoExtension::class) -class TransferOutboxRelayTest { - - @Mock - private lateinit var outboxRepository: TransferEventsOutboxJdbcRepository - - @Mock - private lateinit var eventBatchProcessor: EventBatchProcessor - - @Mock - private lateinit var retryPolicyHandler: RetryPolicyHandler - - @Mock - private lateinit var config: OutboxRelayConfig - - @Mock - private lateinit var executorService: ExecutorService - - private lateinit var relay: TransferOutboxRelay - - @BeforeEach - fun setUp() { - // 모든 테스트에서 사용하는 필수 설정 - whenever(config.batchSize).thenReturn(100) - whenever(config.stuckThresholdSeconds).thenReturn(120L) - - relay = TransferOutboxRelay( - outboxRepository = outboxRepository, - eventBatchProcessor = eventBatchProcessor, - retryPolicyHandler = retryPolicyHandler, - config = config, - executorService = executorService, - topicName = "transfer-transaction-events" - ) - } - - @Test - fun `빈 배치일 때 처리하지 않음`() { - // Given - whenever(outboxRepository.claimBatch(any(), any(), any())) - .thenReturn(emptyList()) - - // When - relay.run() - - // Then - verify(outboxRepository).claimBatch( - limit = eq(100), - now = any(), - stuckThresholdSeconds = eq(120L) - ) - - // eventBatchProcessor 호출 안함 - verifyNoInteractions(eventBatchProcessor) - - // retryPolicyHandler 호출 안함 - verifyNoInteractions(retryPolicyHandler) - } - - @Test - fun `배치 처리 성공시 이벤트들을 PUBLISHED로 마킹`() { - // Given - val batch = listOf(createMockClaimedRow(1L), createMockClaimedRow(2L)) - val successResult = ProcessingResult( - successIds = listOf(1L, 2L), - failedEvents = emptyList() - ) - - whenever(config.timeoutSeconds) - .thenReturn(5L) - - whenever(outboxRepository.claimBatch(any(), any(), any())) - .thenReturn(batch) - - whenever(eventBatchProcessor.processBatch(any(), any(), any(), any())) - .thenReturn(successResult) - - // When - relay.run() - - // Then - verify(outboxRepository).claimBatch( - limit = eq(100), - now = any(), - stuckThresholdSeconds = eq(120L) - ) - verify(eventBatchProcessor).processBatch( - batch = eq(batch), - topicName = eq("transfer-transaction-events"), - chunkSize = any(), - timeoutSeconds = eq(5L) - ) - - // 재시도 로직으로 최대 3회 시도 가능 - verify(outboxRepository, atLeast(1)) - .markAsPublished(eq(listOf(1L, 
2L)), any()) - - verifyNoInteractions(retryPolicyHandler) - } - - @Test - fun `배치 처리 실패시 백오프 적용`() { - // Given - val batch = listOf(createMockClaimedRow(1L)) - val failedResult = ProcessingResult( - successIds = emptyList(), - failedEvents = listOf( - ProcessingResult.FailedEvent( - eventId = 1L, - error = "Kafka connection failed", - attemptCount = 1 - ) - ) - ) - - whenever(outboxRepository.claimBatch(any(), any(), any())).thenReturn(batch) - whenever(eventBatchProcessor.processBatch(any(), any(), any(), any())) - .thenReturn(failedResult) - whenever(retryPolicyHandler.calculateBackoff(1)).thenReturn(5000L) - - // When - relay.run() - - // Then - verify(outboxRepository).claimBatch( - limit = eq(100), - now = any(), - stuckThresholdSeconds = eq(120L) - ) - verify(eventBatchProcessor).processBatch(any(), any(), any(), any()) - verify(retryPolicyHandler).calculateBackoff(eq(1)) - - // 재시도 로직으로 최대 3회 시도 - verify(outboxRepository, atLeast(1)).markFailedWithBackoff( - id = eq(1L), - cause = eq("Kafka connection failed"), - backoffMillis = eq(5000L), - now = any() - ) - verify(outboxRepository, never()).markAsPublished(any(), any()) - } - - @Test - fun `부분 성공시 성공과 실패 모두 처리`() { - // Given - val batch = listOf(createMockClaimedRow(1L), createMockClaimedRow(2L)) - val mixedResult = ProcessingResult( - successIds = listOf(1L), - failedEvents = listOf( - ProcessingResult.FailedEvent( - eventId = 2L, - error = "Serialization failed", - attemptCount = 2 - ) - ) - ) - - whenever(outboxRepository.claimBatch(any(), any(), any())) - .thenReturn(batch) - - whenever(eventBatchProcessor.processBatch(any(), any(), any(), any())) - .thenReturn(mixedResult) - - whenever(retryPolicyHandler.calculateBackoff(2)) - .thenReturn(10000L) - - // When - relay.run() - - // Then - verify(outboxRepository, atLeast(1)) - .markAsPublished(eq(listOf(1L)), any()) - - verify(retryPolicyHandler).calculateBackoff(eq(2)) - - verify(outboxRepository, atLeast(1)) - .markFailedWithBackoff( - id = eq(2L), - cause = eq("Serialization failed"), - backoffMillis = eq(10000L), - now = any() - ) - } - - @Test - fun `처리 중 예외 발생시 안전하게 처리`() { - // Given - val batch = listOf(createMockClaimedRow(1L)) - whenever(outboxRepository.claimBatch(any(), any(), any())) - .thenReturn(batch) - - whenever(eventBatchProcessor.processBatch(any(), any(), any(), any())) - .thenThrow(RuntimeException("Unexpected error")) - - // When & Then - // 예외가 발생해도 relay.run()이 안전하게 처리되어야 함 - relay.run() - - // 배치 조회는 성공했어야 함 - verify(outboxRepository).claimBatch( - limit = eq(100), - now = any(), - stuckThresholdSeconds = eq(120L) - ) - verify(eventBatchProcessor) - .processBatch(any(), any(), any(), any()) - } - - @Test - fun `markAsPublished 실패 시 재시도 로직 동작 확인 - 3회 재시도 (총 3번 호출)`() { - // Given - val batch = listOf(createMockClaimedRow(1L)) - val successResult = ProcessingResult( - successIds = listOf(1L), - failedEvents = emptyList() - ) - - whenever(outboxRepository.claimBatch(any(), any(), any())) - .thenReturn(batch) - - whenever(eventBatchProcessor.processBatch(any(), any(), any(), any())) - .thenReturn(successResult) - - whenever(outboxRepository.markAsPublished(any(), any())) - .thenThrow(RuntimeException("DB connection failed")) - .thenThrow(RuntimeException("DB connection failed")) - .thenAnswer { } - - // When - relay.run() - - // Then - verify(outboxRepository, times(3)) - .markAsPublished(eq(listOf(1L)), any()) - } - - @Test - fun `연속 빈 배치 시 백오프 적용 확인 - 4번 연속 실행 (3번째까지는 즉시, 4번째부터 3초 대기)`() { - // Given - whenever(outboxRepository.claimBatch(any(), any(), 
any())).thenReturn(emptyList()) - - // When - relay.run() - relay.run() - relay.run() - relay.run() - - // Then - verify(outboxRepository, times(4)) - .claimBatch(any(), any(), any()) - - verifyNoInteractions(eventBatchProcessor) - } - - private fun createMockClaimedRow(eventId: Long): ClaimedRow { - return ClaimedRow( - eventId = eventId, - aggregateId = "transaction-$eventId", - payload = """{"transactionId": $eventId, "status": "COMPLETED"}""", - headers = "{}", - attemptCount = 0 - ) - } -} diff --git a/services/transfer/instances/transfer-relay/src/test/resources/application-test.yml b/services/transfer/instances/transfer-relay/src/test/resources/application-test.yml index c218c0c..ef8dd67 100644 --- a/services/transfer/instances/transfer-relay/src/test/resources/application-test.yml +++ b/services/transfer/instances/transfer-relay/src/test/resources/application-test.yml @@ -1,6 +1,7 @@ spring: application: name: transfer-relay + datasource: url: jdbc:postgresql://localhost:5432/transfer username: postgres @@ -29,24 +30,44 @@ kafka-producer-config: valueSerializer: io.confluent.kafka.serializers.KafkaAvroSerializer compressionType: lz4 acks: "1" - batchSize: 65536 # 64KB + batchSize: 65536 batchSizeBoostFactor: 1 - lingerMs: 5 # 배치 대기 시간 - requestTimeoutMs: 5000 - retryCount: 3 + lingerMs: 5 + requestTimeoutMs: 1000 + retryCount: 0 + +kafka-consumer-config: + key-deserializer: org.apache.kafka.common.serialization.StringDeserializer + value-deserializer: io.confluent.kafka.serializers.KafkaAvroDeserializer + consumer-group-id: fds-consumer-group + auto-offset-reset: earliest + specific-avro-reader-key: specific.avro.reader + specific-avro-reader: true + batch-listener: true + auto-startup: true + concurrency-level: 8 + session-timeout-ms: 10000 + heartbeat-interval-ms: 3000 + max-poll-interval-ms: 300000 + max-poll-records: 500 + max-partition-fetch-bytes-default: 1048576 + max-partition-fetch-bytes-boost-factor: 1 + poll-timeout-ms: 150 app: outbox: relay: - batchSize: 1000 # 테스트에서는 더 크게 + chunkSize: 100 fixedDelayMs: 1000 - initialDelayMs: 2000 + initialDelayMs: 5000 timeoutSeconds: 10 - threadPoolSize: 4 baseBackoffMs: 5000 - maxBackoffMs: 600000 - stuckThresholdSeconds: 120 # 2분 + sendingTimeoutSeconds: 120 slowProcessingThresholdMs: 3000 + + # 파티셔닝 설정 (테스트에서는 코드에서 주입) + instanceId: 0 + totalInstances: 1 kafka: topics: From 1ba6a6cc7aab75c12fa3dc5ee38f8f8988903203 Mon Sep 17 00:00:00 2001 From: hyungkishin Date: Sat, 1 Nov 2025 17:48:38 +0900 Subject: [PATCH 2/2] =?UTF-8?q?feat:=20single=20instance=20multiThread=20?= =?UTF-8?q?=EC=A0=84=EB=9E=B5=EC=9D=98=20relay-server=20=EB=A5=BC=20spring?= =?UTF-8?q?=20batch=20=EB=A1=9C=20=EC=9D=B4=EA=B4=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../common/outbox/transfer/ClaimedRow.kt | 1 - docker-compose.yml | 4 +- ...4\355\212\270 \354\240\225\353\246\254.md" | 3 +- .../consumer/config/KafkaConsumerConfig.kt | 61 +++++- .../main/resources/avro/transfer_event.avsc | 5 - .../service/AnalyzeTransferService.kt | 2 + .../infra/event/TransferKafkaListener.kt | 76 ++++--- .../api/src/main/resources/application.yml | 37 ++-- .../application/TransactionService.kt | 28 +-- .../handler/TransferOutboxEventHandler.kt | 2 +- .../application/mapper/OutboxEventMapper.kt | 5 +- .../port/TransferEventPublisher.kt | 3 +- .../TransferEventsOutboxRepository.kt | 36 +++- .../container/event/TransferEvent.kt | 1 - .../adapter/KafkaTransferEventPublisher.kt | 45 ++++- .../TransferEventsOutboxJdbcRepository.kt 
| 144 +++++++------ .../api/src/main/resources/application.yml | 24 +-- .../resources/db/migration/V1__init_table.sql | 191 ++++++++++-------- .../instances/transfer-relay/build.gradle.kts | 4 + .../transentia/relay/TransferOutboxRelay.kt | 176 ---------------- .../batch/TransferOutboxItemProcessor.kt | 47 +++++ .../relay/batch/TransferOutboxItemReader.kt | 93 +++++++++ .../relay/batch/TransferOutboxItemWriter.kt | 79 ++++++++ .../relay/batch/TransferOutboxJobLauncher.kt | 43 ++++ .../relay/batch/TransferOutboxSkipListener.kt | 102 ++++++++++ .../relay/batch/TransferOutboxStepListener.kt | 39 ++++ .../relay/component/OutboxEventProcessor.kt | 38 ---- .../relay/component/OutboxEventWriter.kt | 31 --- .../relay/component/OutboxItemReader.kt | 65 ------ .../relay/component/OutboxWorker.kt | 56 ----- .../relay/config/ExecutorServiceConfig.kt | 18 -- .../config/FaultTolerantStepConfigurer.kt | 116 +++++++++++ .../relay/config/OutboxRelayConfig.kt | 32 +-- .../relay/config/TransferOutboxBatchConfig.kt | 80 ++++++++ .../relay/exception/KafkaExceptions.kt | 31 +++ .../relay/model/ProcessingResult.kt | 18 -- .../src/main/resources/application.yml | 43 ++-- 37 files changed, 1053 insertions(+), 726 deletions(-) delete mode 100644 services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/TransferOutboxRelay.kt create mode 100644 services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/batch/TransferOutboxItemProcessor.kt create mode 100644 services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/batch/TransferOutboxItemReader.kt create mode 100644 services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/batch/TransferOutboxItemWriter.kt create mode 100644 services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/batch/TransferOutboxJobLauncher.kt create mode 100644 services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/batch/TransferOutboxSkipListener.kt create mode 100644 services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/batch/TransferOutboxStepListener.kt delete mode 100644 services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/OutboxEventProcessor.kt delete mode 100644 services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/OutboxEventWriter.kt delete mode 100644 services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/OutboxItemReader.kt delete mode 100644 services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/OutboxWorker.kt delete mode 100644 services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/config/ExecutorServiceConfig.kt create mode 100644 services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/config/FaultTolerantStepConfigurer.kt create mode 100644 services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/config/TransferOutboxBatchConfig.kt create mode 100644 services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/exception/KafkaExceptions.kt delete mode 100644 
services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/model/ProcessingResult.kt diff --git a/common/common-domain/src/main/kotlin/io/github/hyungkishin/transentia/common/outbox/transfer/ClaimedRow.kt b/common/common-domain/src/main/kotlin/io/github/hyungkishin/transentia/common/outbox/transfer/ClaimedRow.kt index 651a9d4..f57246d 100644 --- a/common/common-domain/src/main/kotlin/io/github/hyungkishin/transentia/common/outbox/transfer/ClaimedRow.kt +++ b/common/common-domain/src/main/kotlin/io/github/hyungkishin/transentia/common/outbox/transfer/ClaimedRow.kt @@ -2,7 +2,6 @@ package io.github.hyungkishin.transentia.common.outbox.transfer data class ClaimedRow( val eventId: Long, - val aggregateId: String, val payload: String, val headers: String, val attemptCount: Int = 0 diff --git a/docker-compose.yml b/docker-compose.yml index 0373972..d5de8bf 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -82,8 +82,8 @@ services: command: > bash -c " /opt/bitnami/kafka/bin/kafka-topics.sh --bootstrap-server kafka:9092 --create --if-not-exists --topic transfer-complete-events --partitions 8 --replication-factor 1 && - /opt/bitnami/kafka/bin/kafka-topics.sh --bootstrap-server kafka:9092 --create --if-not-exists --topic transfer-transaction-events --partitions 3 --replication-factor 1 && - echo 'topics created' + /opt/bitnami/kafka/bin/kafka-topics.sh --bootstrap-server kafka:9092 --create --if-not-exists --topic transfer-transaction-events --partitions 8 --replication-factor 1 && + echo 'Topics created: transfer-complete-events (8 partitions), transfer-transaction-events (8 partitions) - for 2000 TPS target' " restart: "no" diff --git "a/docs/etc/\354\206\241\352\270\210\353\217\204\353\251\224\354\235\270 \354\235\264\353\262\244\355\212\270 \354\240\225\353\246\254.md" "b/docs/etc/\354\206\241\352\270\210\353\217\204\353\251\224\354\235\270 \354\235\264\353\262\244\355\212\270 \354\240\225\353\246\254.md" index cdaca79..a102d1f 100644 --- "a/docs/etc/\354\206\241\352\270\210\353\217\204\353\251\224\354\235\270 \354\235\264\353\262\244\355\212\270 \354\240\225\353\246\254.md" +++ "b/docs/etc/\354\206\241\352\270\210\353\217\204\353\251\224\354\235\270 \354\235\264\353\262\244\355\212\270 \354\240\225\353\246\254.md" @@ -51,9 +51,8 @@ FDS(이상거래탐지) 쪽에서 반드시 필요한 이벤트 이다. 
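(For scale: the bump to 8 partitions above works out to roughly 2000 TPS / 8 ≈ 250 events/s per partition, which lines up with the FDS consumer's concurrency of 8 — one listener thread per partition.)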
- 이후 발행 여부는 published_at으로만 구분 ## Outbox에 저장되는 필드 -- `event_id` : Snowflake 기반 ID +- `event_id` : Snowflake 기반 ID ( Transaction ID ) - `aggregate_type` : "Transfer" (어떤 Aggregate의 이벤트인지) -- `aggregate_id` : Transaction ID - `event_type` : "TransferRequested", "TransferCompleted", "TransferFailed" - `payload` : 위 JSON 직렬화 결과 - `headers` : traceId, correlationId 등 diff --git a/infrastructure/kafka/kafka-consumer/src/main/kotlin/io/github/hyungkishin/transentia/infrastructure/kafka/consumer/config/KafkaConsumerConfig.kt b/infrastructure/kafka/kafka-consumer/src/main/kotlin/io/github/hyungkishin/transentia/infrastructure/kafka/consumer/config/KafkaConsumerConfig.kt index f724a9e..a336f68 100644 --- a/infrastructure/kafka/kafka-consumer/src/main/kotlin/io/github/hyungkishin/transentia/infrastructure/kafka/consumer/config/KafkaConsumerConfig.kt +++ b/infrastructure/kafka/kafka-consumer/src/main/kotlin/io/github/hyungkishin/transentia/infrastructure/kafka/consumer/config/KafkaConsumerConfig.kt @@ -11,6 +11,7 @@ import org.springframework.kafka.config.KafkaListenerContainerFactory import org.springframework.kafka.core.ConsumerFactory import org.springframework.kafka.core.DefaultKafkaConsumerFactory import org.springframework.kafka.listener.ConcurrentMessageListenerContainer +import org.springframework.kafka.listener.ContainerProperties import java.io.Serializable @Configuration @@ -22,21 +23,50 @@ class KafkaConsumerConfig( @Bean fun consumerConfigs(): Map { return mutableMapOf().apply { + // 기본 설정 put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, kafkaConfigData.bootstrapServers) put(ConsumerConfig.GROUP_ID_CONFIG, kafkaConsumerConfigData.consumerGroupId) put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, kafkaConsumerConfigData.keyDeserializer) put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, kafkaConsumerConfigData.valueDeserializer) put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, kafkaConsumerConfigData.autoOffsetReset) + + // Avro 설정 put(kafkaConfigData.schemaRegistryUrlKey, kafkaConfigData.schemaRegistryUrl) put(kafkaConsumerConfigData.specificAvroReaderKey, kafkaConsumerConfigData.specificAvroReader) + + // Consumer Group 관리 put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, kafkaConsumerConfigData.sessionTimeoutMs) put(ConsumerConfig.HEARTBEAT_INTERVAL_MS_CONFIG, kafkaConsumerConfigData.heartbeatIntervalMs) put(ConsumerConfig.MAX_POLL_INTERVAL_MS_CONFIG, kafkaConsumerConfigData.maxPollIntervalMs) - put( - ConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG, - kafkaConsumerConfigData.maxPartitionFetchBytesDefault * kafkaConsumerConfigData.maxPartitionFetchBytesBoostFactor + + // Fetch 설정 + put(ConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG, + kafkaConsumerConfigData.maxPartitionFetchBytesDefault * + kafkaConsumerConfigData.maxPartitionFetchBytesBoostFactor ) put(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, kafkaConsumerConfigData.maxPollRecords) + + // Fetch 최소 바이트: 1KB + // - 브로커가 최소 이 크기만큼 데이터가 쌓일 때까지 대기 + // - 너무 작으면 네트워크 오버헤드, 너무 크면 지연 발생 + put(ConsumerConfig.FETCH_MIN_BYTES_CONFIG, 1024) + + // Fetch 최대 대기 시간: 500ms + // - fetch.min.bytes에 도달하지 않아도 이 시간 후 응답 + // - 실시간성과 처리량의 균형 + put(ConsumerConfig.FETCH_MAX_WAIT_MS_CONFIG, 500) + + // 자동 커밋 비활성화 (수동 제어) + // - Spring Kafka의 AckMode로 제어 + put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false) + + // Isolation Level: read_committed + // - 트랜잭션 커밋된 메시지만 읽음 + // - 데이터 정합성 보장 + put(ConsumerConfig.ISOLATION_LEVEL_CONFIG, "read_committed") + + // Client ID (모니터링용) + put(ConsumerConfig.CLIENT_ID_CONFIG, "fds-consumer-\${spring.application.name}") } } @@ -45,14 
diff --git a/infrastructure/kafka/kafka-consumer/src/main/kotlin/io/github/hyungkishin/transentia/infrastructure/kafka/consumer/config/KafkaConsumerConfig.kt b/infrastructure/kafka/kafka-consumer/src/main/kotlin/io/github/hyungkishin/transentia/infrastructure/kafka/consumer/config/KafkaConsumerConfig.kt
index f724a9e..a336f68 100644
--- a/infrastructure/kafka/kafka-consumer/src/main/kotlin/io/github/hyungkishin/transentia/infrastructure/kafka/consumer/config/KafkaConsumerConfig.kt
+++ b/infrastructure/kafka/kafka-consumer/src/main/kotlin/io/github/hyungkishin/transentia/infrastructure/kafka/consumer/config/KafkaConsumerConfig.kt
@@ -11,6 +11,7 @@ import org.springframework.kafka.config.KafkaListenerContainerFactory
 import org.springframework.kafka.core.ConsumerFactory
 import org.springframework.kafka.core.DefaultKafkaConsumerFactory
 import org.springframework.kafka.listener.ConcurrentMessageListenerContainer
+import org.springframework.kafka.listener.ContainerProperties
 import java.io.Serializable
 
 @Configuration
@@ -22,21 +23,50 @@ class KafkaConsumerConfig(
     @Bean
     fun consumerConfigs(): Map<String, Any> {
         return mutableMapOf<String, Any>().apply {
+            // Basic settings
             put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, kafkaConfigData.bootstrapServers)
             put(ConsumerConfig.GROUP_ID_CONFIG, kafkaConsumerConfigData.consumerGroupId)
             put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, kafkaConsumerConfigData.keyDeserializer)
             put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, kafkaConsumerConfigData.valueDeserializer)
             put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, kafkaConsumerConfigData.autoOffsetReset)
+
+            // Avro settings
             put(kafkaConfigData.schemaRegistryUrlKey, kafkaConfigData.schemaRegistryUrl)
             put(kafkaConsumerConfigData.specificAvroReaderKey, kafkaConsumerConfigData.specificAvroReader)
+
+            // Consumer group management
             put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, kafkaConsumerConfigData.sessionTimeoutMs)
             put(ConsumerConfig.HEARTBEAT_INTERVAL_MS_CONFIG, kafkaConsumerConfigData.heartbeatIntervalMs)
             put(ConsumerConfig.MAX_POLL_INTERVAL_MS_CONFIG, kafkaConsumerConfigData.maxPollIntervalMs)
-            put(
-                ConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG,
-                kafkaConsumerConfigData.maxPartitionFetchBytesDefault * kafkaConsumerConfigData.maxPartitionFetchBytesBoostFactor
+
+            // Fetch settings
+            put(ConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG,
+                kafkaConsumerConfigData.maxPartitionFetchBytesDefault *
+                    kafkaConsumerConfigData.maxPartitionFetchBytesBoostFactor
             )
             put(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, kafkaConsumerConfigData.maxPollRecords)
+
+            // Minimum fetch size: 1KB
+            // - The broker waits until at least this much data has accumulated
+            // - Too small causes network overhead; too large adds latency
+            put(ConsumerConfig.FETCH_MIN_BYTES_CONFIG, 1024)
+
+            // Maximum fetch wait: 500ms
+            // - The broker responds after this long even if fetch.min.bytes is not reached
+            // - Balances latency against throughput
+            put(ConsumerConfig.FETCH_MAX_WAIT_MS_CONFIG, 500)
+
+            // Disable auto-commit (manual control)
+            // - Controlled through Spring Kafka's AckMode
+            put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false)
+
+            // Isolation level: read_committed
+            // - Reads only transactionally committed messages
+            // - Guarantees data consistency
+            put(ConsumerConfig.ISOLATION_LEVEL_CONFIG, "read_committed")
+
+            // Client ID (for monitoring)
+            put(ConsumerConfig.CLIENT_ID_CONFIG, "fds-consumer-\${spring.application.name}")
         }
     }
 
@@ -45,14 +75,35 @@ class KafkaConsumerConfig(
         return DefaultKafkaConsumerFactory(consumerConfigs())
     }
 
+    /**
+     * Kafka listener container factory for single-event processing
+     *
+     * - Batch listener: false (single events)
+     * - Concurrency: equal to the partition count (one thread per partition)
+     * - AckMode: MANUAL_IMMEDIATE (manual, immediate commit)
+     */
     @Bean
     fun kafkaListenerContainerFactory(): KafkaListenerContainerFactory<ConcurrentMessageListenerContainer<K, V>> {
         val factory = ConcurrentKafkaListenerContainerFactory<K, V>()
+        factory.consumerFactory = consumerFactory()
+
+        // Single-event processing
         factory.isBatchListener = kafkaConsumerConfigData.batchListener
+
+        // Concurrency (kept equal to the partition count)
         factory.setConcurrency(kafkaConsumerConfigData.concurrencyLevel)
+
+        // Auto startup
        factory.setAutoStartup(kafkaConsumerConfigData.autoStartup)
-        factory.containerProperties.pollTimeout = kafkaConsumerConfigData.pollTimeoutMs
+
+        // Container properties
+        factory.containerProperties.apply {
+            pollTimeout = kafkaConsumerConfigData.pollTimeoutMs
+            ackMode = ContainerProperties.AckMode.MANUAL_IMMEDIATE
+        }
+
         return factory
     }
-}
\ No newline at end of file
+
+}
diff --git a/infrastructure/kafka/kafka-model/src/main/resources/avro/transfer_event.avsc b/infrastructure/kafka/kafka-model/src/main/resources/avro/transfer_event.avsc
index 9c16770..2be80bc 100644
--- a/infrastructure/kafka/kafka-model/src/main/resources/avro/transfer_event.avsc
+++ b/infrastructure/kafka/kafka-model/src/main/resources/avro/transfer_event.avsc
@@ -18,11 +18,6 @@
       },
       "doc": "Type of transfer event"
     },
-    {
-      "name": "aggregateId",
-      "type": "string",
-      "doc": "Transaction aggregate ID as string"
-    },
     {
       "name": "transactionId",
       "type": "long",
diff --git a/services/fds/application/src/main/kotlin/io/github/hyungkishin/transentia/application/service/AnalyzeTransferService.kt b/services/fds/application/src/main/kotlin/io/github/hyungkishin/transentia/application/service/AnalyzeTransferService.kt
index 1af70a2..e36bc47 100644
--- a/services/fds/application/src/main/kotlin/io/github/hyungkishin/transentia/application/service/AnalyzeTransferService.kt
+++ b/services/fds/application/src/main/kotlin/io/github/hyungkishin/transentia/application/service/AnalyzeTransferService.kt
@@ -27,6 +27,8 @@ class AnalyzeTransferService(
 
         // TODO: edge cases -> alerting + logging + model training + admin tooling!
        // Did every bank really build this themselves? There must be established anomaly-detection patterns.
+        // NOTE: a big-data platform such as Hive <- the evidence base for the data
+        // Searching 10 years of full account history -> unspecified counterparties -> relationship graphs, query-based (this is where a performance win gets expensive.)
         // LAG + LLM
 
         // Fetch all active rules
diff --git a/services/fds/infra/src/main/kotlin/io/github/hyungkishin/transentia/infra/event/TransferKafkaListener.kt b/services/fds/infra/src/main/kotlin/io/github/hyungkishin/transentia/infra/event/TransferKafkaListener.kt
index 99565ae..571f0af 100644
--- a/services/fds/infra/src/main/kotlin/io/github/hyungkishin/transentia/infra/event/TransferKafkaListener.kt
+++ b/services/fds/infra/src/main/kotlin/io/github/hyungkishin/transentia/infra/event/TransferKafkaListener.kt
@@ -2,68 +2,66 @@ package io.github.hyungkishin.transentia.infra.event
 
 import io.github.hyungkishin.transentia.application.service.AnalyzeTransferService
 import io.github.hyungkishin.transentia.infrastructure.kafka.model.TransferEventAvroModel
+import org.apache.kafka.clients.consumer.ConsumerRecord
 import org.slf4j.LoggerFactory
-import org.springframework.beans.factory.annotation.Value
 import org.springframework.kafka.annotation.KafkaListener
-import org.springframework.messaging.handler.annotation.Headers
+import org.springframework.kafka.support.Acknowledgment
+import org.springframework.kafka.support.KafkaHeaders
+import org.springframework.messaging.handler.annotation.Header
 import org.springframework.messaging.handler.annotation.Payload
 import org.springframework.stereotype.Component
 
 @Component
 class TransferKafkaListener(
-    @Value("\${app.transfer.topic}") private val transferTopic: String,
     private val analyzeTransferService: AnalyzeTransferService,
     private val transferEventMapper: TransferEventMapper,
 ) {
     private val log = LoggerFactory.getLogger(javaClass)
 
-    /**
-     * TODO: consumer-side processing performance
-     * TODO: problems when messages are processed twice (too risky right now) -> safeguards
-     * TODO: whether offsets are updated (verify with big-data tools)
-     *
-     * - how to balance producer and consumer throughput
-     * - how many producers, how many consumers, how many partitions
-     */
     @KafkaListener(
         id = "\${kafka-consumer-config.consumer-group-id}",
         topics = ["\${app.transfer.topic}"],
+        containerFactory = "kafkaListenerContainerFactory"
     )
     fun receive(
-        @Payload messages: List<TransferEventAvroModel>,
-        @Headers headers: Map<String, Any>
+        @Payload message: TransferEventAvroModel,
+        @Header(KafkaHeaders.RECEIVED_PARTITION) partition: Int,
+        @Header(KafkaHeaders.OFFSET) offset: Long,
+        @Header(value = "eventType", required = false) eventType: String?,
+        @Header(value = "X-Trace-Id", required = false) traceId: String?,
+        consumerRecord: ConsumerRecord<String, TransferEventAvroModel>,
+        acknowledgment: Acknowledgment?
     ) {
-        val eventType = headers["eventType"]?.toString()
-        val traceId = headers["X-Trace-Id"]?.toString()
+        try {
+            log.debug(
+                "[FDS-Consumer] Received - partition={} offset={} eventId={} traceId={}",
+                partition, offset, message.eventId, traceId
+            )
 
-        log.info("@@@@@[FDS-Consumer] RECEIVED {} messages, traceId={}", messages.size, traceId)
+            // Convert to a domain event
+            val domainEvent = transferEventMapper.toDomain(message)
 
-        // TODO: verify offset behavior
-        messages.forEach { avroMessage ->
-            try {
-                log.info(
-                    "@@@@@[FDS-Consumer] Processing eventId={} amount={} status={}",
-                    avroMessage.eventId, avroMessage.amount, avroMessage.status
-                )
+            // Run the FDS analysis
+            val riskLog = analyzeTransferService.analyze(domainEvent)
 
-                val domainEvent = transferEventMapper.toDomain(avroMessage)
+            log.info(
+                "[FDS-Consumer] Analysis complete - eventId={} decision={} hits={}",
+                domainEvent.eventId,
+                riskLog.decision,
+                riskLog.ruleHits.size,
+            )
 
-                val riskLog = analyzeTransferService.analyze(domainEvent)
+            // Manual commit (when AckMode is MANUAL_IMMEDIATE)
+            acknowledgment?.acknowledge()
 
-                log.info(
-                    "[FDS-Consumer] Analysis complete - eventId={} decision={} hits={}",
-                    domainEvent.eventId, riskLog.decision, riskLog.ruleHits.size
-                )
-                // TODO: problems when Thread.sleep is introduced -> clearer to measure across multiple instances.
-                // TODO: Docker -> 3 instances -> check the logs
-
-            } catch (e: Exception) {
-                // TODO: on failure, verify the Kafka incident response
-                // TODO: verify the impact on Kafka itself
-                log.error("[FDS-Consumer] Analysis failed - eventId={}", avroMessage.eventId, e)
-                // Propagate the exception so the message is reprocessed
-                throw e
-            }
+        } catch (e: Exception) {
+            log.error(
+                "[FDS-Consumer] Analysis failed - partition={} offset={} eventId={} error={}",
+                partition, offset, message.eventId, e.message, e
+            )
+            // Propagate the exception so the message is reprocessed
+            throw e
         }
     }
+
 }
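Because the listener now rethrows to trigger redelivery, the actual retry behavior is decided by the container's error handler, which this patch leaves at the Spring Kafka default. A minimal sketch of pinning that behavior down explicitly (an assumption, not part of the patch; `DefaultErrorHandler` and `FixedBackOff` are standard Spring classes):

```kotlin
import org.springframework.kafka.config.ConcurrentKafkaListenerContainerFactory
import org.springframework.kafka.listener.DefaultErrorHandler
import org.springframework.util.backoff.FixedBackOff

// Sketch: retry a failed record 3 times, 1 second apart, before giving up.
// Could be wired into the container factory next to the AckMode settings above;
// with MANUAL_IMMEDIATE acks, an unacknowledged record is redelivered on the next attempt.
fun <K, V> configureErrorHandling(factory: ConcurrentKafkaListenerContainerFactory<K, V>) {
    factory.setCommonErrorHandler(DefaultErrorHandler(FixedBackOff(1000L, 3)))
}
```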
diff --git a/services/fds/instances/api/src/main/resources/application.yml b/services/fds/instances/api/src/main/resources/application.yml
index 3b0dab5..28a1355 100644
--- a/services/fds/instances/api/src/main/resources/application.yml
+++ b/services/fds/instances/api/src/main/resources/application.yml
@@ -9,12 +9,25 @@ spring:
     username: postgres
     password: pass1234
     driver-class-name: org.postgresql.Driver
+    # HikariCP tuning
+    hikari:
+      maximum-pool-size: 20
+      minimum-idle: 10
+      connection-timeout: 30000
+      idle-timeout: 600000
+      max-lifetime: 1800000
 
   jpa:
     open-in-view: false
     hibernate:
       ddl-auto: none
-    show-sql: true
+    show-sql: false
+    properties:
+      hibernate:
+        jdbc:
+          batch_size: 20
+        order_inserts: true
+        order_updates: true
 
   flyway:
     enabled: false
@@ -23,18 +36,17 @@ spring:
 
 logging:
   level:
-    org.hibernate.SQL: DEBUG
-    org.hibernate.type.descriptor.sql.BasicBinder: TRACE
+    org.hibernate.SQL: INFO
+    org.hibernate.type.descriptor.sql.BasicBinder: INFO
     org.springframework.kafka: INFO
-    io.github.hyungkishin.transentia: DEBUG
+    io.github.hyungkishin.transentia: INFO
 
 kafka-config:
   bootstrap-servers: host.docker.internal:9094
   schema-registry-url-key: schema.registry.url
   schema-registry-url: http://localhost:8085
-  num-of-partitions: 8
+  num-of-partitions: 3
   replication-factor: 1
-
 kafka-consumer-config:
   key-deserializer: org.apache.kafka.common.serialization.StringDeserializer
   value-deserializer: io.confluent.kafka.serializers.KafkaAvroDeserializer
@@ -42,16 +54,17 @@ kafka-consumer-config:
   auto-offset-reset: earliest
   specific-avro-reader-key: specific.avro.reader
   specific-avro-reader: true
-  batch-listener: true
+
+  batch-listener: false
   auto-startup: true
-  concurrency-level: 2 # 4 for 2000 TPS
+  concurrency-level: 3
+  max-poll-records: 100
+  max-partition-fetch-bytes-default: 1048576
+  max-partition-fetch-bytes-boost-factor: 1
+  poll-timeout-ms: 1000
   session-timeout-ms: 10000
   heartbeat-interval-ms: 3000
   max-poll-interval-ms: 300000
-  max-poll-records: 500
-  max-partition-fetch-bytes-default: 1048576
-  max-partition-fetch-bytes-boost-factor: 1
-  poll-timeout-ms: 500
 
 app:
   transfer:
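A rough sanity check on these numbers (illustrative only; the per-record analysis time below is an assumed figure, not a measurement): with 3 partitions and `concurrency-level: 3`, each listener thread owns exactly one partition, so single-record processing time bounds throughput directly.

```kotlin
// Back-of-the-envelope ceiling for the consumer settings above.
fun main() {
    val partitions = 3          // num-of-partitions
    val concurrency = 3         // concurrency-level (one thread per partition)
    val perRecordMillis = 5.0   // ASSUMED average FDS analysis time per record
    val perThreadTps = 1000.0 / perRecordMillis
    val ceiling = perThreadTps * minOf(partitions, concurrency)
    println("theoretical ceiling ~ $ceiling records/sec") // 600/sec at 5ms per record
}
```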
diff --git a/services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/TransactionService.kt b/services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/TransactionService.kt
index a0fc6cf..73dd17e 100644
--- a/services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/TransactionService.kt
+++ b/services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/TransactionService.kt
@@ -1,10 +1,8 @@
 package io.github.hyungkishin.transentia.application
 
-import io.github.hyungkishin.transentia.application.mapper.OutboxEventMapper
 import io.github.hyungkishin.transentia.application.provided.TransactionRegister
 import io.github.hyungkishin.transentia.application.provided.command.TransferRequestCommand
 import io.github.hyungkishin.transentia.application.required.TransactionRepository
-import io.github.hyungkishin.transentia.application.required.TransferEventsOutboxRepository
 import io.github.hyungkishin.transentia.application.required.UserRepository
 import io.github.hyungkishin.transentia.application.required.command.TransferResponseCommand
 import io.github.hyungkishin.transentia.common.error.CommonError
@@ -14,7 +12,6 @@ import io.github.hyungkishin.transentia.common.snowflake.IdGenerator
 import io.github.hyungkishin.transentia.common.snowflake.SnowFlakeId
 import io.github.hyungkishin.transentia.container.model.transaction.Transaction
 import io.github.hyungkishin.transentia.container.validator.transfer.TransferValidator
-import org.slf4j.LoggerFactory
 import org.springframework.context.ApplicationEventPublisher
 import org.springframework.stereotype.Service
 import org.springframework.transaction.annotation.Transactional
@@ -24,19 +21,16 @@ import java.time.Instant
 class TransactionService(
     private val transactionRepository: TransactionRepository,
     private val userRepository: UserRepository,
-    private val outboxRepository: TransferEventsOutboxRepository,
-    private val outboxEventMapper: OutboxEventMapper,
     private val idGenerator: IdGenerator,
     private val eventPublisher: ApplicationEventPublisher,
 ) : TransactionRegister {
 
-    private val log = LoggerFactory.getLogger(javaClass)
-
     @Transactional
     override fun createTransfer(command: TransferRequestCommand): TransferResponseCommand {
         val (sender, receiver) = loadUsers(command)
         val amount = command.amount()
 
+        // Apply local rules (sender/receiver blacklists, daily transfer limit)
         TransferValidator.validate(sender, receiver, amount)
 
         val transaction = Transaction.of(
@@ -55,28 +49,13 @@ class TransactionService(
 
         val completeEvent = transaction.complete()
 
-        // Save to the outbox first
-        saveToOutbox(completeEvent, savedTransaction.id.value)
-
-        // Publish the event (sent to Kafka on a separate thread after commit)
+        // Publish the event (Kafka send attempted on a separate thread after commit) - @see TransferOutboxEventHandler
+        // If the Kafka send fails, the event is saved to the outbox - @see KafkaTransferEventPublisher
         eventPublisher.publishEvent(completeEvent)
 
         return TransferResponseCommand.from(savedTransaction)
     }
 
-    private fun saveToOutbox(event: TransferCompleted, transactionId: Long) {
-        try {
-            val outboxEvent = outboxEventMapper.toOutboxEvent(event, transactionId)
-            outboxRepository.save(outboxEvent, Instant.now())
-        } catch (e: Exception) {
-            throw DomainException(
-                CommonError.Conflict("outbox_save_failed"),
-                "A system error occurred while processing the transfer.",
-                e
-            )
-        }
-    }
-
     private fun loadUsers(command: TransferRequestCommand) =
         Pair(
             userRepository.findById(command.senderId)
@@ -100,4 +79,5 @@ class TransactionService(
         )
         return TransferResponseCommand.from(tx)
     }
+
 }
diff --git a/services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/handler/TransferOutboxEventHandler.kt b/services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/handler/TransferOutboxEventHandler.kt
index 4caa316..0a5ae15 100644
--- a/services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/handler/TransferOutboxEventHandler.kt
+++ b/services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/handler/TransferOutboxEventHandler.kt
@@ -17,7 +17,7 @@ class TransferOutboxEventHandler(
     @Async("outboxEventExecutor")
     @TransactionalEventListener(phase = TransactionPhase.AFTER_COMMIT)
     fun handle(event: TransferCompleted) {
-        log.debug("Attempting async Kafka send: transactionId={}, eventId={}", event.transactionId, event.eventId)
+        log.debug("Attempting async Kafka send: transactionId={}", event.transactionId)
         eventPublisher.publish(event)
     }
diff --git a/services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/mapper/OutboxEventMapper.kt b/services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/mapper/OutboxEventMapper.kt
index 01fefb0..4f2384b 100644
--- a/services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/mapper/OutboxEventMapper.kt
+++ b/services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/mapper/OutboxEventMapper.kt
@@ -11,11 +11,10 @@ import java.util.*
 class OutboxEventMapper(
     private val objectMapper: ObjectMapper
 ) {
-    fun toOutboxEvent(event: TransferCompleted, transactionId: Long): TransferEvent {
+    fun toOutboxEvent(event: TransferCompleted): TransferEvent {
         return TransferEvent(
-            eventId = event.eventId,
+            eventId = event.transactionId,
             aggregateType = "Transaction",
-            aggregateId = transactionId.toString(),
             eventType = "TRANSFER_COMPLETED",
             payload = objectMapper.writeValueAsString(
                 mapOf(
diff --git a/services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/port/TransferEventPublisher.kt b/services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/port/TransferEventPublisher.kt
index 73c75d7..e6d345d 100644
--- a/services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/port/TransferEventPublisher.kt
+++ b/services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/port/TransferEventPublisher.kt
@@ -8,8 +8,9 @@ import io.github.hyungkishin.transentia.common.message.transfer.TransferComplete
 interface TransferEventPublisher {
     /**
      * Publishes the transfer-completed event
-     * 
+     *
      * The caller is already running on an async thread
      */
     fun publish(event: TransferCompleted)
+
 }
diff --git a/services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/required/TransferEventsOutboxRepository.kt b/services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/required/TransferEventsOutboxRepository.kt
index 72d730c..9c62fa7 100644
--- a/services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/required/TransferEventsOutboxRepository.kt
+++ b/services/transfer/application/src/main/kotlin/io/github/hyungkishin/transentia/application/required/TransferEventsOutboxRepository.kt
@@ -9,10 +9,15 @@ interface TransferEventsOutboxRepository {
     fun save(row: TransferEvent, now: Instant)
 
     /**
-     * Fetches events waiting to be processed and marks them SENDING
-     *
-     * Concurrency controlled with SKIP LOCKED
-     * Priority: PENDING > SENDING (stuck) > FAILED
+     * Fetches and claims events waiting to be processed
+     *
+     * - PENDING -> SENDING (attempt + 1, watchdog set)
+     * - SENDING (stuck) -> SENDING (attempt kept, watchdog reset)
+     * - watchdog: next_retry_at = now + sendingTimeoutSeconds
+     *
+     * @param limit number of rows to fetch
+     * @param now current time
+     * @param sendingTimeoutSeconds SENDING timeout in seconds
      */
     fun claimBatch(
         limit: Int,
@@ -20,7 +25,28 @@ interface TransferEventsOutboxRepository {
         sendingTimeoutSeconds: Long = 120
     ): List<ClaimedRow>
 
+    /**
+     * The Kafka publish succeeded
+     */
     fun markAsPublished(ids: List<Long>, now: Instant)
 
-    fun markFailedWithBackoff(id: Long, cause: String?, backoffMillis: Long, now: Instant)
+    /**
+     * Schedules a retry
+     *
+     * SENDING -> PENDING
+     */
+    fun markForRetry(
+        eventId: Long,
+        attemptCount: Int,
+        nextRetryAt: Instant,
+        error: String?,
+        now: Instant
+    )
+
+    /**
+     * Moves the event to DEAD_LETTER
+     *
+     * When maxAttempts is exceeded
+     */
+    fun markAsDeadLetter(eventId: Long, error: String?, now: Instant)
 }
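Read together, `claimBatch`/`markAsPublished`/`markForRetry`/`markAsDeadLetter` form a claim-and-ack protocol. A sketch of the relay-side loop this contract implies (illustrative, not part of the patch; the 5s-doubling backoff and the 5-attempt cap mirror the deleted `TransferOutboxRelay` and the JDBC query further below):

```kotlin
import io.github.hyungkishin.transentia.application.required.TransferEventsOutboxRepository
import io.github.hyungkishin.transentia.common.outbox.transfer.ClaimedRow
import java.time.Instant

// Sketch of one relay pass driving the repository contract above.
fun relayOnce(repo: TransferEventsOutboxRepository, publish: (ClaimedRow) -> Unit) {
    val now = Instant.now()
    val batch = repo.claimBatch(limit = 100, now = now)
    val published = mutableListOf<Long>()

    for (row in batch) {
        try {
            publish(row)
            published += row.eventId
        } catch (e: Exception) {
            val attempts = row.attemptCount + 1
            if (attempts >= 5) {
                repo.markAsDeadLetter(row.eventId, e.message, now)
            } else {
                // 5s, 10s, 20s, 40s - doubling on every attempt
                val backoffMs = 5_000L * (1L shl (attempts - 1))
                repo.markForRetry(row.eventId, attempts, now.plusMillis(backoffMs), e.message, now)
            }
        }
    }
    if (published.isNotEmpty()) repo.markAsPublished(published, Instant.now())
}
```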
diff --git a/services/transfer/domain/src/main/kotlin/io/github/hyungkishin/transentia/container/event/TransferEvent.kt b/services/transfer/domain/src/main/kotlin/io/github/hyungkishin/transentia/container/event/TransferEvent.kt
index e8525b5..9c2b4a7 100644
--- a/services/transfer/domain/src/main/kotlin/io/github/hyungkishin/transentia/container/event/TransferEvent.kt
+++ b/services/transfer/domain/src/main/kotlin/io/github/hyungkishin/transentia/container/event/TransferEvent.kt
@@ -5,7 +5,6 @@ import io.github.hyungkishin.transentia.common.event.DomainEvent
 data class TransferEvent(
     val eventId: Long,
     val aggregateType: String,
-    val aggregateId: String,
     val eventType: String,
     val payload: String, // JSON
     val headers: String // JSON (traceId, etc.)
diff --git a/services/transfer/infra/src/main/kotlin/io/github/hyungkishin/transentia/infra/adapter/KafkaTransferEventPublisher.kt b/services/transfer/infra/src/main/kotlin/io/github/hyungkishin/transentia/infra/adapter/KafkaTransferEventPublisher.kt
index f32d940..65afd2b 100644
--- a/services/transfer/infra/src/main/kotlin/io/github/hyungkishin/transentia/infra/adapter/KafkaTransferEventPublisher.kt
+++ b/services/transfer/infra/src/main/kotlin/io/github/hyungkishin/transentia/infra/adapter/KafkaTransferEventPublisher.kt
@@ -28,9 +28,8 @@ class KafkaTransferEventPublisher(
     override fun publish(event: TransferCompleted) {
         try {
             val avroModel = TransferEventAvroModel.newBuilder()
-                .setEventId(event.eventId)
+                .setEventId(event.transactionId)
                 .setEventType(TransferEventType.TRANSFER_COMPLETED)
-                .setAggregateId(event.transactionId.toString())
                 .setTransactionId(event.transactionId)
                 .setSenderId(event.senderUserId)
                 .setReceiverId(event.receiverUserId)
@@ -52,12 +51,44 @@ class KafkaTransferEventPublisher(
                 .build()
 
             kafkaProducer.sendSync(topicName, avroModel)
-
-            outboxRepository.markAsPublished(listOf(event.eventId), Instant.now())
-            log.debug("Kafka send complete and outbox row marked PUBLISHED: eventId={}", event.eventId)
-
+            log.info("Kafka send succeeded: eventId={}", event.transactionId)
+
         } catch (e: Exception) {
-            log.warn("Kafka send failed (relay will retry): eventId={}, error={}", event.eventId, e.message)
+            log.warn("Kafka send failed, saving to outbox: eventId={}, error={}", event.transactionId, e.message)
+            saveToOutbox(event)
+        }
+    }
+
+    private fun saveToOutbox(event: TransferCompleted) {
+        try {
+            val outboxEvent = io.github.hyungkishin.transentia.container.event.TransferEvent(
+                eventId = event.transactionId,
+                aggregateType = "Transaction",
+                eventType = "TRANSFER_COMPLETED",
+                payload = objectMapper.writeValueAsString(
+                    mapOf(
+                        "transactionId" to event.transactionId,
+                        "senderId" to event.senderUserId,
+                        "receiverId" to event.receiverUserId,
+                        "amount" to event.amount,
+                        "status" to "COMPLETED",
+                        "occurredAt" to event.occurredAt.toEpochMilli()
+                    )
+                ),
+                headers = objectMapper.writeValueAsString(
+                    mapOf(
+                        "eventType" to "TRANSFER_COMPLETED",
+                        "eventVersion" to "v1",
+                        "traceId" to (MDC.get("traceId") ?: UUID.randomUUID().toString()),
+                        "producer" to "transfer-api-fallback",
+                        "contentType" to "application/json"
+                    )
+                )
+            )
+            outboxRepository.save(outboxEvent, Instant.now())
+            log.info("Outbox save succeeded: eventId={}", event.transactionId)
+        } catch (outboxEx: Exception) {
+            log.error("Outbox save failed: eventId={}, error={}", event.transactionId, outboxEx.message, outboxEx)
         }
     }
 }
diff --git a/services/transfer/infra/src/main/kotlin/io/github/hyungkishin/transentia/infra/rdb/adapter/TransferEventsOutboxJdbcRepository.kt b/services/transfer/infra/src/main/kotlin/io/github/hyungkishin/transentia/infra/rdb/adapter/TransferEventsOutboxJdbcRepository.kt
index 2311d9c..4124b18 100644
--- a/services/transfer/infra/src/main/kotlin/io/github/hyungkishin/transentia/infra/rdb/adapter/TransferEventsOutboxJdbcRepository.kt
+++ b/services/transfer/infra/src/main/kotlin/io/github/hyungkishin/transentia/infra/rdb/adapter/TransferEventsOutboxJdbcRepository.kt
@@ -19,19 +19,24 @@ class TransferEventsOutboxJdbcRepository(
         val sql = """
             INSERT INTO transfer_events(
-                event_id, event_version, aggregate_type, aggregate_id, event_type,
-                payload, headers, status, attempt_count, created_at, updated_at, next_retry_at
-            ) VALUES (:eventId, 1, :aggType, :aggId, :eventType,
-                      CAST(:payload AS JSONB), CAST(:headers AS JSONB),
-                      'PENDING', 0, :now, :now, :now)
-            ON CONFLICT (event_id) DO NOTHING
+                event_id, event_version, aggregate_type, event_type,
+                payload, headers, status, attempt_count,
+                created_at, updated_at, next_retry_at
+            ) VALUES (
+                :eventId, 1, :aggType, :eventType,
+                CAST(:payload AS JSONB), CAST(:headers AS JSONB),
+                'PENDING', 0,
+                :now, :now, :now
+            )
+            ON CONFLICT (event_id) DO UPDATE
+                SET status = EXCLUDED.status,
+                    updated_at = EXCLUDED.updated_at
         """.trimIndent()
 
         jdbc.update(
             sql, mapOf(
                 "eventId" to row.eventId,
                 "aggType" to row.aggregateType,
-                "aggId" to row.aggregateId,
                 "eventType" to row.eventType,
                 "payload" to row.payload,
                 "headers" to row.headers,
@@ -41,65 +46,49 @@ class TransferEventsOutboxJdbcRepository(
         )
     }
 
     /**
-     * Fetches events waiting to be processed and marks them SENDING
-     *
-     * Concurrency controlled with SKIP LOCKED
-     * Priority: PENDING > SENDING (stuck) > FAILED
+     * Claim + read (atomic)
+     *
+     * Improvements:
+     * 1. FOR UPDATE SKIP LOCKED prevents contention
+     * 2. Retry candidates are included (next_retry_at in the past)
+     * 3. Maximum retry count is enforced (attempt_count < 5)
      */
     override fun claimBatch(
         limit: Int,
         now: Instant,
         sendingTimeoutSeconds: Long
     ): List<ClaimedRow> {
-        val stuckThreshold = Timestamp.from(now.minusSeconds(sendingTimeoutSeconds))
         val currentTime = Timestamp.from(now)
 
         val sql = """
-            WITH grabbed AS (
-                SELECT event_id
+            SELECT
+                event_id,
+                payload::text AS payload,
+                headers::text AS headers,
+                attempt_count
             FROM transfer_events
-                WHERE (
-                    status IN ('PENDING', 'FAILED')
-                    OR (status = 'SENDING' AND updated_at < :stuckThreshold)
-                )
-                AND next_retry_at <= :now
-                AND attempt_count < 5
-                ORDER BY
-                    CASE
-                        WHEN status = 'PENDING' THEN 0
-                        WHEN status = 'SENDING' THEN 1
-                        ELSE 2
-                    END,
-                    created_at
+            WHERE status = 'PENDING'
+              AND (next_retry_at IS NULL OR next_retry_at <= :now)
+              AND attempt_count < :maxAttempts
+            ORDER BY next_retry_at NULLS FIRST, created_at
             FOR UPDATE SKIP LOCKED
             LIMIT :limit
-            )
-            UPDATE transfer_events t
-            SET status = 'SENDING',
-                attempt_count = CASE
-                    WHEN t.status = 'SENDING' THEN t.attempt_count
-                    ELSE t.attempt_count + 1
-                END,
-                updated_at = :now
-            FROM grabbed g
-            WHERE t.event_id = g.event_id
-            RETURNING t.event_id, t.aggregate_id, t.payload::text AS payload,
-                      t.headers::text AS headers, t.attempt_count
         """.trimIndent()
 
         return jdbc.query(
-            sql,
-            mapOf(
+            sql, mapOf(
                 "limit" to limit,
                 "now" to currentTime,
-                "stuckThreshold" to stuckThreshold
-            ),
-            claimedRowMapper
+                "maxAttempts" to 5
+            ), claimedRowMapper
         )
     }
 
     /**
-     * Marks events whose Kafka publish succeeded as PUBLISHED
+     * Publish-completion handling
+     *
+     * Improvements:
+     * - status check removed (straight from PENDING to PUBLISHED)
      */
     override fun markAsPublished(
         ids: List<Long>,
@@ -125,33 +114,57 @@ class TransferEventsOutboxJdbcRepository(
         )
     }
 
-    override fun markFailedWithBackoff(
-        id: Long,
-        cause: String?,
-        backoffMillis: Long,
+    /**
+     * Schedules a retry
+     *
+     * - status stays PENDING
+     * - attempt_count is incremented
+     * - next_retry_at is set
+     */
+    override fun markForRetry(
+        eventId: Long,
+        attemptCount: Int,
+        nextRetryAt: Instant,
+        error: String?,
         now: Instant
     ) {
-        val currentTime = Timestamp.from(now)
-        val nextRetry = Timestamp.from(now.plusMillis(backoffMillis))
+        val sql = """
+            UPDATE transfer_events
+            SET attempt_count = :attemptCount,
+                next_retry_at = :nextRetryAt,
+                error_message = :error,
+                updated_at = :now
+            WHERE event_id = :eventId
+        """.trimIndent()
+
+        jdbc.update(
+            sql, mapOf(
+                "eventId" to eventId,
+                "attemptCount" to attemptCount,
+                "nextRetryAt" to Timestamp.from(nextRetryAt),
+                "error" to error,
+                "now" to Timestamp.from(now)
+            )
+        )
+    }
 
+    /**
+     * Moves an event to the DLQ
+     */
+    override fun markAsDeadLetter(eventId: Long, error: String?, now: Instant) {
         val sql = """
-            UPDATE transfer_events
-            SET status = CASE
-                    WHEN attempt_count >= 5 THEN 'DEAD_LETTER'::transfer_outbox_status
-                    ELSE 'FAILED'::transfer_outbox_status
-                END,
-                last_error = :errorMessage,
-                updated_at = :now,
-                next_retry_at = :nextRetry
-            WHERE event_id = :eventId
-        """.trimIndent()
+            UPDATE transfer_events
+            SET status = 'DEAD_LETTER',
+                error_message = :error,
+                updated_at = :now
+            WHERE event_id = :eventId
+        """.trimIndent()
 
         jdbc.update(
             sql, mapOf(
-                "eventId" to id,
-                "errorMessage" to (cause ?: "UNKNOWN"),
-                "now" to currentTime,
-                "nextRetry" to nextRetry
+                "eventId" to eventId,
+                "error" to error,
+                "now" to Timestamp.from(now)
             )
         )
    }
@@ -159,7 +172,6 @@ class TransferEventsOutboxJdbcRepository(
     private val claimedRowMapper = RowMapper { rs, _ ->
         ClaimedRow(
             eventId = rs.getLong("event_id"),
-            aggregateId = rs.getString("aggregate_id"),
             payload = rs.getString("payload"),
             headers = rs.getString("headers"),
             attemptCount = rs.getInt("attempt_count")
diff --git a/services/transfer/instances/api/src/main/resources/application.yml b/services/transfer/instances/api/src/main/resources/application.yml
index 7bb8c26..dc1ad6e 100644
--- a/services/transfer/instances/api/src/main/resources/application.yml
+++ b/services/transfer/instances/api/src/main/resources/application.yml
@@ -44,29 +44,11 @@ kafka-producer-config:
   valueSerializer: io.confluent.kafka.serializers.KafkaAvroSerializer
   compressionType: lz4
   acks: "1"
-  batchSize: 65536 # 64KB
+  batchSize: 65536
   batchSizeBoostFactor: 1
-  lingerMs: 5 # batch linger time
+  lingerMs: 5
   requestTimeoutMs: 1000
-  retryCount: 0 # no retries (fail fast)
-
-kafka-consumer-config:
-  key-deserializer: org.apache.kafka.common.serialization.StringDeserializer
-  value-deserializer: io.confluent.kafka.serializers.KafkaAvroDeserializer
-  consumer-group-id: fds-consumer-group
-  auto-offset-reset: earliest
-  specific-avro-reader-key: specific.avro.reader
-  specific-avro-reader: true
-  batch-listener: true
-  auto-startup: true
-  concurrency-level: 8
-  session-timeout-ms: 10000
-  heartbeat-interval-ms: 3000
-  max-poll-interval-ms: 300000
-  max-poll-records: 500
-  max-partition-fetch-bytes-default: 1048576
-  max-partition-fetch-bytes-boost-factor: 1
-  poll-timeout-ms: 150
+  retryCount: 0
 
 app:
   kafka:
diff --git a/services/transfer/instances/api/src/main/resources/db/migration/V1__init_table.sql b/services/transfer/instances/api/src/main/resources/db/migration/V1__init_table.sql
index 1824d3a..0148317 100644
--- a/services/transfer/instances/api/src/main/resources/db/migration/V1__init_table.sql
+++ b/services/transfer/instances/api/src/main/resources/db/migration/V1__init_table.sql
@@ -1,41 +1,44 @@
 -- ENUM type definitions
-DO $$
-BEGIN
-    IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'user_status') THEN
-        CREATE TYPE user_status AS ENUM ('ACTIVE','SUSPENDED','DEACTIVATED');
-    END IF;
-    IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'user_role') THEN
-        CREATE TYPE user_role AS ENUM ('USER','ADMIN','AUDITOR');
-    END IF;
-    IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'account_type') THEN
-        CREATE TYPE account_type AS ENUM ('CHECKING', 'SAVINGS', 'DEPOSIT');
-    END IF;
-    IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'account_status') THEN
-        CREATE TYPE account_status AS ENUM ('ACTIVE', 'SUSPENDED', 'CLOSED');
-    END IF;
-    IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'transaction_status') THEN
-        CREATE TYPE transaction_status AS ENUM ('PENDING','COMPLETED','FAILED','CORRECTED');
-    END IF;
-    IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'transaction_history_status') THEN
-        CREATE TYPE transaction_history_status AS ENUM ('SUCCESS','FAIL');
-    END IF;
-    IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'transfer_outbox_status') THEN
-        CREATE TYPE transfer_outbox_status AS ENUM ('PENDING','PUBLISHED','DEAD_LETTER');
-    END IF;
-END $$;
+DO
+$$
+    BEGIN
+        IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'user_status') THEN
+            CREATE TYPE user_status AS ENUM ('ACTIVE','SUSPENDED','DEACTIVATED');
+        END IF;
+        IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'user_role') THEN
+            CREATE TYPE user_role AS ENUM ('USER','ADMIN','AUDITOR');
+        END IF;
+        IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'account_type') THEN
+            CREATE TYPE account_type AS ENUM ('CHECKING', 'SAVINGS', 'DEPOSIT');
+        END IF;
+        IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'account_status') THEN
+            CREATE TYPE account_status AS ENUM ('ACTIVE', 'SUSPENDED', 'CLOSED');
+        END IF;
+        IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'transaction_status') THEN
+            CREATE TYPE transaction_status AS ENUM ('PENDING','COMPLETED','FAILED','CORRECTED');
+        END IF;
+        IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'transaction_history_status') THEN
+            CREATE TYPE transaction_history_status AS ENUM ('SUCCESS','FAIL');
+        END IF;
+        IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'transfer_outbox_status') THEN
+            CREATE TYPE transfer_outbox_status AS ENUM ('PENDING', 'SENDING', 'PUBLISHED', 'DEAD_LETTER');
+        END IF;
+    END
+$$;
 
 -- users table
-CREATE TABLE IF NOT EXISTS users (
+CREATE TABLE IF NOT EXISTS users
+(
     id                   BIGINT PRIMARY KEY, -- SnowFlakeId
-    name VARCHAR(100) NOT NULL,
+    name                 VARCHAR(100)        NOT NULL,
     email                VARCHAR(255) UNIQUE NOT NULL,
-    status user_status NOT NULL DEFAULT 'ACTIVE',
-    is_transfer_locked BOOLEAN NOT NULL DEFAULT false,
+    status               user_status         NOT NULL DEFAULT 'ACTIVE',
+    is_transfer_locked   BOOLEAN             NOT NULL DEFAULT false,
     transfer_lock_reason VARCHAR(500),
-    daily_transfer_limit BIGINT NOT NULL DEFAULT 5000000,
-    role user_role NOT NULL DEFAULT 'USER',
-    created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
-    updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
+    daily_transfer_limit BIGINT              NOT NULL DEFAULT 5000000,
+    role                 user_role           NOT NULL DEFAULT 'USER',
+    created_at           TIMESTAMPTZ         NOT NULL DEFAULT now(),
+    updated_at           TIMESTAMPTZ         NOT NULL DEFAULT now()
 );
 
 CREATE INDEX IF NOT EXISTS idx_users_status_locked ON users (status, is_transfer_locked);
@@ -52,23 +55,24 @@ COMMENT ON COLUMN users.created_at IS 'Account creation time';
 COMMENT ON COLUMN users.updated_at IS 'Last profile update time';
 
 -- account_balances table
-CREATE TABLE IF NOT EXISTS account_balances (
+CREATE TABLE IF NOT EXISTS account_balances
+(
     id             BIGINT PRIMARY KEY, -- SnowFlakeId
-    user_id BIGINT NOT NULL REFERENCES users(id),
-    account_number VARCHAR(20) NOT NULL,
-    balance BIGINT NOT NULL DEFAULT 0,
-    account_type account_type NOT NULL DEFAULT 'CHECKING',
+    user_id        BIGINT         NOT NULL REFERENCES users (id),
+    account_number VARCHAR(20)    NOT NULL,
+    balance        BIGINT         NOT NULL DEFAULT 0,
+    account_type   account_type   NOT NULL DEFAULT 'CHECKING',
     status         account_status NOT NULL DEFAULT 'ACTIVE',
-    version BIGINT NOT NULL DEFAULT 0,
-    created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
-    updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
+    version        BIGINT         NOT NULL DEFAULT 0,
+    created_at     TIMESTAMPTZ    NOT NULL DEFAULT now(),
+    updated_at     TIMESTAMPTZ    NOT NULL DEFAULT now()
 );
 
-CREATE UNIQUE INDEX IF NOT EXISTS idx_account_balances_account_number
-ON account_balances (account_number);
+CREATE UNIQUE INDEX IF NOT EXISTS idx_account_balances_account_number
+    ON account_balances (account_number);
 
-CREATE INDEX IF NOT EXISTS idx_account_balances_user_id
-ON account_balances (user_id);
+CREATE INDEX IF NOT EXISTS idx_account_balances_user_id
+    ON account_balances (user_id);
 
 COMMENT ON TABLE account_balances IS 'User account balances';
 COMMENT ON COLUMN account_balances.id IS 'Account ID (Snowflake)';
@@ -82,29 +86,30 @@ COMMENT ON COLUMN account_balances.created_at IS 'Account creation time';
 COMMENT ON COLUMN account_balances.updated_at IS 'Last update time';
 
 -- transactions table
-CREATE TABLE IF NOT EXISTS transactions (
+CREATE TABLE IF NOT EXISTS transactions
+(
     id                BIGINT PRIMARY KEY, -- SnowFlakeId
-    sender_user_id BIGINT NOT NULL REFERENCES users(id),
-    receiver_user_id BIGINT NOT NULL REFERENCES users(id),
-    amount BIGINT NOT NULL CHECK (amount > 0),
+    sender_user_id    BIGINT             NOT NULL REFERENCES users (id),
+    receiver_user_id  BIGINT             NOT NULL REFERENCES users (id),
+    amount            BIGINT             NOT NULL CHECK (amount > 0),
     status            transaction_status NOT NULL DEFAULT 'PENDING',
-    received_at TIMESTAMPTZ NOT NULL DEFAULT now(),
-    status_updated_at TIMESTAMPTZ NOT NULL DEFAULT now(),
-    created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
-    updated_at TIMESTAMPTZ NOT NULL DEFAULT now(),
-    version BIGINT NOT NULL DEFAULT 0,
-
+    received_at       TIMESTAMPTZ        NOT NULL DEFAULT now(),
+    status_updated_at TIMESTAMPTZ        NOT NULL DEFAULT now(),
+    created_at        TIMESTAMPTZ        NOT NULL DEFAULT now(),
+    updated_at        TIMESTAMPTZ        NOT NULL DEFAULT now(),
+    version           BIGINT             NOT NULL DEFAULT 0,
+
     CONSTRAINT ck_tx_sender_ne_receiver CHECK (sender_user_id <> receiver_user_id)
 );
 
-CREATE INDEX IF NOT EXISTS idx_tx_sender_created
-ON transactions (sender_user_id, created_at DESC);
+CREATE INDEX IF NOT EXISTS idx_tx_sender_created
+    ON transactions (sender_user_id, created_at DESC);
 
-CREATE INDEX IF NOT EXISTS idx_tx_receiver_created
-ON transactions (receiver_user_id, created_at DESC);
+CREATE INDEX IF NOT EXISTS idx_tx_receiver_created
+    ON transactions (receiver_user_id, created_at DESC);
 
-CREATE INDEX IF NOT EXISTS idx_tx_status_updated
-ON transactions (status, status_updated_at DESC);
+CREATE INDEX IF NOT EXISTS idx_tx_status_updated
+    ON transactions (status, status_updated_at DESC);
 
 COMMENT ON TABLE transactions IS 'Transfer transactions';
 COMMENT ON COLUMN transactions.id IS 'Transaction ID';
@@ -117,15 +122,16 @@ COMMENT ON COLUMN transactions.created_at IS 'Creation time';
 COMMENT ON COLUMN transactions.status_updated_at IS 'Last status update time';
 
 -- transaction_histories table
-CREATE TABLE IF NOT EXISTS transaction_histories (
+CREATE TABLE IF NOT EXISTS transaction_histories
+(
     id             BIGINT PRIMARY KEY,
-    transaction_id BIGINT NOT NULL REFERENCES transactions(id),
+    transaction_id BIGINT                     NOT NULL REFERENCES transactions (id),
     status         transaction_history_status NOT NULL,
-    created_at TIMESTAMPTZ NOT NULL DEFAULT now()
+    created_at     TIMESTAMPTZ                NOT NULL DEFAULT now()
 );
 
 CREATE INDEX IF NOT EXISTS idx_tx_histories_txid_created
-ON transaction_histories (transaction_id, created_at);
+    ON transaction_histories (transaction_id, created_at);
 
 COMMENT ON TABLE transaction_histories IS 'Transaction status change history';
 COMMENT ON COLUMN transaction_histories.status IS 'Final transfer status';
@@ -133,34 +139,39 @@ COMMENT ON COLUMN transaction_histories.transaction_id IS 'Referenced transaction ID';
 COMMENT ON COLUMN transaction_histories.created_at IS 'Creation time';
 
 -- transfer_events (Outbox)
-CREATE TABLE IF NOT EXISTS transfer_events (
+CREATE TABLE IF NOT EXISTS transfer_events
+(
     event_id      BIGINT PRIMARY KEY,
-    transaction_id BIGINT NOT NULL REFERENCES transactions(id),
-    payload JSONB NOT NULL,
+    event_version INT                    NOT NULL DEFAULT 1,
+    aggregate_type VARCHAR(100)          NOT NULL,
+    event_type    VARCHAR(100)           NOT NULL,
+    payload       JSONB                  NOT NULL,
+    headers       JSONB                  NOT NULL DEFAULT '{}'::jsonb,
     status        transfer_outbox_status NOT NULL DEFAULT 'PENDING',
-    attempt_count INT NOT NULL DEFAULT 0,
+    attempt_count INT                    NOT NULL DEFAULT 0,
     error_message TEXT,
-    created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
+    created_at    TIMESTAMPTZ            NOT NULL DEFAULT now(),
+    updated_at    TIMESTAMPTZ            NOT NULL DEFAULT now(),
     published_at  TIMESTAMPTZ,
-
+    next_retry_at TIMESTAMPTZ            NOT NULL DEFAULT now(),
+
     CONSTRAINT ck_transfer_events_payload_object CHECK (jsonb_typeof(payload) = 'object'),
     CONSTRAINT ck_published_requires_timestamp CHECK (
         status <> 'PUBLISHED' OR published_at IS NOT NULL
-    ),
+        ),
     CONSTRAINT ck_attempt_count_positive CHECK (attempt_count >= 0)
 );
 
 CREATE INDEX IF NOT EXISTS idx_transfer_events_pending
-ON transfer_events (created_at)
-WHERE status = 'PENDING';
+    ON transfer_events (created_at)
+    WHERE status = 'PENDING';
 
 CREATE INDEX IF NOT EXISTS idx_transfer_events_dead_letter
-ON transfer_events (created_at)
-WHERE status = 'DEAD_LETTER';
+    ON transfer_events (created_at)
+    WHERE status = 'DEAD_LETTER';
 
 COMMENT ON TABLE transfer_events IS 'Outbox: for retrying failed Kafka publishes';
-COMMENT ON COLUMN transfer_events.event_id IS 'Unique event ID (Snowflake)';
-COMMENT ON COLUMN transfer_events.transaction_id IS 'Referenced transaction ID';
+COMMENT ON COLUMN transfer_events.event_id IS 'Unique event ID (Snowflake) - same as transaction_id';
 COMMENT ON COLUMN transfer_events.payload IS 'Event data for FDS delivery (JSONB)';
 COMMENT ON COLUMN transfer_events.status IS 'Status: PENDING, PUBLISHED, DEAD_LETTER';
 COMMENT ON COLUMN transfer_events.attempt_count IS 'Cumulative publish retry count';
@@ -170,7 +181,8 @@ COMMENT ON COLUMN transfer_events.published_at IS 'Time of successful Kafka publish';
 
 -- Function to auto-update updated_at
 CREATE OR REPLACE FUNCTION update_updated_at_column()
-RETURNS TRIGGER AS $$
+    RETURNS TRIGGER AS
+$$
 BEGIN
     NEW.updated_at = now();
     RETURN NEW;
@@ -179,23 +191,27 @@ $$ LANGUAGE plpgsql;
 
 -- updated_at triggers
 CREATE TRIGGER trg_users_updated_at
-    BEFORE UPDATE ON users
+    BEFORE UPDATE
+    ON users
     FOR EACH ROW
-    EXECUTE FUNCTION update_updated_at_column();
+EXECUTE FUNCTION update_updated_at_column();
 
 CREATE TRIGGER trg_account_balances_updated_at
-    BEFORE UPDATE ON account_balances
+    BEFORE UPDATE
+    ON account_balances
     FOR EACH ROW
-    EXECUTE FUNCTION update_updated_at_column();
+EXECUTE FUNCTION update_updated_at_column();
 
 CREATE TRIGGER trg_transactions_updated_at
-    BEFORE UPDATE ON transactions
+    BEFORE UPDATE
+    ON transactions
     FOR EACH ROW
-    EXECUTE FUNCTION update_updated_at_column();
+EXECUTE FUNCTION update_updated_at_column();
 
 -- Function to auto-update status_updated_at
 CREATE OR REPLACE FUNCTION update_status_updated_at_column()
-RETURNS TRIGGER AS $$
+    RETURNS TRIGGER AS
+$$
 BEGIN
     IF NEW.status IS DISTINCT FROM OLD.status THEN
         NEW.status_updated_at = now();
@@ -206,6 +222,7 @@ $$ LANGUAGE plpgsql;
 
 -- status_updated_at trigger
 CREATE TRIGGER trg_transactions_status_updated_at
-    BEFORE UPDATE ON transactions
+    BEFORE UPDATE
+    ON transactions
     FOR EACH ROW
-    EXECUTE FUNCTION update_status_updated_at_column();
+EXECUTE FUNCTION update_status_updated_at_column();
diff --git a/services/transfer/instances/transfer-relay/build.gradle.kts b/services/transfer/instances/transfer-relay/build.gradle.kts
index ecbdc89..7619f5f 100644
--- a/services/transfer/instances/transfer-relay/build.gradle.kts
+++ b/services/transfer/instances/transfer-relay/build.gradle.kts
@@ -4,6 +4,9 @@ plugins {
 }
 
 dependencies {
+    // Spring Batch
+    implementation("org.springframework.boot:spring-boot-starter-batch")
+
     implementation(project(":transfer-application"))
     implementation(project(":transfer-infra"))
     implementation(project(":common-application"))
@@ -17,6 +20,7 @@ dependencies {
     // Additional test dependencies
     testImplementation(project(":transfer-domain")) // uses TransferEvent; TODO: improve by moving eventType into the application module
     testImplementation("org.springframework.boot:spring-boot-starter-jdbc")
+    testImplementation("org.springframework.batch:spring-batch-test")
     testImplementation("org.mockito.kotlin:mockito-kotlin:4.1.0")
     testImplementation("org.jetbrains.kotlin:kotlin-test")
 }
diff --git a/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/TransferOutboxRelay.kt b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/TransferOutboxRelay.kt
deleted file mode 100644
index f6b9f03..0000000
--- a/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/TransferOutboxRelay.kt
+++ /dev/null
@@ -1,176 +0,0 @@
-package io.github.hyungkishin.transentia.relay
-
-import io.github.hyungkishin.transentia.application.required.TransferEventsOutboxRepository
-import io.github.hyungkishin.transentia.relay.component.OutboxEventWriter
-import io.github.hyungkishin.transentia.relay.component.OutboxEventProcessor
-import io.github.hyungkishin.transentia.relay.component.OutboxItemReader
-import io.github.hyungkishin.transentia.relay.component.OutboxWorker
-import io.github.hyungkishin.transentia.relay.config.OutboxRelayConfig
-import io.github.hyungkishin.transentia.relay.model.ProcessingResult
-import jakarta.annotation.PreDestroy
-import org.slf4j.LoggerFactory
-import org.springframework.beans.factory.annotation.Qualifier
-import org.springframework.beans.factory.annotation.Value
-import org.springframework.scheduling.annotation.Scheduled
-import org.springframework.stereotype.Component
-import java.time.Instant
-import java.util.concurrent.ConcurrentLinkedQueue
-import java.util.concurrent.ExecutorService
-import java.util.concurrent.TimeUnit
-
-/**
- * Relay that sends outbox events to Kafka
- *
- * Spring Batch multi-threaded step pattern:
- * - ItemReader: reads events thread-safely
- * - ItemProcessor: transforms the data
- * - ItemWriter: sends to Kafka
- */
-@Component
-class TransferOutboxRelay(
-    private val outboxRepository: TransferEventsOutboxRepository,
-    private val reader: OutboxItemReader,
-    private val processor: OutboxEventProcessor,
-    private val writer: OutboxEventWriter,
-    private val config: OutboxRelayConfig,
-    @Qualifier("outboxExecutorService") private val executorService: ExecutorService,
-    @Value("\${app.kafka.topics.transfer-events}") private val topicName: String
-) {
-    private val log = LoggerFactory.getLogger(javaClass)
-    private var consecutiveEmptyCount = 0
-
-    @Scheduled(
-        fixedDelayString = "\${app.outbox.relay.fixedDelayMs:1000}",
-        initialDelayString = "\${app.outbox.relay.initialDelayMs:5000}"
-    )
-    fun run() {
-        try {
-            val startTime = System.currentTimeMillis()
-            val now = Instant.now()
-
-            reader.reset()
-
-            val result = processBatch()
-
-            if (result.totalProcessed == 0) {
-                handleEmptyBatch()
-                return
-            }
-
-            consecutiveEmptyCount = 0
-            val processingTime = System.currentTimeMillis() - startTime
-
-            // Handle successes
-            if (result.successIds.isNotEmpty()) {
-                outboxRepository.markAsPublished(result.successIds, now)
-
-                log.info(
-                    "Published {} events ({}% success) in {}ms",
-                    result.successIds.size,
-                    "%.1f".format(result.successRate * 100),
-                    processingTime
-                )
-            }
-
-            // Handle failures
-            if (result.failedEvents.isNotEmpty()) {
-                handleFailedEvents(result.failedEvents, now)
-            }
-
-            // Performance monitoring
-            if (processingTime > config.slowProcessingThresholdMs) {
-                log.warn("Slow batch processing: {}ms for {} events", processingTime, result.totalProcessed)
-            }
-
-        } catch (e: Exception) {
-            log.error("Relay batch processing failed", e)
-        }
-    }
-
-    private fun processBatch(): ProcessingResult {
-        val successIds = ConcurrentLinkedQueue<Long>()
-        val failedEvents = ConcurrentLinkedQueue<ProcessingResult.FailedEvent>()
-
-        // Create workers (reader-processor-writer combination)
-        val workers = (1..config.threadPoolSize).map { workerId ->
-            OutboxWorker(
-                workerId = workerId,
-                reader = reader,
-                processor = processor,
-                writer = writer,
-                topicName = topicName,
-                successIds = successIds,
-                failedEvents = failedEvents
-            )
-        }
-
-        // Run in parallel
-        val futures = workers.map { executorService.submit(it) }
-
-        try {
-            futures.forEach { it.get(config.timeoutSeconds, TimeUnit.SECONDS) }
-        } catch (e: Exception) {
-            log.error("Worker execution failed", e)
-            futures.forEach { it.cancel(true) }
-        }
-
-        return ProcessingResult(
-            successIds = successIds.toList(),
-            failedEvents = failedEvents.toList()
-        )
-    }
-
-    private fun handleFailedEvents(failedEvents: List<ProcessingResult.FailedEvent>, now: Instant) {
-        log.warn("Failed to publish {} events", failedEvents.size)
-
-        failedEvents.forEach { failed ->
-            val backoffMillis = calculateBackoff(failed.attemptCount)
-            outboxRepository.markFailedWithBackoff(
-                id = failed.eventId,
-                cause = failed.error,
-                backoffMillis = backoffMillis,
-                now = now
-            )
-        }
-    }
-
-    /**
-     * Exponential backoff calculation
-     *
-     * attempt 1: 5s
-     * attempt 2: 10s
-     * attempt 3: 20s
-     * attempt 4: 40s
-     * attempt 5: DEAD_LETTER
-     */
-    private fun calculateBackoff(attemptCount: Int): Long {
-        return config.baseBackoffMs * (1L shl (attemptCount - 1))
-    }
-
-    private fun handleEmptyBatch() {
-        consecutiveEmptyCount++
-        if (consecutiveEmptyCount > 3) {
-            Thread.sleep(3000)
-        }
-    }
-
-    @PreDestroy
-    fun cleanup() {
-        log.info("Shutting down executor service")
-        executorService.shutdown()
-
-        try {
-            if (!executorService.awaitTermination(30, TimeUnit.SECONDS)) {
-                log.warn("Forcing shutdown")
-                executorService.shutdownNow()
-
-                if (!executorService.awaitTermination(1, TimeUnit.SECONDS)) {
-                    log.error("Executor did not terminate")
-                }
-            }
-        } catch (e: InterruptedException) {
-            executorService.shutdownNow()
-            Thread.currentThread().interrupt()
-        }
-    }
-}
diff --git a/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/batch/TransferOutboxItemProcessor.kt b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/batch/TransferOutboxItemProcessor.kt
new file mode 100644
index 0000000..c1c0d4c
--- /dev/null
+++ b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/batch/TransferOutboxItemProcessor.kt
@@ -0,0 +1,47 @@
+package io.github.hyungkishin.transentia.relay.batch
+
+import com.fasterxml.jackson.databind.ObjectMapper
+import io.github.hyungkishin.transentia.common.outbox.transfer.ClaimedRow
+import io.github.hyungkishin.transentia.infrastructure.kafka.model.TransferEventAvroModel
+import io.github.hyungkishin.transentia.infrastructure.kafka.model.TransferEventType
+import io.github.hyungkishin.transentia.infrastructure.kafka.model.TransferStatus
+import io.github.hyungkishin.transentia.relay.exception.InvalidEventDataException
+import io.github.hyungkishin.transentia.relay.model.TransferPayload
+import org.springframework.batch.item.ItemProcessor
+import org.springframework.stereotype.Component
+
+@Component
+class TransferOutboxItemProcessor(
+    private val objectMapper: ObjectMapper
+) : ItemProcessor<ClaimedRow, Pair<ClaimedRow, TransferEventAvroModel>> {
+
+    override fun process(item: ClaimedRow): Pair<ClaimedRow, TransferEventAvroModel> {
+        try {
+            val payload = objectMapper.readValue(item.payload, TransferPayload::class.java)
+
+            val avroModel = TransferEventAvroModel.newBuilder()
+                .setEventId(item.eventId)
+                .setEventType(
+                    if (payload.status == "COMPLETED") TransferEventType.TRANSFER_COMPLETED
+                    else TransferEventType.TRANSFER_FAILED
+                )
+                .setTransactionId(payload.transactionId)
+                .setSenderId(payload.senderId)
+                .setReceiverId(payload.receiverUserId)
+                .setAmount(payload.amount.toString())
+                .setStatus(TransferStatus.valueOf(payload.status))
+                .setOccurredAt(payload.occurredAt)
+                .setHeaders(item.headers)
+                .setCreatedAt(System.currentTimeMillis())
+                .build()
+
+            return item to avroModel
+
+        } catch (e: Exception) {
+            throw InvalidEventDataException(
+                "Event conversion failed: eventId=${item.eventId}",
+                e
+            )
+        }
+    }
+}
diff --git a/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/batch/TransferOutboxItemReader.kt b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/batch/TransferOutboxItemReader.kt
new file mode 100644
index 0000000..d68f4ef
--- /dev/null
+++ b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/batch/TransferOutboxItemReader.kt
@@ -0,0 +1,93 @@
+package io.github.hyungkishin.transentia.relay.batch
+
+import io.github.hyungkishin.transentia.application.required.TransferEventsOutboxRepository
+import io.github.hyungkishin.transentia.common.outbox.transfer.ClaimedRow
+import io.github.hyungkishin.transentia.relay.config.OutboxRelayConfig
+import org.slf4j.LoggerFactory
+import org.springframework.batch.core.configuration.annotation.StepScope
+import org.springframework.batch.item.ItemReader
+import org.springframework.stereotype.Component
+import java.time.Instant
+import java.util.concurrent.ConcurrentLinkedQueue
+import java.util.concurrent.locks.ReentrantLock
+import kotlin.concurrent.withLock
+
+@Component
+@StepScope
+class TransferOutboxItemReader(
+    private val repository: TransferEventsOutboxRepository,
+    private val config: OutboxRelayConfig
+) : ItemReader<ClaimedRow> {
+
+    private val log = LoggerFactory.getLogger(javaClass)
+
+    // Thread-safe queue
+    private val queue = ConcurrentLinkedQueue<ClaimedRow>()
+
+    // Batch-loading lock (only one thread loads at a time)
+    private val loadLock = ReentrantLock()
+
+    // Whether there is nothing left to read
+    @Volatile
+    private var exhausted = false
+
+    override fun read(): ClaimedRow? {
+        // Take an item from the queue
+        val item = queue.poll()
+
+        if (item != null) {
+            return item
+        }
+
+        // The queue is empty; if everything has already been read, stop
+        if (exhausted) {
+            return null
+        }
+
+        // The queue is empty, so try loading a new batch
+        return loadLock.withLock {
+            // Double-check: another thread may have loaded already
+            val recheck = queue.poll()
+            if (recheck != null) {
+                return recheck
+            }
+
+            // Load a new batch
+            loadNextBatch()
+
+            // Try again after loading
+            queue.poll()
+        }
+    }
+
+    private fun loadNextBatch() {
+        try {
+            val batch = repository.claimBatch(
+                limit = config.chunkSize,
+                now = Instant.now(),
+                sendingTimeoutSeconds = config.sendingTimeoutSeconds
+            )
+
+            if (batch.isEmpty()) {
+                exhausted = true
+                log.debug("No more events to process")
+            } else {
+                queue.addAll(batch)
+                log.debug("Loaded a new batch: {} rows", batch.size)
+            }
+
+        } catch (e: Exception) {
+            log.error("Failed to load batch", e)
+            exhausted = true
+        }
+    }
+
+    /**
+     * Resets state when the step restarts
+     */
+    fun reset() {
+        queue.clear()
+        exhausted = false
+        log.debug("Reader state reset")
+    }
+}
diff --git a/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/batch/TransferOutboxItemWriter.kt b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/batch/TransferOutboxItemWriter.kt
new file mode 100644
index 0000000..f41d2c5
--- /dev/null
+++ b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/batch/TransferOutboxItemWriter.kt
@@ -0,0 +1,79 @@
+package io.github.hyungkishin.transentia.relay.batch
+
+import io.github.hyungkishin.transentia.application.required.TransferEventsOutboxRepository
+import io.github.hyungkishin.transentia.common.outbox.transfer.ClaimedRow
+import io.github.hyungkishin.transentia.infrastructure.kafka.model.TransferEventAvroModel
+import io.github.hyungkishin.transentia.infrastructure.kafka.producer.service.KafkaProducer
+import io.github.hyungkishin.transentia.relay.exception.RetryableKafkaException
+import org.slf4j.LoggerFactory
+import org.springframework.batch.item.Chunk
+import org.springframework.batch.item.ItemWriter
+import org.springframework.beans.factory.annotation.Value
+import org.springframework.stereotype.Component
+import java.time.Instant
+
+/**
+ * Outbox event Kafka writer
+ *
+ * Flow:
+ * 1. Send to Kafka (synchronously)
+ * 2. On success: update the outbox rows to PUBLISHED
+ * 3. On failure: throw RetryableKafkaException
+ *    - FaultTolerantStepConfigurer handles the retry (exponential backoff)
+ *    - When retries are exhausted: the SkipListener moves the item to the DLQ
+ */
+@Component
+class TransferOutboxItemWriter(
+    private val kafkaProducer: KafkaProducer,
+    private val outboxRepository: TransferEventsOutboxRepository,
+    @Value("\${app.kafka.topics.transfer-events}")
+    private val topicName: String
+) : ItemWriter<Pair<ClaimedRow, TransferEventAvroModel>> {
+
+    private val log = LoggerFactory.getLogger(javaClass)
+
+    override fun write(chunk: Chunk<Pair<ClaimedRow, TransferEventAvroModel>>) {
+        val successIds = mutableListOf<Long>()
+
+        chunk.items.forEach { (claimedRow, avroModel) ->
+            try {
+                // 1. Send to Kafka (synchronous)
+                kafkaProducer.sendSync(topicName, avroModel)
+
+                // 2. Collect the successful ID
+                successIds.add(avroModel.eventId)
+
+                log.debug(
+                    "Kafka send succeeded: eventId={}, attempt={}",
+                    avroModel.eventId,
+                    claimedRow.attemptCount
+                )
+
+            } catch (e: Exception) {
+                log.warn(
+                    "Kafka send failed: eventId={}, attempt={}, error={}",
+                    avroModel.eventId,
+                    claimedRow.attemptCount,
+                    e.message
+                )
+
+                // 3. On failure, throw (Spring Batch handles retry/skip)
+                throw RetryableKafkaException(
+                    "Kafka send failed: eventId=${avroModel.eventId}",
+                    e
+                )
+            }
+        }
+
+        // 4. Update the successful events in the outbox (PUBLISHED)
+        if (successIds.isNotEmpty()) {
+            try {
+                outboxRepository.markAsPublished(successIds, Instant.now())
+                log.info("Chunk send complete: {} events", successIds.size)
+            } catch (e: Exception) {
+                log.error("Outbox update failed", e)
+                throw e
+            }
+        }
+    }
+}
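The reader, processor, and writer above throw `RetryableKafkaException` and `InvalidEventDataException` from `relay.exception`, whose sources are not included in this diff. Presumably they are plain `RuntimeException` subclasses, roughly (an assumption, not the author's code):

```kotlin
package io.github.hyungkishin.transentia.relay.exception

/** Kafka send failures that the fault-tolerant step may retry. */
class RetryableKafkaException(message: String, cause: Throwable? = null) :
    RuntimeException(message, cause)

/** Payload/Avro conversion failures; not retryable, skipped straight to the DLQ. */
class InvalidEventDataException(message: String, cause: Throwable? = null) :
    RuntimeException(message, cause)
```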
log.error("Job 실행 실패", e) + } + } + +} diff --git a/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/batch/TransferOutboxSkipListener.kt b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/batch/TransferOutboxSkipListener.kt new file mode 100644 index 0000000..e092e5c --- /dev/null +++ b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/batch/TransferOutboxSkipListener.kt @@ -0,0 +1,102 @@ +package io.github.hyungkishin.transentia.relay.batch + +import io.github.hyungkishin.transentia.application.required.TransferEventsOutboxRepository +import io.github.hyungkishin.transentia.common.outbox.transfer.ClaimedRow +import io.github.hyungkishin.transentia.infrastructure.kafka.model.TransferEventAvroModel +import org.slf4j.LoggerFactory +import org.springframework.batch.core.SkipListener +import org.springframework.stereotype.Component +import java.time.Instant + +/** + * Skip Item 처리 리스너 + * + * 역할: + * - Spring Batch의 retry 초과로 skip된 item을 DLQ로 이동 + * - 실패 원인 로깅 + */ +@Component +class TransferOutboxSkipListener( + private val outboxRepository: TransferEventsOutboxRepository +) : SkipListener> { + + private val log = LoggerFactory.getLogger(javaClass) + + /** + * Writer에서 skip된 item 처리 + * + * - Kafka 전송 실패 후 재시도 초과한 item + * - DLQ로 이동 (status = DEAD_LETTER) + */ + override fun onSkipInWrite( + item: Pair, + t: Throwable + ) { + val (claimedRow, avroModel) = item + + try { + outboxRepository.markAsDeadLetter( + eventId = avroModel.eventId, + error = "${t.javaClass.simpleName}: ${t.message}", + now = Instant.now() + ) + + log.error( + "[DLQ] Kafka 전송 재시도 초과 - eventId={}, attempt={}, error={}", + avroModel.eventId, + claimedRow.attemptCount, + t.message, + t + ) + } catch (e: Exception) { + log.error( + "[DLQ] DLQ 이동 실패 - eventId={}, error={}", + avroModel.eventId, + e.message, + e + ) + } + } + + /** + * Reader에서 skip된 item 처리 + * + * - DB 조회 실패 등 + */ + override fun onSkipInRead(t: Throwable) { + log.warn("[Skip-Read] Reader skip 발생: {}", t.message, t) + } + + /** + * Processor에서 skip된 item 처리 + * + * - Avro 변환 실패 등 + * - ClaimedRow 받음 (Processor의 input) + */ + override fun onSkipInProcess( + item: ClaimedRow, + t: Throwable + ) { + try { + outboxRepository.markAsDeadLetter( + eventId = item.eventId, + error = "${t.javaClass.simpleName}: ${t.message}", + now = Instant.now() + ) + + log.error( + "[DLQ] Processor 변환 실패 - eventId={}, error={}", + item.eventId, + t.message, + t + ) + } catch (e: Exception) { + log.error( + "[DLQ] DLQ 이동 실패 - eventId={}, error={}", + item.eventId, + e.message, + e + ) + } + } +} diff --git a/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/batch/TransferOutboxStepListener.kt b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/batch/TransferOutboxStepListener.kt new file mode 100644 index 0000000..204e5b5 --- /dev/null +++ b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/batch/TransferOutboxStepListener.kt @@ -0,0 +1,39 @@ +package io.github.hyungkishin.transentia.relay.batch + +import org.slf4j.LoggerFactory +import org.springframework.batch.core.ExitStatus +import org.springframework.batch.core.StepExecution +import org.springframework.batch.core.StepExecutionListener +import org.springframework.stereotype.Component +import java.time.Duration + +@Component +class 
TransferOutboxStepListener( + private val reader: TransferOutboxItemReader +) : StepExecutionListener { + + private val log = LoggerFactory.getLogger(javaClass) + + override fun beforeStep(stepExecution: StepExecution) { + reader.reset() + log.info("Step 시작: {}", stepExecution.stepName) + } + + override fun afterStep(stepExecution: StepExecution): ExitStatus? { + val duration = stepExecution.endTime?.let { + Duration.between(stepExecution.startTime, it).toMillis() + } ?: 0 + + log.info( + "Step 완료: 읽기={}, 쓰기={}, 커밋={}, 롤백={}, Skip={}, 소요={}ms", + stepExecution.readCount, + stepExecution.writeCount, + stepExecution.commitCount, + stepExecution.rollbackCount, + stepExecution.skipCount, + duration + ) + + return stepExecution.exitStatus + } +} diff --git a/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/OutboxEventProcessor.kt b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/OutboxEventProcessor.kt deleted file mode 100644 index fc32ebe..0000000 --- a/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/OutboxEventProcessor.kt +++ /dev/null @@ -1,38 +0,0 @@ -package io.github.hyungkishin.transentia.relay.component - -import com.fasterxml.jackson.databind.ObjectMapper -import io.github.hyungkishin.transentia.common.outbox.transfer.ClaimedRow -import io.github.hyungkishin.transentia.infrastructure.kafka.model.TransferEventAvroModel -import io.github.hyungkishin.transentia.infrastructure.kafka.model.TransferEventType -import io.github.hyungkishin.transentia.infrastructure.kafka.model.TransferStatus -import io.github.hyungkishin.transentia.relay.model.TransferPayload -import org.springframework.stereotype.Component - -/** - * ClaimedRow 를 TransferEventAvroModel 로 변환한다. - */ -@Component -class OutboxEventProcessor( - private val objectMapper: ObjectMapper -) { - fun process(row: ClaimedRow): TransferEventAvroModel { - val payload = objectMapper.readValue(row.payload, TransferPayload::class.java) - - return TransferEventAvroModel.newBuilder() - .setEventId(row.eventId) - .setEventType( - if (payload.status == "COMPLETED") TransferEventType.TRANSFER_COMPLETED - else TransferEventType.TRANSFER_FAILED - ) - .setAggregateId(row.aggregateId) - .setTransactionId(payload.transactionId) - .setSenderId(payload.senderId) - .setReceiverId(payload.receiverUserId) - .setAmount(payload.amount.toString()) - .setStatus(TransferStatus.valueOf(payload.status)) - .setOccurredAt(payload.occurredAt) - .setHeaders(row.headers) - .setCreatedAt(System.currentTimeMillis()) - .build() - } -} diff --git a/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/OutboxEventWriter.kt b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/OutboxEventWriter.kt deleted file mode 100644 index 7746e39..0000000 --- a/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/OutboxEventWriter.kt +++ /dev/null @@ -1,31 +0,0 @@ -package io.github.hyungkishin.transentia.relay.component - -import io.github.hyungkishin.transentia.infrastructure.kafka.model.TransferEventAvroModel -import io.github.hyungkishin.transentia.infrastructure.kafka.producer.service.KafkaProducer -import org.slf4j.LoggerFactory -import org.springframework.stereotype.Component - -/** - * TransferEventAvroModel을 Kafka로 전송한다. 
diff --git a/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/OutboxEventWriter.kt b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/OutboxEventWriter.kt
deleted file mode 100644
index 7746e39..0000000
--- a/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/OutboxEventWriter.kt
+++ /dev/null
@@ -1,31 +0,0 @@
-package io.github.hyungkishin.transentia.relay.component
-
-import io.github.hyungkishin.transentia.infrastructure.kafka.model.TransferEventAvroModel
-import io.github.hyungkishin.transentia.infrastructure.kafka.producer.service.KafkaProducer
-import org.slf4j.LoggerFactory
-import org.springframework.stereotype.Component
-
-/**
- * Sends a TransferEventAvroModel to Kafka.
- */
-@Component
-class OutboxEventWriter(
-    private val kafkaProducer: KafkaProducer<String, TransferEventAvroModel>
-) {
-    private val log = LoggerFactory.getLogger(javaClass)
-
-    /**
-     * Writes a single item
-     *
-     * @throws Exception when the Kafka send fails
-     */
-    fun write(topicName: String, event: TransferEventAvroModel) {
-        try {
-            kafkaProducer.sendSync(topicName, event)
-            log.debug("Successfully wrote event: eventId={}", event.eventId)
-        } catch (e: Exception) {
-            log.error("Failed to write event: eventId={}, error={}", event.eventId, e.message)
-            throw e
-        }
-    }
-}
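Its replacement, TransferOutboxItemWriter (wired into the new batch config below but not shown in this hunk), presumably adapts the same sendSync call to Spring Batch's ItemWriter contract. A minimal sketch under that assumption; the KafkaProducer type parameters and the topic wiring are guesses:

    import org.springframework.batch.item.Chunk
    import org.springframework.batch.item.ItemWriter

    // Sketch only - the real TransferOutboxItemWriter is not part of this hunk.
    class TransferOutboxItemWriterSketch(
        private val kafkaProducer: KafkaProducer<String, TransferEventAvroModel>,
        private val topicName: String
    ) : ItemWriter<Pair<ClaimedRow, TransferEventAvroModel>> {
        override fun write(chunk: Chunk<out Pair<ClaimedRow, TransferEventAvroModel>>) {
            for ((_, avroModel) in chunk) {
                // A failure here propagates to the step's retry/skip policy.
                kafkaProducer.sendSync(topicName, avroModel)
            }
        }
    }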
-        if (currentIndex < items.size) {
-            return items[currentIndex++]
-        }
-
-        fetchItems()
-
-        if (items.isEmpty()) {
-            return null
-        }
-
-        return items[currentIndex++]
-    }
-
-    private fun fetchItems() {
-        try {
-            items = repository.claimBatch(
-                limit = config.chunkSize,
-                now = Instant.now(),
-                sendingTimeoutSeconds = config.sendingTimeoutSeconds
-            )
-            currentIndex = 0
-
-            log.debug("Fetched {} items from outbox", items.size)
-
-        } catch (e: Exception) {
-            log.error("Failed to fetch items from outbox", e)
-            items = emptyList()
-            currentIndex = 0
-        }
-    }
-
-    fun reset() {
-        items = emptyList()
-        currentIndex = 0
-    }
-}
diff --git a/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/OutboxWorker.kt b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/OutboxWorker.kt
deleted file mode 100644
index 9073e17..0000000
--- a/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/component/OutboxWorker.kt
+++ /dev/null
@@ -1,56 +0,0 @@
-package io.github.hyungkishin.transentia.relay.component
-
-import io.github.hyungkishin.transentia.relay.model.ProcessingResult
-import org.slf4j.LoggerFactory
-import java.util.concurrent.ConcurrentLinkedQueue
-
-class OutboxWorker(
-    private val workerId: Int,
-    private val reader: OutboxItemReader,
-    private val processor: OutboxEventProcessor,
-    private val writer: OutboxEventWriter,
-    private val topicName: String,
-    private val successIds: ConcurrentLinkedQueue<Long>,
-    private val failedEvents: ConcurrentLinkedQueue<ProcessingResult.FailedEvent>
-) : Runnable {
-
-    private val log = LoggerFactory.getLogger(javaClass)
-    private var processedCount = 0
-
-    override fun run() {
-        try {
-            while (true) {
-                val item = reader.read() ?: break
-
-                processItem(item)
-
-                processedCount++
-            }
-
-            log.debug("Worker-{} finished: {} items", workerId, processedCount)
-        } catch (e: Exception) {
-            log.error("Worker-{} failed", workerId, e)
-        }
-    }
-
-    private fun processItem(row: io.github.hyungkishin.transentia.common.outbox.transfer.ClaimedRow) {
-        try {
-            val avroModel = processor.process(row)
-
-            writer.write(topicName, avroModel)
-
-            successIds.add(row.eventId)
-
-        } catch (e: Exception) {
-            // Failure: queue the event for failure handling
-            log.warn("Failed to process event: eventId={}, error={}", row.eventId, e.message)
-            failedEvents.add(
-                ProcessingResult.FailedEvent(
-                    eventId = row.eventId,
-                    error = e.message ?: "Unknown error",
-                    attemptCount = row.attemptCount
-                )
-            )
-        }
-    }
-}
diff --git a/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/config/ExecutorServiceConfig.kt b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/config/ExecutorServiceConfig.kt
deleted file mode 100644
index 7bb41d9..0000000
--- a/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/config/ExecutorServiceConfig.kt
+++ /dev/null
@@ -1,18 +0,0 @@
-package io.github.hyungkishin.transentia.relay.config
-
-import org.springframework.context.annotation.Bean
-import org.springframework.context.annotation.Configuration
-import java.util.concurrent.ExecutorService
-import java.util.concurrent.Executors
-
-@Configuration
-class ExecutorServiceConfig(
-    private val config: OutboxRelayConfig
-) {
-
-    @Bean("outboxExecutorService")
-    fun outboxExecutorService(): ExecutorService {
-        return Executors.newFixedThreadPool(config.threadPoolSize)
-    }
-
-}
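Both the deleted reader and its Spring Batch successor funnel through claimBatch, which must hand each outbox row to exactly one thread. On PostgreSQL the usual implementation is FOR UPDATE SKIP LOCKED; a sketch of what the claim query could look like (table and column names are illustrative, not taken from this patch):

    // Hypothetical claim query; :limit, :now and :timeout mirror the claimBatch parameters.
    val claimSql = """
        UPDATE transfer_events_outbox
           SET status = 'SENDING', claimed_at = :now, attempt_count = attempt_count + 1
         WHERE id IN (
               SELECT id
                 FROM transfer_events_outbox
                WHERE status = 'PENDING'
                   OR (status = 'SENDING' AND claimed_at < :now - make_interval(secs => :timeout))
                ORDER BY id
                LIMIT :limit
                  FOR UPDATE SKIP LOCKED)
     RETURNING id, aggregate_id, payload, headers, attempt_count
    """.trimIndent()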
diff --git a/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/config/FaultTolerantStepConfigurer.kt b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/config/FaultTolerantStepConfigurer.kt
new file mode 100644
index 0000000..663678d
--- /dev/null
+++ b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/config/FaultTolerantStepConfigurer.kt
@@ -0,0 +1,116 @@
+package io.github.hyungkishin.transentia.relay.config
+
+import io.github.hyungkishin.transentia.common.outbox.transfer.ClaimedRow
+import io.github.hyungkishin.transentia.relay.exception.InvalidEventDataException
+import io.github.hyungkishin.transentia.relay.exception.NonRetryableKafkaException
+import io.github.hyungkishin.transentia.relay.exception.RetryableKafkaException
+import org.springframework.batch.core.step.builder.FaultTolerantStepBuilder
+import org.springframework.retry.backoff.ExponentialBackOffPolicy
+import org.springframework.stereotype.Component
+import java.net.SocketTimeoutException
+import java.util.concurrent.TimeoutException
+
+/**
+ * Spring Batch fault-tolerance policy configuration
+ *
+ * Responsibilities
+ * - Manage the Retry / Skip / NoRetry policies in one place
+ * - Keep exception policy separate from step wiring
+ * - Reusable policy component
+ *
+ * Exception classification
+ * - Retry: transient network errors (retryable)
+ * - Skip: data errors (unrecoverable, moved to the DLQ)
+ * - NoRetry / NoSkip: code bugs (fail immediately)
+ */
+@Component
+class FaultTolerantStepConfigurer(
+    private val config: OutboxRelayConfig
+) {
+
+    /**
+     * Applies the fault-tolerance policies
+     *
+     * @param builder the step's FaultTolerantStepBuilder
+     * @return the builder with the policies applied
+     */
+    fun configure(
+        builder: FaultTolerantStepBuilder<ClaimedRow, OutboxItem>
+    ): FaultTolerantStepBuilder<ClaimedRow, OutboxItem> {
+
+        return builder.apply {
+            // Retry policy
+            configureRetryPolicy()
+
+            // Skip policy
+            configureSkipPolicy()
+
+            // NoRetry / NoSkip policy
+            configureNoRetryPolicy()
+        }
+    }
+
+    /**
+     * Retry policy
+     *
+     * - Transient failures are retried with exponential backoff
+     * - Once maxAttempts is exceeded, the item falls through to the skip policy
+     */
+    private fun FaultTolerantStepBuilder<ClaimedRow, OutboxItem>.configureRetryPolicy() {
+        // Retryable exceptions
+        retry(RetryableKafkaException::class.java)
+        retry(TimeoutException::class.java)
+        retry(SocketTimeoutException::class.java)
+
+        // Maximum retry attempts
+        retryLimit(config.maxAttempts)
+
+        // Exponential backoff policy
+        backOffPolicy(exponentialBackOffPolicy())
+    }
+
+    /**
+     * Skip policy
+     *
+     * - Unrecoverable data errors
+     * - The SkipListener moves them to the DLQ
+     */
+    private fun FaultTolerantStepBuilder<ClaimedRow, OutboxItem>.configureSkipPolicy() {
+        skip(NonRetryableKafkaException::class.java)
+        skip(InvalidEventDataException::class.java)
+
+        // Allow up to one chunk's worth of skips
+        skipLimit(config.chunkSize)
+    }
+
+    /**
+     * NoRetry / NoSkip policy
+     *
+     * - Code bugs fail the job immediately
+     * - Fast feedback forces a fix
+     */
+    private fun FaultTolerantStepBuilder<ClaimedRow, OutboxItem>.configureNoRetryPolicy() {
+        noSkip(NullPointerException::class.java)
+        noSkip(IllegalArgumentException::class.java)
+        noSkip(IllegalStateException::class.java)
+
+        noRetry(NullPointerException::class.java)
+        noRetry(IllegalArgumentException::class.java)
+    }
+
+    /**
+     * Exponential backoff policy
+     *
+     * - Initial wait: baseBackoffMs
+     * - Multiplier: 2x
+     * - Maximum wait: 60 seconds
+     */
+    private fun exponentialBackOffPolicy(): ExponentialBackOffPolicy {
+        return ExponentialBackOffPolicy().apply {
+            initialInterval = config.baseBackoffMs
+            multiplier = 2.0
+            maxInterval = 60000 // 60 seconds
+        }
+    }
+
+}
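With the defaults declared in OutboxRelayConfig below (baseBackoffMs = 5000, maxAttempts = 5) and the multiplier and cap fixed above, a retried item waits 5s, 10s, 20s, and 40s between its five attempts before falling through to the skip path. A quick arithmetic check:

    // Reproduces the ExponentialBackOffPolicy schedule configured above.
    fun main() {
        var interval = 5_000L                       // initialInterval = baseBackoffMs
        repeat(4) { i ->                            // retryLimit 5 => at most 4 waits
            println("wait before attempt ${i + 2}: $interval ms")
            interval = minOf(interval * 2, 60_000L) // multiplier 2.0, capped at maxInterval
        }
    }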
diff --git a/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/config/OutboxRelayConfig.kt b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/config/OutboxRelayConfig.kt
index d609c4b..a897951 100644
--- a/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/config/OutboxRelayConfig.kt
+++ b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/config/OutboxRelayConfig.kt
@@ -4,31 +4,33 @@ import org.springframework.boot.context.properties.ConfigurationProperties
 
 /**
  * Outbox relay settings
+ *
+ * Phase 1: single instance + multi-threaded
  */
 @ConfigurationProperties(prefix = "app.outbox.relay")
 data class OutboxRelayConfig(
     /** DB fetch batch size */
-    val chunkSize: Int = 100,
+    val chunkSize: Int = 500,
 
-    /** Scheduling interval (ms) */
-    val fixedDelayMs: Long = 1000,
-
-    /** Delay before the first run (ms) */
-    val initialDelayMs: Long = 5000,
-
-    /** Number of worker threads */
-    val threadPoolSize: Int = Runtime.getRuntime().availableProcessors() * 2,
+    /**
+     * Number of worker threads
+     *
+     * Decision criteria
+     * - Throughput target
+     * - Final value determined by load testing
+     */
+    val threadPoolSize: Int = 3,
 
     /** Worker timeout (seconds) */
-    val timeoutSeconds: Long = 5,
+    val timeoutSeconds: Long = 30,
+
+    /** Maximum retry attempts (moved to the DLQ once exceeded) */
+    val maxAttempts: Int = 5,
 
     /** Initial retry backoff (ms) - grows exponentially */
     val baseBackoffMs: Long = 5000,
 
-    /** Threshold for treating SENDING as stuck (seconds) */
-    val sendingTimeoutSeconds: Long = 120,
-
-    /** Slow-processing warning threshold (ms) */
-    val slowProcessingThresholdMs: Long = 3000
+    /** Unused (kept for compatibility) */
+    val sendingTimeoutSeconds: Long = 300
 )
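A @ConfigurationProperties data class still has to be registered before it binds to the app.outbox.relay block in application.yml (shown later in this patch). The registration site is not visible in this diff; assuming the usual Spring Boot wiring, it would look something like:

    import org.springframework.boot.context.properties.EnableConfigurationProperties
    import org.springframework.context.annotation.Configuration

    // Hypothetical registration site; @ConfigurationPropertiesScan on the
    // application class would work equally well.
    @Configuration
    @EnableConfigurationProperties(OutboxRelayConfig::class)
    class RelayPropertiesConfig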
diff --git a/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/config/TransferOutboxBatchConfig.kt b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/config/TransferOutboxBatchConfig.kt
new file mode 100644
index 0000000..264e2d7
--- /dev/null
+++ b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/config/TransferOutboxBatchConfig.kt
@@ -0,0 +1,80 @@
+package io.github.hyungkishin.transentia.relay.config
+
+import io.github.hyungkishin.transentia.common.outbox.transfer.ClaimedRow
+import io.github.hyungkishin.transentia.infrastructure.kafka.model.TransferEventAvroModel
+import io.github.hyungkishin.transentia.relay.batch.*
+import org.springframework.batch.core.Job
+import org.springframework.batch.core.Step
+import org.springframework.batch.core.job.builder.JobBuilder
+import org.springframework.batch.core.repository.JobRepository
+import org.springframework.batch.core.step.builder.StepBuilder
+import org.springframework.beans.factory.annotation.Qualifier
+import org.springframework.context.annotation.Bean
+import org.springframework.context.annotation.Configuration
+import org.springframework.core.task.TaskExecutor
+import org.springframework.scheduling.annotation.EnableScheduling
+import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor
+import org.springframework.transaction.PlatformTransactionManager
+
+typealias OutboxItem = Pair<ClaimedRow, TransferEventAvroModel>
+
+/**
+ * Spring Batch configuration
+ */
+@Configuration
+@EnableScheduling
+class TransferOutboxBatchConfig(
+    private val jobRepository: JobRepository,
+    private val transactionManager: PlatformTransactionManager,
+    private val relayConfig: OutboxRelayConfig,
+    private val faultTolerantConfigurer: FaultTolerantStepConfigurer
+) {
+
+    @Bean
+    fun transferOutboxJob(transferOutboxStep: Step): Job {
+        return JobBuilder("transferOutboxJob", jobRepository)
+            .start(transferOutboxStep)
+            .build()
+    }
+
+    @Bean
+    fun transferOutboxStep(
+        reader: TransferOutboxItemReader,
+        processor: TransferOutboxItemProcessor,
+        writer: TransferOutboxItemWriter,
+        stepListener: TransferOutboxStepListener,
+        skipListener: TransferOutboxSkipListener,
+        @Qualifier("relayTaskExecutor") taskExecutor: TaskExecutor
+    ): Step {
+        return StepBuilder("transferOutboxStep", jobRepository)
+            .chunk<ClaimedRow, OutboxItem>(relayConfig.chunkSize, transactionManager)
+            .reader(reader)
+            .processor(processor)
+            .writer(writer)
+            .taskExecutor(taskExecutor)
+            .listener(stepListener)
+            .listener(skipListener)
+            .let { faultTolerantConfigurer.configure(it.faultTolerant()) }
+            .build()
+    }
+
+    /**
+     * Batch TaskExecutor
+     *
+     * Purpose: a thread pool dedicated to Spring Batch
+     * Name: relayTaskExecutor (avoids bean-name collisions)
+     */
+    @Bean("relayTaskExecutor")
+    fun relayTaskExecutor(): TaskExecutor {
+        return ThreadPoolTaskExecutor().apply {
+            corePoolSize = relayConfig.threadPoolSize
+            maxPoolSize = relayConfig.threadPoolSize
+            queueCapacity = relayConfig.chunkSize * 2
+            setThreadNamePrefix("relay-")
+            setWaitForTasksToCompleteOnShutdown(true)
+            setAwaitTerminationSeconds(60)
+            initialize()
+        }
+    }
+
+}
diff --git a/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/exception/KafkaExceptions.kt b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/exception/KafkaExceptions.kt
new file mode 100644
index 0000000..045c76d
--- /dev/null
+++ b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/exception/KafkaExceptions.kt
@@ -0,0 +1,31 @@
+package io.github.hyungkishin.transentia.relay.exception
+
+/**
+ * Retryable Kafka exception
+ *
+ * Transient network errors, temporary broker outages, etc.
+ */
+class RetryableKafkaException(
+    message: String,
+    cause: Throwable? = null
+) : RuntimeException(message, cause)
+
+/**
+ * Non-retryable Kafka exception
+ *
+ * Serialization failures, invalid topics, etc.
+ */
+class NonRetryableKafkaException(
+    message: String,
+    cause: Throwable? = null
+) : RuntimeException(message, cause)
+
+/**
+ * Invalid event data exception
+ *
+ * Payload parsing failures, missing required fields, etc.
+ */
+class InvalidEventDataException(
+    message: String,
+    cause: Throwable? = null
+) : RuntimeException(message, cause)
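These exception types only take effect if raw Kafka errors are translated into them before they reach the step. A sketch of the mapping a writer might apply; which raw exceptions count as transient is an assumption, not something this patch specifies:

    import org.apache.kafka.common.errors.RetriableException
    import org.apache.kafka.common.errors.SerializationException

    // Sketch only: wraps raw Kafka failures so the fault-tolerant policy can classify them.
    fun classifyKafkaFailure(t: Throwable): RuntimeException = when (t) {
        is RetriableException -> RetryableKafkaException("transient Kafka failure", t)
        is SerializationException -> NonRetryableKafkaException("serialization failed", t)
        else -> NonRetryableKafkaException("unclassified Kafka failure", t)
    }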
diff --git a/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/model/ProcessingResult.kt b/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/model/ProcessingResult.kt
deleted file mode 100644
index 88b5fbf..0000000
--- a/services/transfer/instances/transfer-relay/src/main/kotlin/io/github/hyungkishin/transentia/relay/model/ProcessingResult.kt
+++ /dev/null
@@ -1,18 +0,0 @@
-package io.github.hyungkishin.transentia.relay.model
-
-/**
- * Data class holding a batch processing result
- */
-data class ProcessingResult(
-    val successIds: List<Long>,
-    val failedEvents: List<FailedEvent>
-) {
-    val totalProcessed: Int get() = successIds.size + failedEvents.size
-    val successRate: Double get() = if (totalProcessed == 0) 0.0 else successIds.size.toDouble() / totalProcessed
-
-    data class FailedEvent(
-        val eventId: Long,
-        val error: String,
-        val attemptCount: Int
-    )
-}
\ No newline at end of file
diff --git a/services/transfer/instances/transfer-relay/src/main/resources/application.yml b/services/transfer/instances/transfer-relay/src/main/resources/application.yml
index 4eb8eff..3dac9c9 100644
--- a/services/transfer/instances/transfer-relay/src/main/resources/application.yml
+++ b/services/transfer/instances/transfer-relay/src/main/resources/application.yml
@@ -2,14 +2,20 @@ spring:
   application:
     name: transfer-relay
 
+  batch:
+    jdbc:
+      initialize-schema: always
+    job:
+      enabled: false
+
   datasource:
     url: jdbc:postgresql://localhost:5432/transfer
     username: postgres
     password: pass1234
     driver-class-name: org.postgresql.Driver
     hikari:
-      maximum-pool-size: 10
-      minimum-idle: 3
+      maximum-pool-size: 20
+      minimum-idle: 10
       connection-timeout: 3000
       idle-timeout: 600000
       max-lifetime: 1800000
@@ -30,40 +36,23 @@ kafka-producer-config:
   valueSerializer: io.confluent.kafka.serializers.KafkaAvroSerializer
   compressionType: lz4
   acks: "1"
-  batchSize: 65536 # increased to 64KB
+  batchSize: 65536
   batchSizeBoostFactor: 1
-  lingerMs: 5 # batch linger time
-  requestTimeoutMs: 1000 # shortened to 1 second
-  retryCount: 0 # no retries (fail fast)
-
-kafka-consumer-config:
-  key-deserializer: org.apache.kafka.common.serialization.StringDeserializer
-  value-deserializer: io.confluent.kafka.serializers.KafkaAvroDeserializer
-  consumer-group-id: fds-consumer-group
-  auto-offset-reset: earliest
-  specific-avro-reader-key: specific.avro.reader
-  specific-avro-reader: true
-  batch-listener: true
-  auto-startup: true
-  concurrency-level: 8
-  session-timeout-ms: 10000
-  heartbeat-interval-ms: 3000
-  max-poll-interval-ms: 300000
-  max-poll-records: 500
-  max-partition-fetch-bytes-default: 1048576
-  max-partition-fetch-bytes-boost-factor: 1
-  poll-timeout-ms: 150
+  lingerMs: 5
+  requestTimeoutMs: 1000
+  retryCount: 0
 
 app:
   outbox:
     relay:
-      chunkSize: 100
+      chunkSize: 1000
+      threadPoolSize: 8
       fixedDelayMs: 1000
       initialDelayMs: 5000
       timeoutSeconds: 10
+      maxAttempts: 5
       baseBackoffMs: 5000
-      sendingTimeoutSeconds: 120
-      slowProcessingThresholdMs: 3000
+      sendingTimeoutSeconds: 300
 
 kafka:
   topics: