diff --git a/docker-compose.yml b/docker-compose.yml index 8a120d4e..b6b7d736 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -10,15 +10,9 @@ services: # 컨테이너 내부에서 localhost는 도커 네트워크에서 자신의 주소 - SPRING_DATA_MONGODB_URI=mongodb://mongo:27017/llv_api_local - SPRING_DATA_REDIS_HOST=redis - # Redlock 기본 활성화 - - WORD_SINGLE_FLIGHT_REDLOCK_ENABLED=true - - WORD_SINGLE_FLIGHT_REDLOCK_NODE_ADDRESSES=redis://redis-a:6379,redis://redis-b:6379,redis://redis-c:6379 depends_on: - mongo - redis - - redis-a - - redis-b - - redis-c volumes: - ./logs:/app/logs mongo: @@ -40,39 +34,6 @@ services: - redis_data:/data restart: unless-stopped - redis-a: - image: redis:7-alpine - container_name: llv-redis-a - ports: - - "6380:6379" - command: ["redis-server", "--appendonly", "yes"] - volumes: - - redis_a_data:/data - restart: unless-stopped - - redis-b: - image: redis:7-alpine - container_name: llv-redis-b - ports: - - "6381:6379" - command: ["redis-server", "--appendonly", "yes"] - volumes: - - redis_b_data:/data - restart: unless-stopped - - redis-c: - image: redis:7-alpine - container_name: llv-redis-c - ports: - - "6382:6379" - command: ["redis-server", "--appendonly", "yes"] - volumes: - - redis_c_data:/data - restart: unless-stopped - volumes: mongo_data: redis_data: - redis_a_data: - redis_b_data: - redis_c_data: diff --git a/docs/decisions/011-word-single-flight-distributed-stability-with-redlock.md b/docs/decisions/011-word-single-flight-distributed-stability-with-redlock.md index 13b3e69f..130c349e 100644 --- a/docs/decisions/011-word-single-flight-distributed-stability-with-redlock.md +++ b/docs/decisions/011-word-single-flight-distributed-stability-with-redlock.md @@ -1,4 +1,4 @@ -# Word Single-Flight 분산 안정화 보고서 (Redlock 적용) +# Word Single-Flight 분산 안정화 보고서 (RLock 표준화) ## 문제 @@ -7,22 +7,22 @@ ## 선택 -single-flight 조정 경로를 Redisson 기반으로 전환하고, Redlock 경로를 기본값으로 채택했다. -동시에 노드 설정 이상 상황에서는 단일 락 폴백 경로로 기동하도록 fail-safe 동작을 추가했다. +single-flight 조정 경로를 Redisson 기반으로 전환하고, 운영 표준은 `RLock + watchdog`으로 확정했다. 또한 follower 에러 처리와 락 만료 시맨틱을 보정해 단기 장애 증폭 가능성을 낮췄다. ## 이유 우선순위는 단기간 내 운영 리스크 완화와 기동 안전성 확보로 설정했다. +AWS Bedrock 동기 추론 API(Converse/InvokeModel)에는 멱등키가 없어 호출 단계 중복 제거를 플랫폼에 위임하기 어려웠다. `fencing token` 기반 모델은 저장소/다운스트림 검증 지점 추가와 토큰 단조성 보장 설계가 필요해 즉시 적용 범위에서 제외했다. 또한 본 건의 핵심 목표가 AI 요청 수 절감인데, fencing token은 stale write 방지에는 유효해도 AI 중복 호출 자체를 차단하지는 못한다. -이에 따라 1차 조치는 duplicate-call 완화와 fail-safe 확보에 집중하고, 엄격 정합성 요구는 후속 과제로 분리했다. +이에 따라 1차 조치는 `RLock + watchdog + 결과 캐시/idempotency key` 조합으로 duplicate-call 완화에 집중하고, 엄격 정합성 요구는 후속 과제로 분리했다. ## 검증 -- [WordSingleFlightRedisCoordinator.java](../../src/main/java/com/linglevel/api/word/service/singleflight/WordSingleFlightRedisCoordinator.java) 기준으로 락 경로가 Redisson 기반으로 전환된 것을 확인했다. +- [WordSingleFlightRedisCoordinator.java](../../src/main/java/com/linglevel/api/word/service/WordSingleFlightRedisCoordinator.java) 기준으로 락 경로가 Redisson `RLock` 기반으로 표준화된 것을 확인했다. - follower timeout 및 leader 실패 전파 시맨틱 보정 사항을 코드 단위로 확인했다. -- 로컬 3노드 Redis 환경에서 Redlock 경로와 단일 락 폴백 경로를 테스트로 검증했다. +- 두 인스턴스 동시 요청에서 single-flight 1회 실행, leader 실패 전파, timeout fallback을 테스트로 검증했다. - 변경 사항은 PR 단위로 분리해 검증했다: `#328`(분산 안정화), `#330`(만료/에러 시맨틱 보정), `#331`(Redlock + 폴백 검증). ## 결과와 남은 이슈 diff --git a/docs/decisions/README.md b/docs/decisions/README.md index 0144c0a0..f792d8db 100644 --- a/docs/decisions/README.md +++ b/docs/decisions/README.md @@ -35,4 +35,4 @@ - [008. 글로벌 이미지 전달 성능 최적화](008-image-delivery-optimization.md) - [009. DSL 기반 크롤링 규칙 관리 구조 도입](009-dsl-driven-crawling.md) - [010. 미션 기반 Codex 에이전트 운영 규칙 정리](010-mission-oriented-agent-guidelines.md) -- [011. Word Single-Flight 분산 안정화와 Redlock 도입](011-word-single-flight-distributed-stability-with-redlock.md) +- [011. Word Single-Flight 분산 안정화와 RLock 표준화](011-word-single-flight-distributed-stability-with-redlock.md) diff --git a/src/main/java/com/linglevel/api/word/service/WordSingleFlightProperties.java b/src/main/java/com/linglevel/api/word/service/WordSingleFlightProperties.java index 0b535da6..bc0fe601 100644 --- a/src/main/java/com/linglevel/api/word/service/WordSingleFlightProperties.java +++ b/src/main/java/com/linglevel/api/word/service/WordSingleFlightProperties.java @@ -5,9 +5,6 @@ import org.springframework.boot.context.properties.ConfigurationProperties; import org.springframework.stereotype.Component; -import java.util.ArrayList; -import java.util.List; - @Getter @Setter @Component @@ -16,8 +13,6 @@ public class WordSingleFlightProperties { private boolean enabled = true; - private long lockTtlMs = 20_000; - private long waitTimeoutMs = 5_000; private long resultTtlMs = 60_000; @@ -27,8 +22,4 @@ public class WordSingleFlightProperties { private String model = "default"; private String schemaVersion = "v2"; - - private boolean redlockEnabled = false; - - private List redlockNodeAddresses = new ArrayList<>(); } diff --git a/src/main/java/com/linglevel/api/word/service/WordSingleFlightRedisCoordinator.java b/src/main/java/com/linglevel/api/word/service/WordSingleFlightRedisCoordinator.java index 217c823a..1b872881 100644 --- a/src/main/java/com/linglevel/api/word/service/WordSingleFlightRedisCoordinator.java +++ b/src/main/java/com/linglevel/api/word/service/WordSingleFlightRedisCoordinator.java @@ -10,11 +10,8 @@ import jakarta.annotation.PreDestroy; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; -import org.redisson.Redisson; -import org.redisson.RedissonRedLock; import org.redisson.api.RLock; import org.redisson.api.RedissonClient; -import org.redisson.config.Config; import org.springframework.data.redis.connection.Message; import org.springframework.data.redis.connection.MessageListener; import org.springframework.data.redis.core.StringRedisTemplate; @@ -26,7 +23,6 @@ import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.time.Duration; -import java.util.ArrayList; import java.util.HexFormat; import java.util.List; import java.util.Locale; @@ -54,26 +50,17 @@ public class WordSingleFlightRedisCoordinator { private final ObjectMapper objectMapper; private final ConcurrentHashMap>> channelWaiters = new ConcurrentHashMap<>(); - private final List redlockClients = new ArrayList<>(); private final MessageListener doneListener = this::onDoneMessage; @PostConstruct void initialize() { redisMessageListenerContainer.addMessageListener(doneListener, new PatternTopic(DONE_PATTERN)); - initializeRedlockClients(); } @PreDestroy void shutdown() { - for (RedissonClient client : redlockClients) { - try { - client.shutdown(); - } catch (Exception e) { - log.warn("Failed to shutdown single-flight Redlock client", e); - } - } - redlockClients.clear(); + } public List execute( @@ -91,7 +78,7 @@ public List execute( return unwrap(cached, keys.digest()); } - RLock lock = createLeaderLock(keys.lockKey()); + RLock lock = createLock(keys.lockKey()); boolean lockAcquired = tryAcquireLeaderLock(lock); if (lockAcquired) { return executeAsLeader(keys, lock, leaderAction); @@ -178,68 +165,10 @@ private void releaseLock(RLock lock, String lockKey) { } } - private RLock createLeaderLock(String lockKey) { - if (properties.isRedlockEnabled() && redlockClients.size() >= 3) { - RLock[] locks = redlockClients.stream() - .map(client -> client.getLock(lockKey)) - .toArray(RLock[]::new); - return new RedissonRedLock(locks); - } - - if (properties.isRedlockEnabled()) { - log.warn("Redlock is enabled but usable node count is {} (<3). Fallback to single RLock.", - redlockClients.size()); - } + private RLock createLock(String lockKey) { return redissonClient.getLock(lockKey); } - private void initializeRedlockClients() { - if (!properties.isRedlockEnabled()) { - return; - } - - List addresses = properties.getRedlockNodeAddresses().stream() - .map(String::trim) - .filter(value -> !value.isBlank()) - .toList(); - - if (addresses.isEmpty()) { - log.warn("Redlock is enabled but no node addresses configured. Fallback to single RLock."); - return; - } - - for (String rawAddress : addresses) { - String address = normalizeAddress(rawAddress); - try { - Config config = new Config(); - config.useSingleServer().setAddress(address); - redlockClients.add(Redisson.create(config)); - } catch (Exception e) { - log.warn("Skipping invalid/unavailable Redlock node address '{}'. Fallback candidates will continue.", rawAddress, e); - } - } - - if (redlockClients.isEmpty()) { - log.warn("Redlock is enabled but no valid/usable nodes initialized. Fallback to single RLock."); - return; - } - - if (redlockClients.size() < 3) { - log.warn("Redlock requires at least 3 independent nodes, but only {} configured. Fallback to single RLock.", - redlockClients.size()); - return; - } - - log.info("Single-flight Redlock mode initialized with {} nodes.", redlockClients.size()); - } - - private String normalizeAddress(String rawAddress) { - if (rawAddress.startsWith("redis://") || rawAddress.startsWith("rediss://")) { - return rawAddress; - } - return "redis://" + rawAddress; - } - private ResultEnvelope readResult(String resultKey) { String raw = stringRedisTemplate.opsForValue().get(resultKey); if (raw == null) { diff --git a/src/main/resources/application-local.properties b/src/main/resources/application-local.properties index aee36a1c..c104d73b 100644 --- a/src/main/resources/application-local.properties +++ b/src/main/resources/application-local.properties @@ -9,10 +9,6 @@ spring.data.mongodb.database=llv_api_local spring.data.redis.host=localhost spring.data.redis.port=6379 -# Redlock (local default) -word.single-flight.redlock-enabled=true -word.single-flight.redlock-node-addresses=redis://localhost:6379,redis://localhost:6380,redis://localhost:6381 - # Logging for Local Development logging.level.com.linglevel.api=DEBUG logging.level.org.springframework.data.mongodb=DEBUG diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index 25666a8c..8c029c5d 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -53,14 +53,11 @@ spring.data.redis.ssl.enabled=false # Word single-flight (Redis lock + Pub/Sub + result key fallback) word.single-flight.enabled=true -word.single-flight.lock-ttl-ms=13000 word.single-flight.wait-timeout-ms=11000 word.single-flight.result-ttl-ms=25000 word.single-flight.prompt-version=v1 word.single-flight.model=${spring.ai.bedrock.converse.chat.options.model:default} word.single-flight.schema-version=v2 -word.single-flight.redlock-enabled=false -word.single-flight.redlock-node-addresses= # AWS S3 (AI Input/Output buckets) aws.s3.region=${S3_REGION} diff --git a/src/test/java/com/linglevel/api/word/service/WordSingleFlightRedisCoordinatorIntegrationTest.java b/src/test/java/com/linglevel/api/word/service/WordSingleFlightRedisCoordinatorIntegrationTest.java index d8bd6e4d..c408abd6 100644 --- a/src/test/java/com/linglevel/api/word/service/WordSingleFlightRedisCoordinatorIntegrationTest.java +++ b/src/test/java/com/linglevel/api/word/service/WordSingleFlightRedisCoordinatorIntegrationTest.java @@ -18,7 +18,6 @@ import org.springframework.data.redis.listener.RedisMessageListenerContainer; import org.springframework.test.util.ReflectionTestUtils; import org.testcontainers.containers.GenericContainer; -import org.testcontainers.utility.DockerImageName; import java.util.List; import java.util.concurrent.CountDownLatch; @@ -33,25 +32,6 @@ class WordSingleFlightRedisCoordinatorIntegrationTest extends AbstractRedisTest { - private static final GenericContainer redlockRedisA; - private static final GenericContainer redlockRedisB; - private static final GenericContainer redlockRedisC; - - static { - redlockRedisA = new GenericContainer<>(DockerImageName.parse("redis:7-alpine")) - .withExposedPorts(6379) - .withReuse(true); - redlockRedisB = new GenericContainer<>(DockerImageName.parse("redis:7-alpine")) - .withExposedPorts(6379) - .withReuse(true); - redlockRedisC = new GenericContainer<>(DockerImageName.parse("redis:7-alpine")) - .withExposedPorts(6379) - .withReuse(true); - redlockRedisA.start(); - redlockRedisB.start(); - redlockRedisC.start(); - } - private CoordinatorFixture nodeA; private CoordinatorFixture nodeB; @@ -137,57 +117,7 @@ void propagatesLeaderFailureAcrossTwoCoordinatorsUsingRealRedis() { assertThat(aiCalls.get()).isEqualTo(1); } - @Test - @DisplayName("Redlock(3노드) 구성에서도 두 인스턴스 동시 요청은 1회만 실행된다") - void deduplicatesAcrossTwoCoordinatorsUsingRealRedisWithRedlock() throws Exception { - CoordinatorFixture redlockNodeA = createNode("test-model-redlock", 3_000, true, redlockNodeAddresses()); - CoordinatorFixture redlockNodeB = createNode("test-model-redlock", 3_000, true, redlockNodeAddresses()); - flushAll(redlockNodeA.template); - - AtomicInteger aiCalls = new AtomicInteger(); - ExecutorService executor = Executors.newFixedThreadPool(2); - CountDownLatch start = new CountDownLatch(1); - - try { - Future> f1 = executor.submit(() -> { - start.await(1, TimeUnit.SECONDS); - return redlockNodeA.coordinator.execute("sprint", LanguageCode.KO, () -> { - aiCalls.incrementAndGet(); - sleep(250); - return List.of(sample("sprint")); - }); - }); - - Future> f2 = executor.submit(() -> { - start.await(1, TimeUnit.SECONDS); - return redlockNodeB.coordinator.execute("sprint", LanguageCode.KO, () -> { - aiCalls.incrementAndGet(); - return List.of(sample("sprint")); - }); - }); - - start.countDown(); - - List r1 = f1.get(5, TimeUnit.SECONDS); - List r2 = f2.get(5, TimeUnit.SECONDS); - - assertThat(r1).hasSize(1); - assertThat(r2).hasSize(1); - assertThat(r1.get(0).getOriginalForm()).isEqualTo("sprint"); - assertThat(r2.get(0).getOriginalForm()).isEqualTo("sprint"); - assertThat(aiCalls.get()).isEqualTo(1); - } finally { - executor.shutdownNow(); - redlockNodeA.close(); - redlockNodeB.close(); - } - } - private CoordinatorFixture createNode(String model, long waitTimeoutMs) { - return createNode(model, waitTimeoutMs, false, List.of()); - } - - private CoordinatorFixture createNode(String model, long waitTimeoutMs, boolean redlockEnabled, List redlockNodeAddresses) { GenericContainer redis = getRedisContainer(); RedisStandaloneConfiguration config = new RedisStandaloneConfiguration(redis.getHost(), redis.getMappedPort(6379)); @@ -210,14 +140,11 @@ private CoordinatorFixture createNode(String model, long waitTimeoutMs, boolean WordSingleFlightProperties properties = new WordSingleFlightProperties(); properties.setEnabled(true); - properties.setLockTtlMs(5_000); properties.setWaitTimeoutMs(waitTimeoutMs); properties.setResultTtlMs(30_000); properties.setPromptVersion("v1"); properties.setModel(model); properties.setSchemaVersion("v2"); - properties.setRedlockEnabled(redlockEnabled); - properties.setRedlockNodeAddresses(redlockNodeAddresses); WordSingleFlightRedisCoordinator coordinator = new WordSingleFlightRedisCoordinator( template, @@ -231,18 +158,6 @@ private CoordinatorFixture createNode(String model, long waitTimeoutMs, boolean return new CoordinatorFixture(connectionFactory, template, listenerContainer, redissonClient, coordinator); } - private List redlockNodeAddresses() { - return List.of( - toRedisAddress(redlockRedisA), - toRedisAddress(redlockRedisB), - toRedisAddress(redlockRedisC) - ); - } - - private String toRedisAddress(GenericContainer container) { - return "redis://" + container.getHost() + ":" + container.getMappedPort(6379); - } - private void flushAll(StringRedisTemplate template) { RedisConnection connection = template.getConnectionFactory().getConnection(); try { diff --git a/src/test/java/com/linglevel/api/word/service/WordSingleFlightRedisCoordinatorTest.java b/src/test/java/com/linglevel/api/word/service/WordSingleFlightRedisCoordinatorTest.java index 33943f08..c31ce9b0 100644 --- a/src/test/java/com/linglevel/api/word/service/WordSingleFlightRedisCoordinatorTest.java +++ b/src/test/java/com/linglevel/api/word/service/WordSingleFlightRedisCoordinatorTest.java @@ -5,7 +5,6 @@ import com.linglevel.api.word.dto.WordAnalysisResult; import com.linglevel.api.word.exception.WordsErrorCode; import com.linglevel.api.word.exception.WordsException; -import org.redisson.RedissonRedLock; import org.redisson.api.RLock; import org.redisson.api.RedissonClient; import org.junit.jupiter.api.BeforeEach; @@ -35,13 +34,10 @@ import java.util.concurrent.atomic.AtomicInteger; import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatCode; import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.Mockito.doAnswer; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.never; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -74,7 +70,6 @@ class WordSingleFlightRedisCoordinatorTest { void setUp() { properties = new WordSingleFlightProperties(); properties.setEnabled(true); - properties.setLockTtlMs(1_000); properties.setWaitTimeoutMs(120); properties.setResultTtlMs(2_000); properties.setPromptVersion("v1"); @@ -233,61 +228,6 @@ void execute_propagatesLeaderDomainErrorCode() { .isEqualTo(WordsErrorCode.WORD_IS_MEANINGLESS)); } - @Test - @DisplayName("Redlock 활성 + 3개 노드 구성 시 RedissonRedLock을 사용한다") - void createLeaderLock_usesRedlockWhenConfigured() { - properties.setRedlockEnabled(true); - - RedissonClient nodeA = mock(RedissonClient.class); - RedissonClient nodeB = mock(RedissonClient.class); - RedissonClient nodeC = mock(RedissonClient.class); - when(nodeA.getLock(anyString())).thenReturn(mock(RLock.class)); - when(nodeB.getLock(anyString())).thenReturn(mock(RLock.class)); - when(nodeC.getLock(anyString())).thenReturn(mock(RLock.class)); - - @SuppressWarnings("unchecked") - List clients = (List) ReflectionTestUtils.getField(coordinator, "redlockClients"); - clients.add(nodeA); - clients.add(nodeB); - clients.add(nodeC); - - RLock lock = ReflectionTestUtils.invokeMethod(coordinator, "createLeaderLock", "sf:word:lock:redlock"); - assertThat(lock).isInstanceOf(RedissonRedLock.class); - verify(redissonClient, never()).getLock("sf:word:lock:redlock"); - } - - @Test - @DisplayName("Redlock 활성 + 노드 2개 구성 시 single RLock으로 폴백한다") - void createLeaderLock_fallsBackToSingleRLockWhenInsufficientNodes() { - properties.setRedlockEnabled(true); - - RedissonClient nodeA = mock(RedissonClient.class); - RedissonClient nodeB = mock(RedissonClient.class); - - @SuppressWarnings("unchecked") - List clients = (List) ReflectionTestUtils.getField(coordinator, "redlockClients"); - clients.add(nodeA); - clients.add(nodeB); - - RLock lock = ReflectionTestUtils.invokeMethod(coordinator, "createLeaderLock", "sf:word:lock:fallback"); - assertThat(lock).isSameAs(redissonLock); - verify(redissonClient).getLock("sf:word:lock:fallback"); - } - - @Test - @DisplayName("Redlock 노드 주소가 잘못되어도 초기화 예외 없이 single RLock으로 폴백한다") - void initializeRedlockClients_doesNotFailStartupOnBadAddress() { - properties.setRedlockEnabled(true); - properties.setRedlockNodeAddresses(List.of("bad address with space")); - - assertThatCode(() -> ReflectionTestUtils.invokeMethod(coordinator, "initializeRedlockClients")) - .doesNotThrowAnyException(); - - RLock lock = ReflectionTestUtils.invokeMethod(coordinator, "createLeaderLock", "sf:word:lock:bad-address"); - assertThat(lock).isSameAs(redissonLock); - verify(redissonClient).getLock("sf:word:lock:bad-address"); - } - private void sleep(long millis) { try { Thread.sleep(millis);