diff --git a/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/livetests/VoiceLiveAudioFormatTests.java b/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/livetests/VoiceLiveAudioFormatTests.java index f9a841b7db92..388a676647c2 100644 --- a/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/livetests/VoiceLiveAudioFormatTests.java +++ b/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/livetests/VoiceLiveAudioFormatTests.java @@ -44,18 +44,18 @@ public class VoiceLiveAudioFormatTests extends VoiceLiveTestBase { static Stream modelAndSamplingRateProvider() { - return withApiVersions(Stream.of(Arguments.of("gpt-4o-realtime", 16000), Arguments.of("gpt-4o-realtime", 44100), - Arguments.of("gpt-4o-realtime", 8000), Arguments.of("gpt-4o", 16000), Arguments.of("gpt-4o", 44100), + return withApiVersions(Stream.of(Arguments.of("gpt-realtime", 16000), Arguments.of("gpt-realtime", 44100), + Arguments.of("gpt-realtime", 8000), Arguments.of("gpt-4o", 16000), Arguments.of("gpt-4o", 44100), Arguments.of("gpt-4.1", 8000)), API_VERSION_GA, API_VERSION_PREVIEW); } static Stream modelAndInputAudioFormatProvider() { return withApiVersions(Stream.of(Arguments.of("gpt-4o", "g711_ulaw", "azure_semantic_vad"), Arguments.of("gpt-4o", "g711_alaw", "azure_semantic_vad"), - Arguments.of("gpt-4o-realtime-preview", "g711_ulaw", "azure_semantic_vad"), - Arguments.of("gpt-4o-realtime-preview", "g711_ulaw", "server_vad"), - Arguments.of("gpt-4o-realtime-preview", "g711_alaw", "azure_semantic_vad"), - Arguments.of("gpt-4o-realtime-preview", "g711_alaw", "server_vad"))); + Arguments.of("gpt-realtime", "g711_ulaw", "azure_semantic_vad"), + Arguments.of("gpt-realtime", "g711_ulaw", "server_vad"), + Arguments.of("gpt-realtime", "g711_alaw", "azure_semantic_vad"), + Arguments.of("gpt-realtime", "g711_alaw", "server_vad"))); } static Stream modelAndOutputAudioFormatAzureVoiceProvider() { @@ -67,8 +67,8 @@ static Stream modelAndOutputAudioFormatAzureVoiceProvider() { } static Stream modelAndOutputAudioFormatOpenAIVoiceProvider() { - return withApiVersions(Stream.of(Arguments.of("gpt-4o-realtime", "pcm16"), - Arguments.of("gpt-4o-realtime", "g711_ulaw"), Arguments.of("gpt-4o-realtime", "g711_alaw"))); + return withApiVersions(Stream.of(Arguments.of("gpt-realtime", "pcm16"), + Arguments.of("gpt-realtime", "g711_ulaw"), Arguments.of("gpt-realtime", "g711_alaw"))); } @ParameterizedTest diff --git a/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/livetests/VoiceLiveAudioTests.java b/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/livetests/VoiceLiveAudioTests.java index 34b33236fc24..2b697d5a0f73 100644 --- a/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/livetests/VoiceLiveAudioTests.java +++ b/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/livetests/VoiceLiveAudioTests.java @@ -36,7 +36,7 @@ public class VoiceLiveAudioTests extends VoiceLiveTestBase { static Stream audioParams() { - return crossProduct(new String[] { "gpt-4o-realtime-preview", "gpt-4.1" }, + return crossProduct(new String[] { "gpt-realtime", "gpt-4.1" }, new String[] { API_VERSION_GA, API_VERSION_PREVIEW }); } @@ -108,7 +108,7 @@ public void testRealtimeServiceWithAudio(String model, String apiVersion) throws } static Stream audioEnhancementsParams() { - return crossProduct(new String[] { "gpt-4o-realtime-preview", "gpt-4.1" }, + return crossProduct(new String[] { "gpt-realtime", "gpt-4.1" }, new String[] { API_VERSION_GA, API_VERSION_PREVIEW }); } @@ -183,77 +183,7 @@ public void testRealtimeServiceWithAudioEnhancements(String model, String apiVer } static Stream echoCancellationParams() { - return crossProduct(new String[] { "gpt-4o-realtime-preview", "gpt-4.1" }, + return crossProduct(new String[] { "gpt-realtime", "gpt-4.1" }, new String[] { API_VERSION_GA, API_VERSION_PREVIEW }); } - - @ParameterizedTest - @MethodSource("echoCancellationParams") - @LiveOnly - public void testRealtimeServiceWithEchoCancellation(String model, String apiVersion) - throws InterruptedException, IOException { - VoiceLiveAsyncClient client = createClient(apiVersion); - - byte[] audioData = loadAudioFile("4-1.wav"); - - AtomicInteger speechStartedEvents = new AtomicInteger(0); - AtomicInteger audioResponseBytes = new AtomicInteger(0); - CountDownLatch responseLatch = new CountDownLatch(1); - - VoiceLiveSessionAsyncClient session = null; - Disposable subscription = null; - try { - VoiceLiveSessionOptions sessionOptions - = new VoiceLiveSessionOptions().setInputAudioTranscription(getSpeechRecognitionSetting(model)) - .setInputAudioEchoCancellation(new AudioEchoCancellation()); - - session = client.startSession(model).block(SESSION_TIMEOUT); - - Assertions.assertNotNull(session, "Session should be created successfully"); - - subscription = session.receiveEvents().subscribe(event -> { - ServerEventType eventType = event.getType(); - - if (eventType == ServerEventType.INPUT_AUDIO_BUFFER_SPEECH_STARTED) { - speechStartedEvents.incrementAndGet(); - } else if (eventType == ServerEventType.RESPONSE_AUDIO_DELTA) { - if (event instanceof SessionUpdateResponseAudioDelta) { - SessionUpdateResponseAudioDelta audioDelta = (SessionUpdateResponseAudioDelta) event; - if (audioDelta.getDelta() != null) { - audioResponseBytes.addAndGet(audioDelta.getDelta().length); - } - } - responseLatch.countDown(); - } else if (eventType == ServerEventType.ERROR) { - handleError(event); - responseLatch.countDown(); - } - }, error -> { - System.err.println("Error receiving events: " + error.getMessage()); - responseLatch.countDown(); - }); - - waitForSetup(); - - ClientEventSessionUpdate updateEvent = new ClientEventSessionUpdate(sessionOptions); - session.sendEvent(updateEvent).block(SEND_TIMEOUT); - - waitForSetup(); - - session.sendInputAudio(audioData).block(SEND_TIMEOUT); - session.sendInputAudio(getTrailingSilenceBytes()).block(SEND_TIMEOUT); - - boolean received = responseLatch.await(EVENT_TIMEOUT_SECONDS, TimeUnit.SECONDS); - - Assertions.assertTrue(received, "Should receive response within timeout"); - Assertions.assertTrue(speechStartedEvents.get() > 1, - "Expected more than 1 speech segment, got " + speechStartedEvents.get()); - Assertions.assertTrue(audioResponseBytes.get() > 0, "Audio bytes should be greater than 0"); - } finally { - if (subscription != null) { - subscription.dispose(); - } - closeSession(session); - } - } } diff --git a/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/livetests/VoiceLiveConversationTests.java b/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/livetests/VoiceLiveConversationTests.java index 9168a6e5816f..a756f9b604d9 100644 --- a/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/livetests/VoiceLiveConversationTests.java +++ b/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/livetests/VoiceLiveConversationTests.java @@ -37,7 +37,7 @@ public class VoiceLiveConversationTests extends VoiceLiveTestBase { static Stream retrieveItemParams() { - return crossProduct(new String[] { "gpt-4o-realtime" }, new String[] { API_VERSION_GA, API_VERSION_PREVIEW }); + return crossProduct(new String[] { "gpt-realtime" }, new String[] { API_VERSION_GA, API_VERSION_PREVIEW }); } @ParameterizedTest @@ -132,7 +132,7 @@ public void testRealtimeServiceRetrieveItem(String model, String apiVersion) } static Stream truncateItemParams() { - return crossProduct(new String[] { "gpt-4o-realtime" }, new String[] { API_VERSION_GA, API_VERSION_PREVIEW }); + return crossProduct(new String[] { "gpt-realtime" }, new String[] { API_VERSION_GA, API_VERSION_PREVIEW }); } @ParameterizedTest diff --git a/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/livetests/VoiceLiveTestBase.java b/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/livetests/VoiceLiveTestBase.java index 707e9ea77ee1..89d6d9096fc7 100644 --- a/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/livetests/VoiceLiveTestBase.java +++ b/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/livetests/VoiceLiveTestBase.java @@ -40,9 +40,7 @@ public abstract class VoiceLiveTestBase extends TestProxyTestBase { // Model constants protected static final String MODEL_GPT_4O = "gpt-4o"; - protected static final String MODEL_GPT_4O_REALTIME = "gpt-4o-realtime"; - protected static final String MODEL_GPT_4O_REALTIME_PREVIEW = "gpt-4o-realtime-preview"; - protected static final String MODEL_GPT_4O_REALTIME_PREVIEW_2025_06_03 = "gpt-4o-realtime-preview-2025-06-03"; + protected static final String MODEL_GPT_REALTIME = "gpt-realtime"; protected static final String MODEL_GPT_41 = "gpt-4.1"; protected static final String MODEL_GPT_5 = "gpt-5"; protected static final String MODEL_GPT_5_CHAT = "gpt-5-chat"; @@ -51,7 +49,7 @@ public abstract class VoiceLiveTestBase extends TestProxyTestBase { // Default models for non-parameterized tests protected static final String TEST_MODEL = MODEL_GPT_4O; - protected static final String TEST_MODEL_REALTIME = MODEL_GPT_4O_REALTIME_PREVIEW; + protected static final String TEST_MODEL_REALTIME = MODEL_GPT_REALTIME; // Timeout constants protected static final Duration SESSION_TIMEOUT = Duration.ofSeconds(30); @@ -175,10 +173,9 @@ protected void handleError(SessionUpdate event) { } protected AudioInputTranscriptionOptions getSpeechRecognitionSetting(String model) { - AudioInputTranscriptionOptionsModel transcriptionModel - = model.startsWith("gpt-4o-realtime") || model.startsWith("gpt-4o-mini-realtime") - ? AudioInputTranscriptionOptionsModel.WHISPER_1 - : AudioInputTranscriptionOptionsModel.AZURE_SPEECH; + AudioInputTranscriptionOptionsModel transcriptionModel = model.startsWith("gpt-realtime") + ? AudioInputTranscriptionOptionsModel.WHISPER_1 + : AudioInputTranscriptionOptionsModel.AZURE_SPEECH; return new AudioInputTranscriptionOptions(transcriptionModel).setLanguage("en-US"); } diff --git a/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/livetests/VoiceLiveToolCallTests.java b/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/livetests/VoiceLiveToolCallTests.java index 9864cbcc6093..755ab6db1b57 100644 --- a/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/livetests/VoiceLiveToolCallTests.java +++ b/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/livetests/VoiceLiveToolCallTests.java @@ -49,12 +49,12 @@ public class VoiceLiveToolCallTests extends VoiceLiveTestBase { private static final String API_VERSION_2025_05_01_PREVIEW = "2025-05-01-preview"; // ===== test_realtime_service_tool_call ===== - // Python: models=[gpt-4o-realtime, gpt-4o], api_versions=[2025-10-01, 2026-01-01-preview] + // Python: models=[gpt-realtime, gpt-4o], api_versions=[2025-10-01, 2026-01-01-preview] // Uses _get_speech_recognition_setting(model), audio=4-1.wav, tool=assess_pronunciation // Voice: AzureStandardVoice("en-US-AriaNeural") static Stream toolCallParams() { - return crossProduct(new String[] { MODEL_GPT_4O_REALTIME, MODEL_GPT_4O }, + return crossProduct(new String[] { MODEL_GPT_REALTIME, MODEL_GPT_4O }, new String[] { API_VERSION_GA, API_VERSION_PREVIEW }); } @@ -92,7 +92,7 @@ private void doTestRealtimeServiceToolCall(String model, String apiVersion) List functionCallResults = new ArrayList<>(); CountDownLatch firstDeltaLatch = new CountDownLatch(1); // Track response completions so we can re-issue response.create() if VAD - // triggered a non-tool-call response first (gpt-4o-realtime race condition). + // triggered a non-tool-call response first (gpt-realtime race condition). CountDownLatch responseDoneLatch = new CountDownLatch(1); VoiceLiveSessionAsyncClient session = null; @@ -136,7 +136,7 @@ private void doTestRealtimeServiceToolCall(String model, String apiVersion) session.sendEvent(new ClientEventSessionUpdate(sessionOptions)).block(SEND_TIMEOUT); // Send audio and response.create() in tight succession to beat server VAD. - // With gpt-4o-realtime, the default server VAD detects speech, auto-commits the + // With gpt-realtime, the default server VAD detects speech, auto-commits the // buffer and triggers its own response before a delayed response.create() arrives. session.sendInputAudio(audioData) .then(session.sendEvent(new ClientEventResponseCreate())) @@ -406,7 +406,7 @@ public void testRealtimeServiceToolCallParameter(String model, String apiVersion // Uses azure-speech + ServerVad, audio=ask_weather.wav static Stream liveSessionUpdateParams() { - return crossProduct(new String[] { MODEL_GPT_4O_REALTIME }, + return crossProduct(new String[] { MODEL_GPT_REALTIME }, new String[] { API_VERSION_2025_05_01_PREVIEW, API_VERSION_PREVIEW }); } @@ -569,7 +569,6 @@ public void testRealtimeServiceLiveSessionUpdate(String model, String apiVersion // Python: @pytest.mark.skip() - skipped in Python tests static Stream toolCallNoAudioOverlapParams() { - return crossProduct(new String[] { MODEL_GPT_4O_REALTIME }, - new String[] { API_VERSION_GA, API_VERSION_PREVIEW }); + return crossProduct(new String[] { MODEL_GPT_REALTIME }, new String[] { API_VERSION_GA, API_VERSION_PREVIEW }); } } diff --git a/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/livetests/VoiceLiveTranscriptionTests.java b/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/livetests/VoiceLiveTranscriptionTests.java index bb24cf3d2126..6b81c311e1b3 100644 --- a/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/livetests/VoiceLiveTranscriptionTests.java +++ b/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/livetests/VoiceLiveTranscriptionTests.java @@ -34,7 +34,7 @@ public class VoiceLiveTranscriptionTests extends VoiceLiveTestBase { static Stream whisperTranscriptionParams() { - return crossProduct(new String[] { "gpt-4o-realtime-preview", "gpt-4.1" }, + return crossProduct(new String[] { "gpt-realtime", "gpt-4.1" }, new String[] { API_VERSION_GA, API_VERSION_PREVIEW }); } @@ -117,7 +117,7 @@ static Stream gpt4oTranscribeParams() { @LiveOnly public void testInputAudioTranscriptionWithGpt4oTranscribe(String transcriptionModel, String apiVersion) throws InterruptedException, IOException { - String model = "gpt-4o-realtime-preview"; + String model = "gpt-realtime"; VoiceLiveAsyncClient client = createClient(apiVersion); byte[] audioData = loadAudioFile("largest_lake.wav"); diff --git a/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/livetests/VoiceLiveTurnDetectionTests.java b/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/livetests/VoiceLiveTurnDetectionTests.java index d6b42f8cfff1..5cb735b4fdb5 100644 --- a/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/livetests/VoiceLiveTurnDetectionTests.java +++ b/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/livetests/VoiceLiveTurnDetectionTests.java @@ -38,13 +38,13 @@ public class VoiceLiveTurnDetectionTests extends VoiceLiveTestBase { // ===== test_realtime_service_with_turn_detection_long_tts_vad_duration ===== - // Python: models=[gpt-4o-realtime-preview, gpt-4o], api_versions=[2025-10-01, 2026-01-01-preview] + // Python: models=[gpt-realtime, gpt-4o], api_versions=[2025-10-01, 2026-01-01-preview] // turn_detection: {"type": "azure_semantic_vad", "speech_duration_assistant_speaking_ms": 800} // Note: speechDurationAssistantSpeakingMs not available in Java SDK; // using speechDurationMs(800) as the closest available parameter. static Stream longTtsVadDurationParams() { - return crossProduct(new String[] { MODEL_GPT_4O_REALTIME_PREVIEW, MODEL_GPT_4O }, + return crossProduct(new String[] { MODEL_GPT_REALTIME, MODEL_GPT_4O }, new String[] { API_VERSION_GA, API_VERSION_PREVIEW }); } @@ -135,8 +135,7 @@ private void doTestLongTtsVadDuration(String model, String apiVersion) throws In static Stream multilingualParams() { return withApiVersions(Stream.of( - Arguments.of("gpt-4o-realtime-preview, default", MODEL_GPT_4O_REALTIME_PREVIEW, - new AzureSemanticVadTurnDetectionMultilingual()), + Arguments.of("gpt-realtime, default", MODEL_GPT_REALTIME, new AzureSemanticVadTurnDetectionMultilingual()), Arguments.of("gpt-4o, default", MODEL_GPT_4O, new AzureSemanticVadTurnDetectionMultilingual()), Arguments.of("gpt-4o, speechDuration200", MODEL_GPT_4O, new AzureSemanticVadTurnDetectionMultilingual().setSpeechDurationMs(200)), diff --git a/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/livetests/VoiceLiveVoicePropertiesTests.java b/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/livetests/VoiceLiveVoicePropertiesTests.java index 9df2b5d48e41..90044e045f4c 100644 --- a/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/livetests/VoiceLiveVoicePropertiesTests.java +++ b/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/livetests/VoiceLiveVoicePropertiesTests.java @@ -33,7 +33,7 @@ public class VoiceLiveVoicePropertiesTests extends VoiceLiveTestBase { static Stream voicePropertiesParams() { - return crossProduct(new String[] { "gpt-4o-realtime", "gpt-4.1" }, + return crossProduct(new String[] { "gpt-realtime", "gpt-4.1" }, new String[] { API_VERSION_GA, API_VERSION_PREVIEW }); } @@ -110,7 +110,7 @@ public void testRealtimeServiceWithVoiceProperties(String model, String apiVersi } static Stream audioTimestampAndVisemeParams() { - return crossProduct(new String[] { "gpt-4o-realtime-preview", "gpt-4.1" }, + return crossProduct(new String[] { "gpt-realtime", "gpt-4.1" }, new String[] { API_VERSION_GA, API_VERSION_PREVIEW }); }