Azure · xitzhang · May 7, 2026 · May 7, 2026 · May 7, 2026 · May 7, 2026
@@ -44,18 +44,18 @@
 public class VoiceLiveAudioFormatTests extends VoiceLiveTestBase {
 
     static Stream<Arguments> modelAndSamplingRateProvider() {
-        return withApiVersions(Stream.of(Arguments.of("gpt-4o-realtime", 16000), Arguments.of("gpt-4o-realtime", 44100),
-            Arguments.of("gpt-4o-realtime", 8000), Arguments.of("gpt-4o", 16000), Arguments.of("gpt-4o", 44100),
+        return withApiVersions(Stream.of(Arguments.of("gpt-realtime", 16000), Arguments.of("gpt-realtime", 44100),
+            Arguments.of("gpt-realtime", 8000), Arguments.of("gpt-4o", 16000), Arguments.of("gpt-4o", 44100),
             Arguments.of("gpt-4.1", 8000)), API_VERSION_GA, API_VERSION_PREVIEW);
     }
 
     static Stream<Arguments> modelAndInputAudioFormatProvider() {
         return withApiVersions(Stream.of(Arguments.of("gpt-4o", "g711_ulaw", "azure_semantic_vad"),
             Arguments.of("gpt-4o", "g711_alaw", "azure_semantic_vad"),
-            Arguments.of("gpt-4o-realtime-preview", "g711_ulaw", "azure_semantic_vad"),
-            Arguments.of("gpt-4o-realtime-preview", "g711_ulaw", "server_vad"),
-            Arguments.of("gpt-4o-realtime-preview", "g711_alaw", "azure_semantic_vad"),
-            Arguments.of("gpt-4o-realtime-preview", "g711_alaw", "server_vad")));
+            Arguments.of("gpt-realtime", "g711_ulaw", "azure_semantic_vad"),
+            Arguments.of("gpt-realtime", "g711_ulaw", "server_vad"),
+            Arguments.of("gpt-realtime", "g711_alaw", "azure_semantic_vad"),
+            Arguments.of("gpt-realtime", "g711_alaw", "server_vad")));
     }
 
     static Stream<Arguments> modelAndOutputAudioFormatAzureVoiceProvider() {
@@ -67,8 +67,8 @@ static Stream<Arguments> modelAndOutputAudioFormatAzureVoiceProvider() {
     }
 
     static Stream<Arguments> modelAndOutputAudioFormatOpenAIVoiceProvider() {
-        return withApiVersions(Stream.of(Arguments.of("gpt-4o-realtime", "pcm16"),
-            Arguments.of("gpt-4o-realtime", "g711_ulaw"), Arguments.of("gpt-4o-realtime", "g711_alaw")));
+        return withApiVersions(Stream.of(Arguments.of("gpt-realtime", "pcm16"),
+            Arguments.of("gpt-realtime", "g711_ulaw"), Arguments.of("gpt-realtime", "g711_alaw")));
     }
 
     @ParameterizedTest

@@ -36,7 +36,7 @@
 public class VoiceLiveAudioTests extends VoiceLiveTestBase {
 
     static Stream<Arguments> audioParams() {
-        return crossProduct(new String[] { "gpt-4o-realtime-preview", "gpt-4.1" },
+        return crossProduct(new String[] { "gpt-realtime", "gpt-4.1" },
             new String[] { API_VERSION_GA, API_VERSION_PREVIEW });
     }
 
@@ -108,7 +108,7 @@ public void testRealtimeServiceWithAudio(String model, String apiVersion) throws
     }
 
     static Stream<Arguments> audioEnhancementsParams() {
-        return crossProduct(new String[] { "gpt-4o-realtime-preview", "gpt-4.1" },
+        return crossProduct(new String[] { "gpt-realtime", "gpt-4.1" },
             new String[] { API_VERSION_GA, API_VERSION_PREVIEW });
     }
 
@@ -183,77 +183,2 @@ public void testRealtimeServiceWithAudioEnhancements(String model, String apiVer
     }
-
-    static Stream<Arguments> echoCancellationParams() {
-        return crossProduct(new String[] { "gpt-4o-realtime-preview", "gpt-4.1" },
-            new String[] { API_VERSION_GA, API_VERSION_PREVIEW });
-    }
-
-    @ParameterizedTest
-    @MethodSource("echoCancellationParams")
-    @LiveOnly
-    public void testRealtimeServiceWithEchoCancellation(String model, String apiVersion)
-        throws InterruptedException, IOException {
-        VoiceLiveAsyncClient client = createClient(apiVersion);
-
-        byte[] audioData = loadAudioFile("4-1.wav");
-
-        AtomicInteger speechStartedEvents = new AtomicInteger(0);
-        AtomicInteger audioResponseBytes = new AtomicInteger(0);
-        CountDownLatch responseLatch = new CountDownLatch(1);
-
-        VoiceLiveSessionAsyncClient session = null;
-        Disposable subscription = null;
-        try {
-            VoiceLiveSessionOptions sessionOptions
-                = new VoiceLiveSessionOptions().setInputAudioTranscription(getSpeechRecognitionSetting(model))
-                    .setInputAudioEchoCancellation(new AudioEchoCancellation());
-
-            session = client.startSession(model).block(SESSION_TIMEOUT);
-
-            Assertions.assertNotNull(session, "Session should be created successfully");
-
-            subscription = session.receiveEvents().subscribe(event -> {
-                ServerEventType eventType = event.getType();
-
-                if (eventType == ServerEventType.INPUT_AUDIO_BUFFER_SPEECH_STARTED) {
-                    speechStartedEvents.incrementAndGet();
-                } else if (eventType == ServerEventType.RESPONSE_AUDIO_DELTA) {
-                    if (event instanceof SessionUpdateResponseAudioDelta) {
-                        SessionUpdateResponseAudioDelta audioDelta = (SessionUpdateResponseAudioDelta) event;
-                        if (audioDelta.getDelta() != null) {
-                            audioResponseBytes.addAndGet(audioDelta.getDelta().length);
-                        }
-                    }
-                    responseLatch.countDown();
-                } else if (eventType == ServerEventType.ERROR) {
-                    handleError(event);
-                    responseLatch.countDown();
-                }
-            }, error -> {
-                System.err.println("Error receiving events: " + error.getMessage());
-                responseLatch.countDown();
-            });
-
-            waitForSetup();
-
-            ClientEventSessionUpdate updateEvent = new ClientEventSessionUpdate(sessionOptions);
-            session.sendEvent(updateEvent).block(SEND_TIMEOUT);
-
-            waitForSetup();
-
-            session.sendInputAudio(audioData).block(SEND_TIMEOUT);
-            session.sendInputAudio(getTrailingSilenceBytes()).block(SEND_TIMEOUT);
-
-            boolean received = responseLatch.await(EVENT_TIMEOUT_SECONDS, TimeUnit.SECONDS);
-
-            Assertions.assertTrue(received, "Should receive response within timeout");
-            Assertions.assertTrue(speechStartedEvents.get() > 1,
-                "Expected more than 1 speech segment, got " + speechStartedEvents.get());
-            Assertions.assertTrue(audioResponseBytes.get() > 0, "Audio bytes should be greater than 0");
-        } finally {
-            if (subscription != null) {
-                subscription.dispose();
-            }
-            closeSession(session);
-        }
-    }
 }
@@ -37,7 +37,7 @@
 public class VoiceLiveConversationTests extends VoiceLiveTestBase {
 
     static Stream<Arguments> retrieveItemParams() {
-        return crossProduct(new String[] { "gpt-4o-realtime" }, new String[] { API_VERSION_GA, API_VERSION_PREVIEW });
+        return crossProduct(new String[] { "gpt-realtime" }, new String[] { API_VERSION_GA, API_VERSION_PREVIEW });
     }
 
     @ParameterizedTest
@@ -132,7 +132,7 @@ public void testRealtimeServiceRetrieveItem(String model, String apiVersion)
     }
 
     static Stream<Arguments> truncateItemParams() {
-        return crossProduct(new String[] { "gpt-4o-realtime" }, new String[] { API_VERSION_GA, API_VERSION_PREVIEW });
+        return crossProduct(new String[] { "gpt-realtime" }, new String[] { API_VERSION_GA, API_VERSION_PREVIEW });
     }
 
     @ParameterizedTest

@@ -40,9 +40,7 @@ public abstract class VoiceLiveTestBase extends TestProxyTestBase {
 
     // Model constants
     protected static final String MODEL_GPT_4O = "gpt-4o";
-    protected static final String MODEL_GPT_4O_REALTIME = "gpt-4o-realtime";
-    protected static final String MODEL_GPT_4O_REALTIME_PREVIEW = "gpt-4o-realtime-preview";
-    protected static final String MODEL_GPT_4O_REALTIME_PREVIEW_2025_06_03 = "gpt-4o-realtime-preview-2025-06-03";
+    protected static final String MODEL_GPT_REALTIME = "gpt-realtime";
     protected static final String MODEL_GPT_41 = "gpt-4.1";
     protected static final String MODEL_GPT_5 = "gpt-5";
     protected static final String MODEL_GPT_5_CHAT = "gpt-5-chat";
@@ -51,7 +49,7 @@ public abstract class VoiceLiveTestBase extends TestProxyTestBase {
 
     // Default models for non-parameterized tests
     protected static final String TEST_MODEL = MODEL_GPT_4O;
-    protected static final String TEST_MODEL_REALTIME = MODEL_GPT_4O_REALTIME_PREVIEW;
+    protected static final String TEST_MODEL_REALTIME = MODEL_GPT_REALTIME;
 
     // Timeout constants
     protected static final Duration SESSION_TIMEOUT = Duration.ofSeconds(30);
@@ -175,10 +173,16 @@ protected void handleError(SessionUpdate event) {
     }
 
+    /**
+     * Chooses the input-audio transcription settings to pair with a conversation model.
+     *
+     * @param model Conversation model name; names beginning with {@link #MODEL_GPT_REALTIME} select
+     * {@code WHISPER_1}, every other name selects {@code AZURE_SPEECH}.
+     * @return Transcription options for the selected model with the language set to {@code en-US}.
+     */
     protected AudioInputTranscriptionOptions getSpeechRecognitionSetting(String model) {
-        AudioInputTranscriptionOptionsModel transcriptionModel
-            = model.startsWith("gpt-4o-realtime") || model.startsWith("gpt-4o-mini-realtime")
-                ? AudioInputTranscriptionOptionsModel.WHISPER_1
-                : AudioInputTranscriptionOptionsModel.AZURE_SPEECH;
+        AudioInputTranscriptionOptionsModel transcriptionModel = model.startsWith(MODEL_GPT_REALTIME)
+            ? AudioInputTranscriptionOptionsModel.WHISPER_1
+            : AudioInputTranscriptionOptionsModel.AZURE_SPEECH;
         return new AudioInputTranscriptionOptions(transcriptionModel).setLanguage("en-US");
     }
 

@@ -49,12 +49,12 @@ public class VoiceLiveToolCallTests extends VoiceLiveTestBase {
     private static final String API_VERSION_2025_05_01_PREVIEW = "2025-05-01-preview";
 
     // ===== test_realtime_service_tool_call =====
-    // Python: models=[gpt-4o-realtime, gpt-4o], api_versions=[2025-10-01, 2026-01-01-preview]
+    // Python: models=[gpt-realtime, gpt-4o], api_versions=[2025-10-01, 2026-01-01-preview]
     // Uses _get_speech_recognition_setting(model), audio=4-1.wav, tool=assess_pronunciation
     // Voice: AzureStandardVoice("en-US-AriaNeural")
 
     static Stream<Arguments> toolCallParams() {
-        return crossProduct(new String[] { MODEL_GPT_4O_REALTIME, MODEL_GPT_4O },
+        return crossProduct(new String[] { MODEL_GPT_REALTIME, MODEL_GPT_4O },
             new String[] { API_VERSION_GA, API_VERSION_PREVIEW });
     }
 
@@ -92,7 +92,7 @@ private void doTestRealtimeServiceToolCall(String model, String apiVersion)
         List<SessionUpdateResponseFunctionCallArgumentsDelta> functionCallResults = new ArrayList<>();
         CountDownLatch firstDeltaLatch = new CountDownLatch(1);
         // Track response completions so we can re-issue response.create() if VAD
-        // triggered a non-tool-call response first (gpt-4o-realtime race condition).
+        // triggered a non-tool-call response first (gpt-realtime race condition).
         CountDownLatch responseDoneLatch = new CountDownLatch(1);
 
         VoiceLiveSessionAsyncClient session = null;
@@ -136,7 +136,7 @@ private void doTestRealtimeServiceToolCall(String model, String apiVersion)
             session.sendEvent(new ClientEventSessionUpdate(sessionOptions)).block(SEND_TIMEOUT);
 
             // Send audio and response.create() in tight succession to beat server VAD.
-            // With gpt-4o-realtime, the default server VAD detects speech, auto-commits the
+            // With gpt-realtime, the default server VAD detects speech, auto-commits the
             // buffer and triggers its own response before a delayed response.create() arrives.
             session.sendInputAudio(audioData)
                 .then(session.sendEvent(new ClientEventResponseCreate()))
@@ -406,7 +406,7 @@ public void testRealtimeServiceToolCallParameter(String model, String apiVersion
     // Uses azure-speech + ServerVad, audio=ask_weather.wav
 
     static Stream<Arguments> liveSessionUpdateParams() {
-        return crossProduct(new String[] { MODEL_GPT_4O_REALTIME },
+        return crossProduct(new String[] { MODEL_GPT_REALTIME },
             new String[] { API_VERSION_2025_05_01_PREVIEW, API_VERSION_PREVIEW });
     }
 
@@ -569,7 +569,6 @@ public void testRealtimeServiceLiveSessionUpdate(String model, String apiVersion
     // Python: @pytest.mark.skip() - skipped in Python tests
 
     static Stream<Arguments> toolCallNoAudioOverlapParams() {
-        return crossProduct(new String[] { MODEL_GPT_4O_REALTIME },
-            new String[] { API_VERSION_GA, API_VERSION_PREVIEW });
+        return crossProduct(new String[] { MODEL_GPT_REALTIME }, new String[] { API_VERSION_GA, API_VERSION_PREVIEW });
     }
 }
@@ -34,7 +34,7 @@
 public class VoiceLiveTranscriptionTests extends VoiceLiveTestBase {
 
     static Stream<Arguments> whisperTranscriptionParams() {
-        return crossProduct(new String[] { "gpt-4o-realtime-preview", "gpt-4.1" },
+        return crossProduct(new String[] { "gpt-realtime", "gpt-4.1" },
             new String[] { API_VERSION_GA, API_VERSION_PREVIEW });
     }
 
@@ -117,7 +117,7 @@ static Stream<Arguments> gpt4oTranscribeParams() {
     @LiveOnly
     public void testInputAudioTranscriptionWithGpt4oTranscribe(String transcriptionModel, String apiVersion)
         throws InterruptedException, IOException {
-        String model = "gpt-4o-realtime-preview";
+        String model = "gpt-realtime";
         VoiceLiveAsyncClient client = createClient(apiVersion);
 
         byte[] audioData = loadAudioFile("largest_lake.wav");

@@ -38,13 +38,13 @@
 public class VoiceLiveTurnDetectionTests extends VoiceLiveTestBase {
 
     // ===== test_realtime_service_with_turn_detection_long_tts_vad_duration =====
-    // Python: models=[gpt-4o-realtime-preview, gpt-4o], api_versions=[2025-10-01, 2026-01-01-preview]
+    // Python: models=[gpt-realtime, gpt-4o], api_versions=[2025-10-01, 2026-01-01-preview]
     // turn_detection: {"type": "azure_semantic_vad", "speech_duration_assistant_speaking_ms": 800}
     // Note: speechDurationAssistantSpeakingMs not available in Java SDK;
     // using speechDurationMs(800) as the closest available parameter.
 
     static Stream<Arguments> longTtsVadDurationParams() {
-        return crossProduct(new String[] { MODEL_GPT_4O_REALTIME_PREVIEW, MODEL_GPT_4O },
+        return crossProduct(new String[] { MODEL_GPT_REALTIME, MODEL_GPT_4O },
             new String[] { API_VERSION_GA, API_VERSION_PREVIEW });
     }
 
@@ -135,8 +135,7 @@ private void doTestLongTtsVadDuration(String model, String apiVersion) throws In
 
     static Stream<Arguments> multilingualParams() {
         return withApiVersions(Stream.of(
-            Arguments.of("gpt-4o-realtime-preview, default", MODEL_GPT_4O_REALTIME_PREVIEW,
-                new AzureSemanticVadTurnDetectionMultilingual()),
+            Arguments.of("gpt-realtime, default", MODEL_GPT_REALTIME, new AzureSemanticVadTurnDetectionMultilingual()),
             Arguments.of("gpt-4o, default", MODEL_GPT_4O, new AzureSemanticVadTurnDetectionMultilingual()),
             Arguments.of("gpt-4o, speechDuration200", MODEL_GPT_4O,
                 new AzureSemanticVadTurnDetectionMultilingual().setSpeechDurationMs(200)),

@@ -33,7 +33,7 @@
 public class VoiceLiveVoicePropertiesTests extends VoiceLiveTestBase {
 
     static Stream<Arguments> voicePropertiesParams() {
-        return crossProduct(new String[] { "gpt-4o-realtime", "gpt-4.1" },
+        return crossProduct(new String[] { "gpt-realtime", "gpt-4.1" },
             new String[] { API_VERSION_GA, API_VERSION_PREVIEW });
     }
 
@@ -110,7 +110,7 @@ public void testRealtimeServiceWithVoiceProperties(String model, String apiVersi
     }
 
     static Stream<Arguments> audioTimestampAndVisemeParams() {
-        return crossProduct(new String[] { "gpt-4o-realtime-preview", "gpt-4.1" },
+        return crossProduct(new String[] { "gpt-realtime", "gpt-4.1" },
             new String[] { API_VERSION_GA, API_VERSION_PREVIEW });
     }