From d829414b3ee2d97b3503d71135a288f916530587 Mon Sep 17 00:00:00 2001
From: Sahil Yadav <yadavsahil197@gmail.com>
Date: Wed, 18 Mar 2026 17:19:16 -0700
Subject: [PATCH 1/2] Fix SSE handling for speech models

---
 src/together/resources/audio/speech.py |  1 -
 src/together/types/__init__.py         |  2 --
 src/together/types/audio_speech.py     | 41 ++++----------------------
 3 files changed, 6 insertions(+), 38 deletions(-)
diff --git a/src/together/resources/audio/speech.py b/src/together/resources/audio/speech.py
index 1c3231b7..5c048f0b 100644
--- a/src/together/resources/audio/speech.py
+++ b/src/together/resources/audio/speech.py
@@ -10,7 +10,6 @@
     AudioLanguage,
     AudioResponseEncoding,
     AudioSpeechStreamChunk,
-    AudioSpeechStreamEvent,
     AudioSpeechStreamResponse,
     TogetherClient,
     TogetherRequest,
diff --git a/src/together/types/__init__.py b/src/together/types/__init__.py
index 351f2a11..c499c68e 100644
--- a/src/together/types/__init__.py
+++ b/src/together/types/__init__.py
@@ -5,7 +5,6 @@
     AudioResponseFormat,
     AudioSpeechRequest,
     AudioSpeechStreamChunk,
-    AudioSpeechStreamEvent,
     AudioSpeechStreamResponse,
     AudioTimestampGranularities,
     AudioTranscriptionRequest,
@@ -134,7 +133,6 @@
     "AudioLanguage",
     "AudioResponseEncoding",
     "AudioSpeechStreamChunk",
-    "AudioSpeechStreamEvent",
     "AudioSpeechStreamResponse",
     "AudioTranscriptionRequest",
     "AudioTranslationRequest",
diff --git a/src/together/types/audio_speech.py b/src/together/types/audio_speech.py
index 82636e5c..1886dee5 100644
--- a/src/together/types/audio_speech.py
+++ b/src/together/types/audio_speech.py
@@ -41,14 +41,6 @@ class AudioResponseEncoding(str, Enum):
     PCM_ALAW = "pcm_alaw"
 
 
-class AudioObjectType(str, Enum):
-    AUDIO_TTS_CHUNK = "audio.tts.chunk"
-
-
-class StreamSentinelType(str, Enum):
-    DONE = "[DONE]"
-
-
 class AudioSpeechRequest(BaseModel):
     model: str
     input: str
@@ -61,21 +53,8 @@ class AudioSpeechRequest(BaseModel):
 
 
 class AudioSpeechStreamChunk(BaseModel):
-    object: AudioObjectType = AudioObjectType.AUDIO_TTS_CHUNK
-    model: str
-    b64: str
-
-
-class AudioSpeechStreamEvent(BaseModel):
-    data: AudioSpeechStreamChunk
-
-
-class StreamSentinel(BaseModel):
-    data: StreamSentinelType = StreamSentinelType.DONE
-
-
-class AudioSpeechStreamEventResponse(BaseModel):
-    response: AudioSpeechStreamEvent | StreamSentinel
+    type: str = "conversation.item.audio_output.delta"
+    delta: str
 
 
 class AudioSpeechStreamResponse(BaseModel):
@@ -127,18 +106,10 @@ def stream_to_file(
                 if isinstance(chunk.data, bytes):
                     audio_chunks.append(chunk.data)
                 elif isinstance(chunk.data, dict):
-                    # SSE format with JSON/base64
-                    try:
-                        stream_event = AudioSpeechStreamEventResponse(
-                            response={"data": chunk.data}
-                        )
-                        if isinstance(stream_event.response, StreamSentinel):
-                            break
-                        audio_chunks.append(
-                            base64.b64decode(stream_event.response.data.b64)
-                        )
-                    except Exception:
-                        continue  # Skip malformed chunks
+                    # SSE format: {"type": "conversation.item.audio_output.delta", "delta": "<base64>"}
+                    delta = chunk.data.get("delta")
+                    if delta:
+                        audio_chunks.append(base64.b64decode(delta))
 
             if not audio_chunks:
                 raise ValueError("No audio data received in streaming response")

From 5a8c26316b0d022fe8118c9e3c319c1e6283fd31 Mon Sep 17 00:00:00 2001
From: Sahil Yadav <yadavsahil197@gmail.com>
Date: Wed, 18 Mar 2026 17:53:15 -0700
Subject: [PATCH 2/2] Fix integration tests by switching the completion test model

---
 tests/integration/constants.py                        | 2 +-
 tests/integration/resources/test_completion_stream.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/integration/constants.py b/tests/integration/constants.py
index 1c56030a..1dc5f8f8 100644
--- a/tests/integration/constants.py
+++ b/tests/integration/constants.py
@@ -1,5 +1,5 @@
 completion_test_model_list = [
-    "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+    "meta-llama/Llama-3.3-70B-Instruct-Turbo",
 ]
 chat_test_model_list = []
 embedding_test_model_list = []
diff --git a/tests/integration/resources/test_completion_stream.py b/tests/integration/resources/test_completion_stream.py
index 588cbe6d..7fe1c559 100644
--- a/tests/integration/resources/test_completion_stream.py
+++ b/tests/integration/resources/test_completion_stream.py
@@ -35,7 +35,7 @@ def test_create(
         random_repetition_penalty,  # noqa
     ) -> None:
         prompt = "The space robots have"
-        model = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
+        model = "meta-llama/Llama-3.3-70B-Instruct-Turbo"
         stop = ["</s>"]
 
         # max_tokens should be a reasonable number for this test