From d829414b3ee2d97b3503d71135a288f916530587 Mon Sep 17 00:00:00 2001 From: Sahil Yadav Date: Wed, 18 Mar 2026 17:19:16 -0700 Subject: [PATCH 1/2] Fix sse handling for speech models --- src/together/resources/audio/speech.py | 1 - src/together/types/__init__.py | 2 -- src/together/types/audio_speech.py | 41 ++++---------------------- 3 files changed, 6 insertions(+), 38 deletions(-) diff --git a/src/together/resources/audio/speech.py b/src/together/resources/audio/speech.py index 1c3231b7..5c048f0b 100644 --- a/src/together/resources/audio/speech.py +++ b/src/together/resources/audio/speech.py @@ -10,7 +10,6 @@ AudioLanguage, AudioResponseEncoding, AudioSpeechStreamChunk, - AudioSpeechStreamEvent, AudioSpeechStreamResponse, TogetherClient, TogetherRequest, diff --git a/src/together/types/__init__.py b/src/together/types/__init__.py index 351f2a11..c499c68e 100644 --- a/src/together/types/__init__.py +++ b/src/together/types/__init__.py @@ -5,7 +5,6 @@ AudioResponseFormat, AudioSpeechRequest, AudioSpeechStreamChunk, - AudioSpeechStreamEvent, AudioSpeechStreamResponse, AudioTimestampGranularities, AudioTranscriptionRequest, @@ -134,7 +133,6 @@ "AudioLanguage", "AudioResponseEncoding", "AudioSpeechStreamChunk", - "AudioSpeechStreamEvent", "AudioSpeechStreamResponse", "AudioTranscriptionRequest", "AudioTranslationRequest", diff --git a/src/together/types/audio_speech.py b/src/together/types/audio_speech.py index 82636e5c..1886dee5 100644 --- a/src/together/types/audio_speech.py +++ b/src/together/types/audio_speech.py @@ -41,14 +41,6 @@ class AudioResponseEncoding(str, Enum): PCM_ALAW = "pcm_alaw" -class AudioObjectType(str, Enum): - AUDIO_TTS_CHUNK = "audio.tts.chunk" - - -class StreamSentinelType(str, Enum): - DONE = "[DONE]" - - class AudioSpeechRequest(BaseModel): model: str input: str @@ -61,21 +53,8 @@ class AudioSpeechRequest(BaseModel): class AudioSpeechStreamChunk(BaseModel): - object: AudioObjectType = AudioObjectType.AUDIO_TTS_CHUNK - model: str - b64: str - - -class AudioSpeechStreamEvent(BaseModel): - data: AudioSpeechStreamChunk - - -class StreamSentinel(BaseModel): - data: StreamSentinelType = StreamSentinelType.DONE - - -class AudioSpeechStreamEventResponse(BaseModel): - response: AudioSpeechStreamEvent | StreamSentinel + type: str = "conversation.item.audio_output.delta" + delta: str class AudioSpeechStreamResponse(BaseModel): @@ -127,18 +106,10 @@ def stream_to_file( if isinstance(chunk.data, bytes): audio_chunks.append(chunk.data) elif isinstance(chunk.data, dict): - # SSE format with JSON/base64 - try: - stream_event = AudioSpeechStreamEventResponse( - response={"data": chunk.data} - ) - if isinstance(stream_event.response, StreamSentinel): - break - audio_chunks.append( - base64.b64decode(stream_event.response.data.b64) - ) - except Exception: - continue # Skip malformed chunks + # SSE format: {"type": "conversation.item.audio_output.delta", "delta": ""} + delta = chunk.data.get("delta") + if delta: + audio_chunks.append(base64.b64decode(delta)) if not audio_chunks: raise ValueError("No audio data received in streaming response") From 5a8c26316b0d022fe8118c9e3c319c1e6283fd31 Mon Sep 17 00:00:00 2001 From: Sahil Yadav Date: Wed, 18 Mar 2026 17:53:15 -0700 Subject: [PATCH 2/2] fix integration tests --- tests/integration/constants.py | 2 +- tests/integration/resources/test_completion_stream.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/constants.py b/tests/integration/constants.py index 1c56030a..1dc5f8f8 100644 --- a/tests/integration/constants.py +++ b/tests/integration/constants.py @@ -1,5 +1,5 @@ completion_test_model_list = [ - "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + "meta-llama/Llama-3.3-70B-Instruct-Turbo", ] chat_test_model_list = [] embedding_test_model_list = [] diff --git a/tests/integration/resources/test_completion_stream.py b/tests/integration/resources/test_completion_stream.py index 588cbe6d..7fe1c559 100644 --- a/tests/integration/resources/test_completion_stream.py +++ b/tests/integration/resources/test_completion_stream.py @@ -35,7 +35,7 @@ def test_create( random_repetition_penalty, # noqa ) -> None: prompt = "The space robots have" - model = "meta-llama/Llama-4-Scout-17B-16E-Instruct" + model = "meta-llama/Llama-3.3-70B-Instruct-Turbo" stop = [""] # max_tokens should be a reasonable number for this test