From 24c07df7ce9324f6454592d7301aa00c21e0f161 Mon Sep 17 00:00:00 2001 From: AssemblyAI Date: Mon, 15 Jun 2026 09:57:50 -0600 Subject: [PATCH] Project import generated by Copybara. GitOrigin-RevId: 5a3e58c4adf7f3fc384336603c30353161504fe6 --- assemblyai/__version__.py | 2 +- assemblyai/streaming/v3/models.py | 1 + assemblyai/sync_api.py | 12 ++-- assemblyai/types.py | 47 +++++++------ tests/unit/test_streaming.py | 29 +++++++++ tests/unit/test_sync.py | 105 +++++++++++++++++++++++++++--- 6 files changed, 162 insertions(+), 34 deletions(-) diff --git a/assemblyai/__version__.py b/assemblyai/__version__.py index 5c34a51..0baa0e8 100644 --- a/assemblyai/__version__.py +++ b/assemblyai/__version__.py @@ -1 +1 @@ -__version__ = "0.64.16" +__version__ = "0.64.20" diff --git a/assemblyai/streaming/v3/models.py b/assemblyai/streaming/v3/models.py index c45a34d..089a98f 100644 --- a/assemblyai/streaming/v3/models.py +++ b/assemblyai/streaming/v3/models.py @@ -159,6 +159,7 @@ class SpeechModel(str, Enum): u3_rt_pro = "u3-rt-pro" u3_rt_pro_beta_1 = "u3-rt-pro-beta-1" whisper_rt = "whisper-rt" + universal_3_5_pro = "universal-3-5-pro" u3_pro = "u3-pro" # Deprecated: Use u3_rt_pro instead def __str__(self): diff --git a/assemblyai/sync_api.py b/assemblyai/sync_api.py index 632eeb7..41d279e 100644 --- a/assemblyai/sync_api.py +++ b/assemblyai/sync_api.py @@ -13,9 +13,10 @@ def _error_from_response(response: httpx.Response) -> types.SyncTranscriptError: """ Builds a `SyncTranscriptError` from a non-200 response. - The service uses two error envelopes: `{"error_code", "message"}` for - audio/capacity/inference errors and `{"detail"}` for auth and rate-limit - errors. Parse by status code, not by assuming `error_code` is present. + The service returns an RFC 9457 problem-details envelope + (`{"status", "title", "detail"}`); `error_code` is the snake_cased + `title` (e.g. `"Audio Too Large"` -> `audio_too_large`). Older envelopes + (`{"error_code", "message"}` and `{"detail"}`) are still accepted. """ error_code: Optional[str] = None message: Optional[str] = None @@ -24,7 +25,10 @@ def _error_from_response(response: httpx.Response) -> types.SyncTranscriptError: body = response.json() if isinstance(body, dict): error_code = body.get("error_code") - message = body.get("message") or body.get("detail") + title = body.get("title") + if error_code is None and isinstance(title, str) and title: + error_code = title.lower().replace(" ", "_") + message = body.get("detail") or body.get("message") except Exception: message = response.text or None diff --git a/assemblyai/types.py b/assemblyai/types.py index bafa3c7..7597481 100644 --- a/assemblyai/types.py +++ b/assemblyai/types.py @@ -88,9 +88,10 @@ class SyncTranscriptError(AssemblyAIError): """ Error raised when a synchronous transcription request fails. - Carries the server's machine-readable `error_code` (e.g. `bad_audio`, - `audio_too_large`, `capacity_exceeded`, `inference_timeout`) when present, - and `retry_after` (seconds) for 429/503 responses that include a + Carries a machine-readable `error_code` — the snake_cased problem-details + `title` from the server (e.g. `bad_audio`, `audio_too_large`, + `capacity_exceeded`, `inference_timeout`) — when present, and + `retry_after` (seconds) for 429/503 responses that include a `Retry-After` header. """ @@ -2981,8 +2982,9 @@ class LemurPurgeResponse(BaseModel): "The result of the LeMUR purge request" -# Caps mirror the sync service's `config` part so an oversized request fails -# locally with a clear message instead of a 400 round trip. +# Caps mirror the sync service's `config` part. `prompt` and `word_boost` +# over their caps are rejected; `conversation_context` over its caps is +# trimmed (oldest turns first), matching the server. _SYNC_MAX_PROMPT_LEN = 4096 _SYNC_MAX_WORD_BOOST_LEN = 2048 _SYNC_MAX_CONVERSATION_CONTEXT_TURNS = 100 @@ -2990,7 +2992,11 @@ class LemurPurgeResponse(BaseModel): def _normalize_conversation_context(v): - """Coerce a single string to a one-turn list, strip + drop empties, cap. + """Coerce a single string to a one-turn list, strip + drop empties, trim. + + Context over the turn-count or character caps is trimmed by dropping the + oldest turns (front of the list) first — the same trim the server applies + — so the most recent turn is kept whenever it fits on its own. Shared by the pydantic v1 and v2 validators on ``SyncTranscriptionConfig``. """ @@ -2999,17 +3005,13 @@ def _normalize_conversation_context(v): if isinstance(v, str): v = [v] turns = [t.strip() for t in v if t and t.strip()] - if len(turns) > _SYNC_MAX_CONVERSATION_CONTEXT_TURNS: - raise ValueError( - f"conversation_context exceeds {_SYNC_MAX_CONVERSATION_CONTEXT_TURNS} " - f"turns (got {len(turns)})" - ) total = sum(len(t) for t in turns) - if total > _SYNC_MAX_CONVERSATION_CONTEXT_LEN: - raise ValueError( - f"conversation_context exceeds {_SYNC_MAX_CONVERSATION_CONTEXT_LEN} " - f"characters (got {total})" - ) + while turns and ( + len(turns) > _SYNC_MAX_CONVERSATION_CONTEXT_TURNS + or total > _SYNC_MAX_CONVERSATION_CONTEXT_LEN + ): + total -= len(turns[0]) + turns = turns[1:] return turns or None @@ -3045,9 +3047,11 @@ class SyncTranscriptionConfig(BaseModel): audio so it transcribes the clip with better continuity and proper-noun consistency. Include turns from either side of the conversation (e.g. a voice agent's replies) as separate entries; entries carry no speaker labels. - A single string is accepted and treated as one turn. Max 100 turns / 4096 - characters total; when the prompt exceeds the model token budget the oldest - turns are dropped first, so put the most recent turn last.""" + A single string is accepted and treated as one turn. Capped at 100 turns / + 4096 characters total — over-cap context is trimmed (oldest turns dropped + first), not rejected, and the oldest turns are likewise dropped first when + the prompt exceeds the model token budget, so put the most recent turn + last.""" language_code: Optional[Union[str, List[str]]] = None """ISO 639-1 language code, or a list of codes for multilingual audio (e.g. @@ -3118,3 +3122,8 @@ class SyncTranscriptResponse(BaseModel): session_id: str "Server-generated UUID for this request. Record it to correlate with support." + + request_time_ms: Optional[float] = None + """End-to-end server-side request time in milliseconds: queue wait, auth, + multipart parse, decode, inference, and serialization. ``None`` when the + server predates the field.""" diff --git a/tests/unit/test_streaming.py b/tests/unit/test_streaming.py index c9cbbe2..fc84951 100644 --- a/tests/unit/test_streaming.py +++ b/tests/unit/test_streaming.py @@ -877,6 +877,35 @@ def mocked_websocket_connect( assert "speech_model=whisper-rt" in actual_url +def test_client_connect_with_universal_3_5_pro(mocker: MockFixture): + actual_url = None + + def mocked_websocket_connect( + url: str, additional_headers: dict, open_timeout: float + ): + nonlocal actual_url + actual_url = url + + mocker.patch( + "assemblyai.streaming.v3.client.websocket_connect", + new=mocked_websocket_connect, + ) + + _disable_rw_threads(mocker) + + options = StreamingClientOptions(api_key="test", api_host="api.example.com") + client = StreamingClient(options) + + params = StreamingParameters( + sample_rate=16000, + speech_model=SpeechModel.universal_3_5_pro, + ) + + client.connect(params) + + assert "speech_model=universal-3-5-pro" in actual_url + + def test_turn_event_with_speaker_label(): data = { "type": "Turn", diff --git a/tests/unit/test_sync.py b/tests/unit/test_sync.py index 33e3c5e..fb0efcc 100644 --- a/tests/unit/test_sync.py +++ b/tests/unit/test_sync.py @@ -17,6 +17,7 @@ "confidence": 0.92, "audio_duration_ms": 400, "session_id": "eb92c4ff-4bbb-429f-9b99-7279d7fe738f", + "request_time_ms": 243.7, } @@ -43,6 +44,24 @@ def test_transcribe_bytes_parses_response(httpx_mock: HTTPXMock): assert result.words[0].start == 0 assert result.words[0].end == 200 assert result.words[1].text == "world" + assert result.request_time_ms == 243.7 + + +def test_transcribe_parses_response_without_request_time(httpx_mock: HTTPXMock): + # Given a server response that predates the request_time_ms field + response = {k: v for k, v in _OK_RESPONSE.items() if k != "request_time_ms"} + httpx_mock.add_response( + url=TRANSCRIBE_URL, + method="POST", + status_code=httpx.codes.OK, + json=response, + ) + + # When transcribing + result = aai.SyncTranscriber().transcribe(b"RIFFfake-wav-bytes") + + # Then request_time_ms is None instead of a parse failure + assert result.request_time_ms is None def test_transcribe_sends_model_header_and_wav_part(httpx_mock: HTTPXMock): @@ -124,11 +143,29 @@ def test_transcribe_coerces_conversation_context_string(httpx_mock: HTTPXMock): assert b'"Sure, what date were you thinking?"' in body -def test_conversation_context_rejects_too_many_chars(): - # Given conversation_context whose total length exceeds the cap, - # When/Then constructing the config raises a validation error - with pytest.raises(Exception): - aai.SyncTranscriptionConfig(conversation_context=["a" * 5000]) +def test_conversation_context_trims_oldest_turns_over_char_cap(): + # Given conversation_context whose total length exceeds the 4096-char cap + config = aai.SyncTranscriptionConfig(conversation_context=["a" * 3000, "b" * 3000]) + + # Then the oldest turn is dropped and the most recent turn is kept + assert config.conversation_context == ["b" * 3000] + + +def test_conversation_context_trims_oldest_turns_over_turn_cap(): + # Given conversation_context with more than 100 turns + turns = [f"turn {i}" for i in range(120)] + config = aai.SyncTranscriptionConfig(conversation_context=turns) + + # Then it is trimmed to the 100 most recent turns, oldest dropped first + assert config.conversation_context == turns[20:] + + +def test_conversation_context_empties_when_single_turn_over_char_cap(): + # Given a single turn that alone exceeds the character cap + config = aai.SyncTranscriptionConfig(conversation_context=["a" * 5000]) + + # Then the context trims to nothing rather than raising + assert config.conversation_context is None def test_transcribe_sends_single_language_code(httpx_mock: HTTPXMock): @@ -226,8 +263,32 @@ def test_word_boost_too_long_raises(): aai.SyncTranscriptionConfig(word_boost=["x" * 3000]) -def test_error_envelope_maps_to_sync_transcript_error(httpx_mock: HTTPXMock): - # Given the server rejects oversized audio +def test_problem_details_envelope_maps_to_sync_transcript_error( + httpx_mock: HTTPXMock, +): + # Given the server rejects oversized audio with a problem-details body + httpx_mock.add_response( + url=TRANSCRIBE_URL, + method="POST", + status_code=413, + json={"status": 413, "title": "Audio Too Large", "detail": "too long"}, + ) + + # When transcribing, Then a SyncTranscriptError carries the snake_cased + # title as error_code, plus the status and detail + with pytest.raises(aai.SyncTranscriptError) as exc_info: + aai.SyncTranscriber().transcribe(b"RIFFfake-wav-bytes") + + error = exc_info.value + assert error.status_code == 413 + assert error.error_code == "audio_too_large" + assert "too long" in str(error) + + +def test_legacy_error_envelope_maps_to_sync_transcript_error( + httpx_mock: HTTPXMock, +): + # Given a server still on the pre-problem-details envelope httpx_mock.add_response( url=TRANSCRIBE_URL, method="POST", @@ -251,20 +312,44 @@ def test_rate_limit_surfaces_retry_after(httpx_mock: HTTPXMock): url=TRANSCRIBE_URL, method="POST", status_code=429, - json={"detail": "Too many requests"}, + json={ + "status": 429, + "title": "Too Many Requests", + "detail": "Too many requests", + }, headers={"Retry-After": "5"}, ) - # When transcribing, Then retry_after is parsed and error_code is absent + # When transcribing, Then retry_after and the snake_cased title are parsed with pytest.raises(aai.SyncTranscriptError) as exc_info: aai.SyncTranscriber().transcribe(b"RIFFfake-wav-bytes") error = exc_info.value assert error.status_code == 429 - assert error.error_code is None + assert error.error_code == "too_many_requests" assert error.retry_after == 5 +def test_legacy_detail_only_envelope(httpx_mock: HTTPXMock): + # Given an auth-style body with only a detail field + httpx_mock.add_response( + url=TRANSCRIBE_URL, + method="POST", + status_code=401, + json={"detail": "Invalid API key"}, + ) + + # When transcribing, Then the detail becomes the message and error_code + # stays absent + with pytest.raises(aai.SyncTranscriptError) as exc_info: + aai.SyncTranscriber().transcribe(b"RIFFfake-wav-bytes") + + error = exc_info.value + assert error.status_code == 401 + assert error.error_code is None + assert "Invalid API key" in str(error) + + def test_default_model_is_u3_sync_pro(): # Given a default config # When inspecting the model