AssemblyAI · bgotthold-aai · Jun 15, 2026 · Jun 15, 2026
diff --git a/assemblyai/__version__.py b/assemblyai/__version__.py
@@ -1 +1 @@
-__version__ = "0.64.16"
+__version__ = "0.64.20"
diff --git a/assemblyai/streaming/v3/models.py b/assemblyai/streaming/v3/models.py
@@ -159,6 +159,7 @@ class SpeechModel(str, Enum):
     u3_rt_pro = "u3-rt-pro"
     u3_rt_pro_beta_1 = "u3-rt-pro-beta-1"
     whisper_rt = "whisper-rt"
+    universal_3_5_pro = "universal-3-5-pro"
     u3_pro = "u3-pro"  # Deprecated: Use u3_rt_pro instead
 
     def __str__(self):

diff --git a/assemblyai/sync_api.py b/assemblyai/sync_api.py
@@ -13,9 +13,10 @@ def _error_from_response(response: httpx.Response) -> types.SyncTranscriptError:
     """
     Builds a `SyncTranscriptError` from a non-200 response.
 
-    The service uses two error envelopes: `{"error_code", "message"}` for
-    audio/capacity/inference errors and `{"detail"}` for auth and rate-limit
-    errors. Parse by status code, not by assuming `error_code` is present.
+    The service returns an RFC 9457 problem-details envelope
+    (`{"status", "title", "detail"}`); `error_code` is the snake_cased
+    `title` (e.g. `"Audio Too Large"` -> `audio_too_large`). Older envelopes
+    (`{"error_code", "message"}` and `{"detail"}`) are still accepted.
     """
     error_code: Optional[str] = None
     message: Optional[str] = None
@@ -24,7 +25,10 @@ def _error_from_response(response: httpx.Response) -> types.SyncTranscriptError:
         body = response.json()
         if isinstance(body, dict):
             error_code = body.get("error_code")
-            message = body.get("message") or body.get("detail")
+            title = body.get("title")
+            if error_code is None and isinstance(title, str) and title:
+                error_code = title.lower().replace(" ", "_")
+            message = body.get("detail") or body.get("message")
     except Exception:
         message = response.text or None
 

diff --git a/assemblyai/types.py b/assemblyai/types.py
@@ -88,9 +88,10 @@ class SyncTranscriptError(AssemblyAIError):
     """
     Error raised when a synchronous transcription request fails.
 
-    Carries the server's machine-readable `error_code` (e.g. `bad_audio`,
-    `audio_too_large`, `capacity_exceeded`, `inference_timeout`) when present,
-    and `retry_after` (seconds) for 429/503 responses that include a
+    Carries a machine-readable `error_code` when present — the server's own
+    `error_code`, or else the snake_cased problem-details `title` (e.g.
+    `bad_audio`, `audio_too_large`, `capacity_exceeded`, `inference_timeout`)
+    — and `retry_after` (seconds) for 429/503 responses that include a
     `Retry-After` header.
     """
 
@@ -2981,16 +2982,21 @@ class LemurPurgeResponse(BaseModel):
     "The result of the LeMUR purge request"
 
 
-# Caps mirror the sync service's `config` part so an oversized request fails
-# locally with a clear message instead of a 400 round trip.
+# Caps mirror the sync service's `config` part. `prompt` and `word_boost`
+# over their caps are rejected; `conversation_context` over its caps is
+# trimmed (oldest turns first), matching the server.
 _SYNC_MAX_PROMPT_LEN = 4096
 _SYNC_MAX_WORD_BOOST_LEN = 2048
 _SYNC_MAX_CONVERSATION_CONTEXT_TURNS = 100
 _SYNC_MAX_CONVERSATION_CONTEXT_LEN = 4096
 
 
 def _normalize_conversation_context(v):
-    """Coerce a single string to a one-turn list, strip + drop empties, cap.
+    """Coerce a single string to a one-turn list, strip + drop empties, trim.
+
+    Context over the turn-count or character caps is trimmed by dropping the
+    oldest turns (front of the list) first — the same trim the server applies
+    — so the most recent turn is kept whenever it fits on its own.
 
     Shared by the pydantic v1 and v2 validators on ``SyncTranscriptionConfig``.
     """
@@ -2999,17 +3005,13 @@ def _normalize_conversation_context(v):
     if isinstance(v, str):
         v = [v]
     turns = [t.strip() for t in v if t and t.strip()]
-    if len(turns) > _SYNC_MAX_CONVERSATION_CONTEXT_TURNS:
-        raise ValueError(
-            f"conversation_context exceeds {_SYNC_MAX_CONVERSATION_CONTEXT_TURNS} "
-            f"turns (got {len(turns)})"
-        )
     total = sum(len(t) for t in turns)
-    if total > _SYNC_MAX_CONVERSATION_CONTEXT_LEN:
-        raise ValueError(
-            f"conversation_context exceeds {_SYNC_MAX_CONVERSATION_CONTEXT_LEN} "
-            f"characters (got {total})"
-        )
+    while turns and (
+        len(turns) > _SYNC_MAX_CONVERSATION_CONTEXT_TURNS
+        or total > _SYNC_MAX_CONVERSATION_CONTEXT_LEN
+    ):
+        total -= len(turns[0])
+        turns = turns[1:]
     return turns or None
 
 
@@ -3045,9 +3047,11 @@ class SyncTranscriptionConfig(BaseModel):
     audio so it transcribes the clip with better continuity and proper-noun
     consistency. Include turns from either side of the conversation (e.g. a
     voice agent's replies) as separate entries; entries carry no speaker labels.
-    A single string is accepted and treated as one turn. Max 100 turns / 4096
-    characters total; when the prompt exceeds the model token budget the oldest
-    turns are dropped first, so put the most recent turn last."""
+    A single string is accepted and treated as one turn. Capped at 100 turns /
+    4096 characters total; over-cap context is trimmed, oldest turns first,
+    not rejected (a lone turn over 4096 characters leaves no context). Oldest
+    turns are likewise dropped first when the prompt exceeds the model token
+    budget, so put the most recent turn last."""
 
     language_code: Optional[Union[str, List[str]]] = None
     """ISO 639-1 language code, or a list of codes for multilingual audio (e.g.
@@ -3118,3 +3122,8 @@ class SyncTranscriptResponse(BaseModel):
 
     session_id: str
     "Server-generated UUID for this request. Record it to correlate with support."
+
+    request_time_ms: Optional[float] = None
+    """End-to-end server-side request time in milliseconds: queue wait, auth,
+    multipart parse, decode, inference, and serialization. ``None`` when the
+    server predates the field."""
diff --git a/tests/unit/test_streaming.py b/tests/unit/test_streaming.py
@@ -877,6 +877,35 @@ def mocked_websocket_connect(
     assert "speech_model=whisper-rt" in actual_url
 
 
+def test_client_connect_with_universal_3_5_pro(mocker: MockFixture):
+    actual_url = None
+
+    def mocked_websocket_connect(
+        url: str, additional_headers: dict, open_timeout: float
+    ):
+        nonlocal actual_url
+        actual_url = url
+
+    mocker.patch(
+        "assemblyai.streaming.v3.client.websocket_connect",
+        new=mocked_websocket_connect,
+    )
+
+    _disable_rw_threads(mocker)
+
+    options = StreamingClientOptions(api_key="test", api_host="api.example.com")
+    client = StreamingClient(options)
+
+    params = StreamingParameters(
+        sample_rate=16000,
+        speech_model=SpeechModel.universal_3_5_pro,
+    )
+
+    client.connect(params)
+
+    assert "speech_model=universal-3-5-pro" in actual_url
+
+
 def test_turn_event_with_speaker_label():
     data = {
         "type": "Turn",

diff --git a/tests/unit/test_sync.py b/tests/unit/test_sync.py
@@ -17,6 +17,7 @@
     "confidence": 0.92,
     "audio_duration_ms": 400,
     "session_id": "eb92c4ff-4bbb-429f-9b99-7279d7fe738f",
+    "request_time_ms": 243.7,
 }
 
 
@@ -43,6 +44,24 @@ def test_transcribe_bytes_parses_response(httpx_mock: HTTPXMock):
     assert result.words[0].start == 0
     assert result.words[0].end == 200
     assert result.words[1].text == "world"
+    assert result.request_time_ms == 243.7
+
+
+def test_transcribe_parses_response_without_request_time(httpx_mock: HTTPXMock):
+    # Given a server response that predates the request_time_ms field
+    response = {k: v for k, v in _OK_RESPONSE.items() if k != "request_time_ms"}
+    httpx_mock.add_response(
+        url=TRANSCRIBE_URL,
+        method="POST",
+        status_code=httpx.codes.OK,
+        json=response,
+    )
+
+    # When transcribing
+    result = aai.SyncTranscriber().transcribe(b"RIFFfake-wav-bytes")
+
+    # Then request_time_ms is None instead of a parse failure
+    assert result.request_time_ms is None
 
 
 def test_transcribe_sends_model_header_and_wav_part(httpx_mock: HTTPXMock):
@@ -124,11 +143,29 @@ def test_transcribe_coerces_conversation_context_string(httpx_mock: HTTPXMock):
     assert b'"Sure, what date were you thinking?"' in body
 
 
-def test_conversation_context_rejects_too_many_chars():
-    # Given conversation_context whose total length exceeds the cap,
-    # When/Then constructing the config raises a validation error
-    with pytest.raises(Exception):
-        aai.SyncTranscriptionConfig(conversation_context=["a" * 5000])
+def test_conversation_context_trims_oldest_turns_over_char_cap():
+    # Given conversation_context whose total length exceeds the 4096-char cap
+    config = aai.SyncTranscriptionConfig(conversation_context=["a" * 3000, "b" * 3000])
+
+    # Then the oldest turn is dropped and the most recent turn is kept
+    assert config.conversation_context == ["b" * 3000]
+
+
+def test_conversation_context_trims_oldest_turns_over_turn_cap():
+    # Given conversation_context with more than 100 turns
+    turns = [f"turn {i}" for i in range(120)]
+    config = aai.SyncTranscriptionConfig(conversation_context=turns)
+
+    # Then it is trimmed to the 100 most recent turns, oldest dropped first
+    assert config.conversation_context == turns[20:]
+
+
+def test_conversation_context_empties_when_single_turn_over_char_cap():
+    # Given a single turn that alone exceeds the character cap
+    config = aai.SyncTranscriptionConfig(conversation_context=["a" * 5000])
+
+    # Then the context trims to nothing rather than raising
+    assert config.conversation_context is None
 
 
 def test_transcribe_sends_single_language_code(httpx_mock: HTTPXMock):
@@ -226,8 +263,32 @@ def test_word_boost_too_long_raises():
         aai.SyncTranscriptionConfig(word_boost=["x" * 3000])
 
 
-def test_error_envelope_maps_to_sync_transcript_error(httpx_mock: HTTPXMock):
-    # Given the server rejects oversized audio
+def test_problem_details_envelope_maps_to_sync_transcript_error(
+    httpx_mock: HTTPXMock,
+):
+    # Given the server rejects oversized audio with a problem-details body
+    httpx_mock.add_response(
+        url=TRANSCRIBE_URL,
+        method="POST",
+        status_code=413,
+        json={"status": 413, "title": "Audio Too Large", "detail": "too long"},
+    )
+
+    # When transcribing, Then a SyncTranscriptError carries the snake_cased
+    # title as error_code, plus the status and detail
+    with pytest.raises(aai.SyncTranscriptError) as exc_info:
+        aai.SyncTranscriber().transcribe(b"RIFFfake-wav-bytes")
+
+    error = exc_info.value
+    assert error.status_code == 413
+    assert error.error_code == "audio_too_large"
+    assert "too long" in str(error)
+
+
+def test_legacy_error_envelope_maps_to_sync_transcript_error(
+    httpx_mock: HTTPXMock,
+):
+    # Given a server still on the pre-problem-details envelope
     httpx_mock.add_response(
         url=TRANSCRIBE_URL,
         method="POST",
@@ -251,20 +312,44 @@ def test_rate_limit_surfaces_retry_after(httpx_mock: HTTPXMock):
         url=TRANSCRIBE_URL,
         method="POST",
         status_code=429,
-        json={"detail": "Too many requests"},
+        json={
+            "status": 429,
+            "title": "Too Many Requests",
+            "detail": "Too many requests",
+        },
         headers={"Retry-After": "5"},
     )
 
-    # When transcribing, Then retry_after is parsed and error_code is absent
+    # When transcribing, Then retry_after and the snake_cased title are parsed
     with pytest.raises(aai.SyncTranscriptError) as exc_info:
         aai.SyncTranscriber().transcribe(b"RIFFfake-wav-bytes")
 
     error = exc_info.value
     assert error.status_code == 429
-    assert error.error_code is None
+    assert error.error_code == "too_many_requests"
     assert error.retry_after == 5
 
 
+def test_legacy_detail_only_envelope(httpx_mock: HTTPXMock):
+    # Given an auth-style body with only a detail field
+    httpx_mock.add_response(
+        url=TRANSCRIBE_URL,
+        method="POST",
+        status_code=401,
+        json={"detail": "Invalid API key"},
+    )
+
+    # When transcribing, Then the detail becomes the message and error_code
+    # stays absent
+    with pytest.raises(aai.SyncTranscriptError) as exc_info:
+        aai.SyncTranscriber().transcribe(b"RIFFfake-wav-bytes")
+
+    error = exc_info.value
+    assert error.status_code == 401
+    assert error.error_code is None
+    assert "Invalid API key" in str(error)
+
+
 def test_default_model_is_u3_sync_pro():
     # Given a default config
     # When inspecting the model