From dba441a1f4a317e508f35484b73cd1f7dff42f77 Mon Sep 17 00:00:00 2001
From: tsushanth <78000697+tsushanth@users.noreply.github.com>
Date: Fri, 12 Jun 2026 07:38:58 -0700
Subject: [PATCH] fix(types): make AutohighlightResult.rank Optional to
 tolerate missing field
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #148.

The Auto Highlights API has been observed in production to return
individual `auto_highlights_result.results` entries with no `rank`
field set. The SDK currently declares `rank: float` as required, so
the Pydantic validator raises a `ValidationError` while parsing the
overall `TranscriptResponse` — turning one missing field on a single
entry into a hard failure that loses the entire transcript (text,
utterances, words, sentiment, every other audio-intelligence result).

Make `rank` Optional with a default of `None` so a missing rank parses
through cleanly. The absence is observable to callers via `rank is
None` — no silent default-value substitution.

The change is narrowly scoped to the single field surfaced by the
reporter. The other fields on `AutohighlightResult` (count, text,
timestamps) remain required.

The regression test deletes `rank` from the first highlight in the mock
response, then asserts that the TranscriptResponse parses, that the
affected entry's rank is None, and that the second entry (when the
factory produces one) still has its rank set.
Verified that the new test fails on master (the existing required-rank
contract) and passes on this commit.
---
 assemblyai/types.py                |  7 +++++-
 tests/unit/test_auto_highlights.py | 36 ++++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/assemblyai/types.py b/assemblyai/types.py
index bafa3c7..967ce10 100644
--- a/assemblyai/types.py
+++ b/assemblyai/types.py
@@ -2140,7 +2140,12 @@ class Timestamp(BaseModel):
 
 class AutohighlightResult(BaseModel):
     count: int
-    rank: float
+    # The Auto Highlights API has been observed in production to return
+    # individual `results` entries with no `rank` field set (see #148).
+    # Treat it as optional so a TranscriptResponse parses successfully when
+    # any single highlight is missing the rank; the absence is then
+    # observable to callers via `rank is None`.
+    rank: Optional[float] = None
     text: str
     timestamps: List[Timestamp]
 
diff --git a/tests/unit/test_auto_highlights.py b/tests/unit/test_auto_highlights.py
index 4648760..1932b6e 100644
--- a/tests/unit/test_auto_highlights.py
+++ b/tests/unit/test_auto_highlights.py
@@ -93,3 +93,39 @@ def test_auto_highlights_enabled(httpx_mock: HTTPXMock):
         ):
             assert transcript_timestamp.start == response_timestamp["start"]
             assert transcript_timestamp.end == response_timestamp["end"]
+
+
+def test_auto_highlights_parses_result_without_rank(httpx_mock: HTTPXMock):
+    """
+    Regression for #148. The Auto Highlights API has been observed in
+    production to return individual `results` entries with no `rank` field
+    set, which previously raised a Pydantic ValidationError and made the
+    entire TranscriptResponse unparseable. The field is now Optional, so a
+    missing rank parses as None and the rest of the response is preserved.
+    """
+    mock_response = factories.generate_dict_factory(
+        AutohighlightTranscriptResponseFactory
+    )()
+    # Strip the `rank` from the first highlight to simulate the API's
+    # observed behavior. Other fields (count, text, timestamps) are left
+    # in place.
+    assert mock_response["auto_highlights_result"]["results"], (
+        "factory should produce at least one highlight"
+    )
+    del mock_response["auto_highlights_result"]["results"][0]["rank"]
+
+    _, transcript = unit_test_utils.submit_mock_transcription_request(
+        httpx_mock,
+        mock_response=mock_response,
+        config=aai.TranscriptionConfig(auto_highlights=True),
+    )
+
+    assert transcript.error is None
+    assert transcript.auto_highlights is not None
+    assert transcript.auto_highlights.results is not None
+    assert transcript.auto_highlights.results[0].rank is None
+    # The rest of the first result is still present; when the factory
+    # yields a second entry, its rank must still be populated.
+    assert transcript.auto_highlights.results[0].text is not None
+    if len(transcript.auto_highlights.results) > 1:
+        assert transcript.auto_highlights.results[1].rank is not None