From 44adecaf20948a018fc3db974a85b39638b7f2b4 Mon Sep 17 00:00:00 2001 From: Pavan Kumar Date: Tue, 26 May 2026 16:05:44 +0530 Subject: [PATCH] fix(links): use pydash.get + decode body to fix analyze chain (CON-573) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The in-house navigate_dict, copy-pasted into five link modules, used `key in current` for traversal — which on a non-dict (e.g. an analysis whose body is a JSON-encoded string after #182) does a substring check and then crashes with TypeError on `current[key]`. Replace all five copies with pydash.get (already a core dep), and in analyze + analyze_and_label also wrap source with Vcon.with_decoded_body so a dotted text_location can still drill through a JSON-encoded body — matching what check_and_tag and detect_engagement already do. Co-Authored-By: Claude Opus 4.7 (1M context) --- conserver/links/analyze/__init__.py | 25 +++++----- conserver/links/analyze/tests/test_analyze.py | 46 ++++++++----------- conserver/links/analyze_and_label/__init__.py | 20 ++++---- .../tests/test_analyze_and_label.py | 22 +-------- conserver/links/analyze_vcon/__init__.py | 9 ---- conserver/links/check_and_tag/__init__.py | 16 ++----- .../check_and_tag/tests/test_check_and_tag.py | 15 +++--- conserver/links/detect_engagement/__init__.py | 12 +---- .../tests/test_detect_engagement.py | 18 -------- 9 files changed, 52 insertions(+), 131 deletions(-) diff --git a/conserver/links/analyze/__init__.py b/conserver/links/analyze/__init__.py index 3549ce4..d6ad9e1 100644 --- a/conserver/links/analyze/__init__.py +++ b/conserver/links/analyze/__init__.py @@ -12,6 +12,8 @@ import time from lib.links.filters import is_included, randomly_execute_with_sampling from lib.ai_usage import send_ai_usage_data_for_tracking +from pydash import get as pydash_get +from vcon import Vcon logger = init_logger(__name__) @@ -109,15 +111,21 @@ def run( client = get_openai_client(opts) - source_type = navigate_dict(opts, "source.analysis_type") - text_location = navigate_dict(opts, "source.text_location") + source_type = pydash_get(opts, "source.analysis_type") + text_location = pydash_get(opts, "source.text_location") for index, dialog in enumerate(vCon.dialog): source = get_analysis_for_type(vCon, index, source_type) if not source: logger.warning("No %s found for vCon: %s", source_type, vCon.uuid) continue - source_text = navigate_dict(source, text_location) + # Decode body so a dotted ``text_location`` like + # ``body.paragraphs.transcript`` can drill through a JSON-encoded + # body (spec-current shape after vcon-server#182). pydash.get also + # returns None instead of raising on any non-dict mid-path, so an + # unexpected source shape skip-warns rather than crashing the + # chain (see CON-573). + source_text = pydash_get(Vcon.with_decoded_body(source), text_location) if not source_text: logger.warning("No source_text found at %s for vCon: %s", text_location, vCon.uuid) continue @@ -191,14 +199,3 @@ def run( return vcon_uuid -def navigate_dict(dictionary, path): - if dictionary is None: - return None - keys = path.split(".") - current = dictionary - for key in keys: - if key in current: - current = current[key] - else: - return None - return current diff --git a/conserver/links/analyze/tests/test_analyze.py b/conserver/links/analyze/tests/test_analyze.py index cf7ced6..89b8533 100644 --- a/conserver/links/analyze/tests/test_analyze.py +++ b/conserver/links/analyze/tests/test_analyze.py @@ -12,11 +12,12 @@ import os import pytest from unittest.mock import Mock, patch +from pydash import get as pydash_get + from links.analyze import ( generate_analysis, run, default_options, - navigate_dict, get_analysis_for_type, ) from vcon import Vcon @@ -84,31 +85,24 @@ def mock_redis_with_vcon(mock_vcon_redis, sample_vcon): return mock_instance -class TestNavigateDict: - """Test the navigate_dict utility function""" - - def test_navigate_dict_simple(self): - """Test navigating a simple dictionary path""" - test_dict = {"a": {"b": {"c": "value"}}} - result = navigate_dict(test_dict, "a.b.c") - assert result == "value" - - def test_navigate_dict_missing_key(self): - """Test navigating to a missing key""" - test_dict = {"a": {"b": {"c": "value"}}} - result = navigate_dict(test_dict, "a.b.d") - assert result is None - - def test_navigate_dict_empty_path(self): - """Test navigating with empty path""" - test_dict = {"a": "value"} - result = navigate_dict(test_dict, "") - assert result is None # Empty path should return None - - def test_navigate_dict_none_input(self): - """Test navigating with None input""" - result = navigate_dict(None, "a.b.c") - assert result is None +class TestPydashGetContract: + """The analyze link uses pydash.get to walk dotted paths like + ``source.text_location`` into the analysis dict. The behaviour + contract we depend on — return None for any unreachable path, + including non-dict intermediates — is what fixes CON-573, so it's + worth pinning here even though pydash itself is well-tested.""" + + def test_returns_none_when_midpath_is_non_dict(self): + """Regression for CON-573: when a transcript analysis stores + body as a plain string (instead of {paragraphs:{transcript:…}}), + the dotted lookup must return None rather than raise TypeError. + The previous in-house navigate_dict did `key in current` — + which for strings is a substring check — and then + ``current[key]`` raised on the string-indexed access.""" + # "paragraphs" appears as a substring of body, so the buggy + # impl let the in-check pass before crashing on the indexing. + source = {"body": "transcript with paragraphs in it"} + assert pydash_get(source, "body.paragraphs.transcript") is None class TestGetAnalysisForType: diff --git a/conserver/links/analyze_and_label/__init__.py b/conserver/links/analyze_and_label/__init__.py index 7456fae..8c20776 100644 --- a/conserver/links/analyze_and_label/__init__.py +++ b/conserver/links/analyze_and_label/__init__.py @@ -12,6 +12,8 @@ from lib.metrics import record_histogram, increment_counter import time from lib.links.filters import is_included, randomly_execute_with_sampling +from pydash import get as pydash_get +from vcon import Vcon logger = init_logger(__name__) @@ -89,15 +91,18 @@ def run( return vcon_uuid client = get_openai_client(opts) - source_type = navigate_dict(opts, "source.analysis_type") - text_location = navigate_dict(opts, "source.text_location") + source_type = pydash_get(opts, "source.analysis_type") + text_location = pydash_get(opts, "source.text_location") for index, dialog in enumerate(vCon.dialog): source = get_analysis_for_type(vCon, index, source_type) if not source: logger.warning("No %s found for vCon: %s", source_type, vCon.uuid) continue - source_text = navigate_dict(source, text_location) + # Decode body so a dotted ``text_location`` like + # ``body.paragraphs.transcript`` can drill through a JSON-encoded + # body (spec-current shape after vcon-server#182). + source_text = pydash_get(Vcon.with_decoded_body(source), text_location) if not source_text: logger.warning("No source_text found at %s for vCon: %s", text_location, vCon.uuid) continue @@ -207,12 +212,3 @@ def run( return vcon_uuid -def navigate_dict(dictionary, path): - keys = path.split(".") - current = dictionary - for key in keys: - if key in current: - current = current[key] - else: - return None - return current diff --git a/conserver/links/analyze_and_label/tests/test_analyze_and_label.py b/conserver/links/analyze_and_label/tests/test_analyze_and_label.py index 97e9595..bf06c98 100644 --- a/conserver/links/analyze_and_label/tests/test_analyze_and_label.py +++ b/conserver/links/analyze_and_label/tests/test_analyze_and_label.py @@ -3,7 +3,7 @@ import pytest from unittest.mock import patch, MagicMock, Mock -from links.analyze_and_label import run, generate_analysis_with_labels, get_analysis_for_type, navigate_dict +from links.analyze_and_label import run, generate_analysis_with_labels, get_analysis_for_type from vcon import Vcon from lib.vcon_redis import VconRedis @@ -202,26 +202,6 @@ def test_get_analysis_for_type(sample_vcon, sample_vcon_with_analysis): assert analysis is None -def test_navigate_dict(): - """Test the navigate_dict function""" - test_dict = { - "a": { - "b": { - "c": "value" - } - }, - "x": "y" - } - - # Test valid paths - assert navigate_dict(test_dict, "a.b.c") == "value" - assert navigate_dict(test_dict, "x") == "y" - - # Test invalid paths - assert navigate_dict(test_dict, "a.b.d") is None - assert navigate_dict(test_dict, "z") is None - - @patch('links.analyze_and_label.get_openai_client') @patch('links.analyze_and_label.generate_analysis_with_labels') @patch('links.analyze_and_label.is_included', return_value=True) diff --git a/conserver/links/analyze_vcon/__init__.py b/conserver/links/analyze_vcon/__init__.py index 463de6f..fd4924d 100644 --- a/conserver/links/analyze_vcon/__init__.py +++ b/conserver/links/analyze_vcon/__init__.py @@ -201,12 +201,3 @@ def run( return vcon_uuid -def navigate_dict(dictionary, path): - keys = path.split(".") - current = dictionary - for key in keys: - if key in current: - current = current[key] - else: - return None - return current diff --git a/conserver/links/check_and_tag/__init__.py b/conserver/links/check_and_tag/__init__.py index a7dda2b..4f4e1e6 100644 --- a/conserver/links/check_and_tag/__init__.py +++ b/conserver/links/check_and_tag/__init__.py @@ -13,6 +13,7 @@ import time from lib.links.filters import is_included, randomly_execute_with_sampling from vcon import Vcon +from pydash import get as pydash_get logger = init_logger(__name__) @@ -112,8 +113,8 @@ def run( return vcon_uuid client = get_openai_client(opts) - source_type = navigate_dict(opts, "source.analysis_type") - text_location = navigate_dict(opts, "source.text_location") + source_type = pydash_get(opts, "source.analysis_type") + text_location = pydash_get(opts, "source.text_location") logger.info("starting loop for vcon.dialog with %s dialogs", len(vCon.dialog)) @@ -131,7 +132,7 @@ def run( continue # Decode body so a dotted ``text_location`` like ``body.transcript`` # can drill through a JSON-encoded body (spec-current shape). - source_text = navigate_dict(Vcon.with_decoded_body(source), text_location) + source_text = pydash_get(Vcon.with_decoded_body(source), text_location) if not source_text: logger.warning("No source_text found at %s for vCon: %s", text_location, vCon.uuid) continue @@ -222,12 +223,3 @@ def run( return vcon_uuid -def navigate_dict(dictionary, path): - keys = path.split(".") - current = dictionary - for key in keys: - if key in current: - current = current[key] - else: - return None - return current diff --git a/conserver/links/check_and_tag/tests/test_check_and_tag.py b/conserver/links/check_and_tag/tests/test_check_and_tag.py index d01f767..9530d0b 100644 --- a/conserver/links/check_and_tag/tests/test_check_and_tag.py +++ b/conserver/links/check_and_tag/tests/test_check_and_tag.py @@ -3,7 +3,9 @@ import pytest -from links.check_and_tag import get_analysis_for_type, navigate_dict, run +from pydash import get as pydash_get + +from links.check_and_tag import get_analysis_for_type, run from vcon import Vcon @@ -33,21 +35,16 @@ def test_get_analysis_for_type(sample_vcon): assert get_analysis_for_type(sample_vcon, 0, "missing") is None -def test_navigate_dict(): - assert navigate_dict({"body": {"text": "hello"}}, "body.text") == "hello" - assert navigate_dict({"body": {"text": "hello"}}, "body.missing") is None - - -def test_navigate_dict_drills_into_json_encoded_body_via_helper(): +def test_dotted_lookup_drills_into_json_encoded_body_via_helper(): # Spec-current shape: encoding=json, body is a stringified dict. - # navigate_dict cannot drill into a string, so the link feeds it the + # pydash.get cannot drill into a string, so the link feeds it the # output of Vcon.with_decoded_body first. source = { "type": "transcript", "body": json.dumps({"transcript": "the actual text"}), "encoding": "json", } - assert navigate_dict(Vcon.with_decoded_body(source), "body.transcript") == "the actual text" + assert pydash_get(Vcon.with_decoded_body(source), "body.transcript") == "the actual text" def test_run_requires_tag_name(): diff --git a/conserver/links/detect_engagement/__init__.py b/conserver/links/detect_engagement/__init__.py index 7f918d3..0588a72 100644 --- a/conserver/links/detect_engagement/__init__.py +++ b/conserver/links/detect_engagement/__init__.py @@ -12,6 +12,7 @@ import time from lib.links.filters import is_included, randomly_execute_with_sampling from vcon import Vcon +from pydash import get as pydash_get import os logger = init_logger(__name__) @@ -100,7 +101,7 @@ def run( # Decode body so a dotted ``text_location`` like ``body.transcript`` # can drill through a JSON-encoded body (spec-current shape). - source_text = navigate_dict(Vcon.with_decoded_body(source), text_location) + source_text = pydash_get(Vcon.with_decoded_body(source), text_location) if not source_text: logger.warning("No source_text found at %s for vCon: %s", text_location, vCon.uuid) continue @@ -186,12 +187,3 @@ def run( return vcon_uuid -def navigate_dict(dictionary, path): - keys = path.split(".") - current = dictionary - for key in keys: - if key in current: - current = current[key] - else: - return None - return current \ No newline at end of file diff --git a/conserver/links/detect_engagement/tests/test_detect_engagement.py b/conserver/links/detect_engagement/tests/test_detect_engagement.py index d3b3ba7..5fa089f 100644 --- a/conserver/links/detect_engagement/tests/test_detect_engagement.py +++ b/conserver/links/detect_engagement/tests/test_detect_engagement.py @@ -17,7 +17,6 @@ from links.detect_engagement import ( check_engagement, get_analysis_for_type, - navigate_dict, run, default_options, ) @@ -90,23 +89,6 @@ def test_get_analysis_for_type(): result = get_analysis_for_type(vcon, 0, "nonexistent") assert result is None -def test_navigate_dict(): - """ - Test that navigate_dict can traverse nested dictionaries using dot notation. - """ - test_dict = { - "level1": { - "level2": { - "level3": "value" - } - } - } - # Should return the value at the nested path - assert navigate_dict(test_dict, "level1.level2.level3") == "value" - # Should return None for a non-existent path - assert navigate_dict(test_dict, "level1.nonexistent") is None - assert navigate_dict(test_dict, "nonexistent") is None - def skip_if_no_openai_key(): """ Skip the test if OPENAI_API_KEY is not set in the environment.