From 44adecaf20948a018fc3db974a85b39638b7f2b4 Mon Sep 17 00:00:00 2001
From: Pavan Kumar <pavanputhra@gmail.com>
Date: Tue, 26 May 2026 16:05:44 +0530
Subject: [PATCH] fix(links): use pydash.get + decode body to fix analyze chain
 (CON-573)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The in-house navigate_dict, copy-pasted into five link modules, used
`key in current` for traversal — which on a non-dict (e.g. an analysis
whose body is a JSON-encoded string after #182) does a substring check
and then crashes with TypeError on `current[key]`.

Replace all five copies with pydash.get (already a core dep), and in
analyze + analyze_and_label also wrap source with Vcon.with_decoded_body
so a dotted text_location can still drill through a JSON-encoded body —
matching what check_and_tag and detect_engagement already do.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 conserver/links/analyze/__init__.py           | 25 +++++-----
 conserver/links/analyze/tests/test_analyze.py | 46 ++++++++-----------
 conserver/links/analyze_and_label/__init__.py | 20 ++++----
 .../tests/test_analyze_and_label.py           | 22 +--------
 conserver/links/analyze_vcon/__init__.py      |  9 ----
 conserver/links/check_and_tag/__init__.py     | 16 ++-----
 .../check_and_tag/tests/test_check_and_tag.py | 15 +++---
 conserver/links/detect_engagement/__init__.py | 12 +----
 .../tests/test_detect_engagement.py           | 18 --------
 9 files changed, 52 insertions(+), 131 deletions(-)

diff --git a/conserver/links/analyze/__init__.py b/conserver/links/analyze/__init__.py
index 3549ce4..d6ad9e1 100644
--- a/conserver/links/analyze/__init__.py
+++ b/conserver/links/analyze/__init__.py
@@ -12,6 +12,8 @@
 import time
 from lib.links.filters import is_included, randomly_execute_with_sampling
 from lib.ai_usage import send_ai_usage_data_for_tracking
+from pydash import get as pydash_get
+from vcon import Vcon
 
 logger = init_logger(__name__)
 
@@ -109,15 +111,21 @@ def run(
 
     client = get_openai_client(opts)
 
-    source_type = navigate_dict(opts, "source.analysis_type")
-    text_location = navigate_dict(opts, "source.text_location")
+    source_type = pydash_get(opts, "source.analysis_type")
+    text_location = pydash_get(opts, "source.text_location")
 
     for index, dialog in enumerate(vCon.dialog):
         source = get_analysis_for_type(vCon, index, source_type)
         if not source:
             logger.warning("No %s found for vCon: %s", source_type, vCon.uuid)
             continue
-        source_text = navigate_dict(source, text_location)
+        # Decode body so a dotted ``text_location`` like
+        # ``body.paragraphs.transcript`` can drill through a JSON-encoded
+        # body (spec-current shape after vcon-server#182). pydash.get also
+        # returns None instead of raising on any non-dict mid-path, so an
+        # unexpected source shape skip-warns rather than crashing the
+        # chain (see CON-573).
+        source_text = pydash_get(Vcon.with_decoded_body(source), text_location)
         if not source_text:
             logger.warning("No source_text found at %s for vCon: %s", text_location, vCon.uuid)
             continue
@@ -191,14 +199,3 @@ def run(
     return vcon_uuid
 
 
-def navigate_dict(dictionary, path):
-    if dictionary is None:
-        return None
-    keys = path.split(".")
-    current = dictionary
-    for key in keys:
-        if key in current:
-            current = current[key]
-        else:
-            return None
-    return current
diff --git a/conserver/links/analyze/tests/test_analyze.py b/conserver/links/analyze/tests/test_analyze.py
index cf7ced6..89b8533 100644
--- a/conserver/links/analyze/tests/test_analyze.py
+++ b/conserver/links/analyze/tests/test_analyze.py
@@ -12,11 +12,12 @@
 import os
 import pytest
 from unittest.mock import Mock, patch
+from pydash import get as pydash_get
+
 from links.analyze import (
     generate_analysis,
     run,
     default_options,
-    navigate_dict,
     get_analysis_for_type,
 )
 from vcon import Vcon
@@ -84,31 +85,24 @@ def mock_redis_with_vcon(mock_vcon_redis, sample_vcon):
     return mock_instance
 
 
-class TestNavigateDict:
-    """Test the navigate_dict utility function"""
-    
-    def test_navigate_dict_simple(self):
-        """Test navigating a simple dictionary path"""
-        test_dict = {"a": {"b": {"c": "value"}}}
-        result = navigate_dict(test_dict, "a.b.c")
-        assert result == "value"
-    
-    def test_navigate_dict_missing_key(self):
-        """Test navigating to a missing key"""
-        test_dict = {"a": {"b": {"c": "value"}}}
-        result = navigate_dict(test_dict, "a.b.d")
-        assert result is None
-    
-    def test_navigate_dict_empty_path(self):
-        """Test navigating with empty path"""
-        test_dict = {"a": "value"}
-        result = navigate_dict(test_dict, "")
-        assert result is None  # Empty path should return None
-    
-    def test_navigate_dict_none_input(self):
-        """Test navigating with None input"""
-        result = navigate_dict(None, "a.b.c")
-        assert result is None
+class TestPydashGetContract:
+    """The analyze link uses pydash.get to walk dotted paths like
+    ``source.text_location`` into the analysis dict. The behaviour
+    contract we depend on — return None for any unreachable path,
+    including non-dict intermediates — is what fixes CON-573, so it's
+    worth pinning here even though pydash itself is well-tested."""
+
+    def test_returns_none_when_midpath_is_non_dict(self):
+        """Regression for CON-573: when a transcript analysis stores
+        body as a plain string (instead of {paragraphs:{transcript:…}}),
+        the dotted lookup must return None rather than raise TypeError.
+        The previous in-house navigate_dict did `key in current` —
+        which for strings is a substring check — and then
+        ``current[key]`` raised on the string-indexed access."""
+        # "paragraphs" appears as a substring of body, so the buggy
+        # impl let the in-check pass before crashing on the indexing.
+        source = {"body": "transcript with paragraphs in it"}
+        assert pydash_get(source, "body.paragraphs.transcript") is None
 
 
 class TestGetAnalysisForType:
diff --git a/conserver/links/analyze_and_label/__init__.py b/conserver/links/analyze_and_label/__init__.py
index 7456fae..8c20776 100644
--- a/conserver/links/analyze_and_label/__init__.py
+++ b/conserver/links/analyze_and_label/__init__.py
@@ -12,6 +12,8 @@
 from lib.metrics import record_histogram, increment_counter
 import time
 from lib.links.filters import is_included, randomly_execute_with_sampling
+from pydash import get as pydash_get
+from vcon import Vcon
 
 logger = init_logger(__name__)
 
@@ -89,15 +91,18 @@ def run(
         return vcon_uuid
 
     client = get_openai_client(opts)
-    source_type = navigate_dict(opts, "source.analysis_type")
-    text_location = navigate_dict(opts, "source.text_location")
+    source_type = pydash_get(opts, "source.analysis_type")
+    text_location = pydash_get(opts, "source.text_location")
 
     for index, dialog in enumerate(vCon.dialog):
         source = get_analysis_for_type(vCon, index, source_type)
         if not source:
             logger.warning("No %s found for vCon: %s", source_type, vCon.uuid)
             continue
-        source_text = navigate_dict(source, text_location)
+        # Decode body so a dotted ``text_location`` like
+        # ``body.paragraphs.transcript`` can drill through a JSON-encoded
+        # body (spec-current shape after vcon-server#182).
+        source_text = pydash_get(Vcon.with_decoded_body(source), text_location)
         if not source_text:
             logger.warning("No source_text found at %s for vCon: %s", text_location, vCon.uuid)
             continue
@@ -207,12 +212,3 @@ def run(
     return vcon_uuid
 
 
-def navigate_dict(dictionary, path):
-    keys = path.split(".")
-    current = dictionary
-    for key in keys:
-        if key in current:
-            current = current[key]
-        else:
-            return None
-    return current
diff --git a/conserver/links/analyze_and_label/tests/test_analyze_and_label.py b/conserver/links/analyze_and_label/tests/test_analyze_and_label.py
index 97e9595..bf06c98 100644
--- a/conserver/links/analyze_and_label/tests/test_analyze_and_label.py
+++ b/conserver/links/analyze_and_label/tests/test_analyze_and_label.py
@@ -3,7 +3,7 @@
 import pytest
 from unittest.mock import patch, MagicMock, Mock
 
-from links.analyze_and_label import run, generate_analysis_with_labels, get_analysis_for_type, navigate_dict
+from links.analyze_and_label import run, generate_analysis_with_labels, get_analysis_for_type
 from vcon import Vcon
 from lib.vcon_redis import VconRedis
 
@@ -202,26 +202,6 @@ def test_get_analysis_for_type(sample_vcon, sample_vcon_with_analysis):
     assert analysis is None
 
 
-def test_navigate_dict():
-    """Test the navigate_dict function"""
-    test_dict = {
-        "a": {
-            "b": {
-                "c": "value"
-            }
-        },
-        "x": "y"
-    }
-    
-    # Test valid paths
-    assert navigate_dict(test_dict, "a.b.c") == "value"
-    assert navigate_dict(test_dict, "x") == "y"
-    
-    # Test invalid paths
-    assert navigate_dict(test_dict, "a.b.d") is None
-    assert navigate_dict(test_dict, "z") is None
-
-
 @patch('links.analyze_and_label.get_openai_client')
 @patch('links.analyze_and_label.generate_analysis_with_labels')
 @patch('links.analyze_and_label.is_included', return_value=True)
diff --git a/conserver/links/analyze_vcon/__init__.py b/conserver/links/analyze_vcon/__init__.py
index 463de6f..fd4924d 100644
--- a/conserver/links/analyze_vcon/__init__.py
+++ b/conserver/links/analyze_vcon/__init__.py
@@ -201,12 +201,3 @@ def run(
     return vcon_uuid
 
 
-def navigate_dict(dictionary, path):
-    keys = path.split(".")
-    current = dictionary
-    for key in keys:
-        if key in current:
-            current = current[key]
-        else:
-            return None
-    return current
diff --git a/conserver/links/check_and_tag/__init__.py b/conserver/links/check_and_tag/__init__.py
index a7dda2b..4f4e1e6 100644
--- a/conserver/links/check_and_tag/__init__.py
+++ b/conserver/links/check_and_tag/__init__.py
@@ -13,6 +13,7 @@
 import time
 from lib.links.filters import is_included, randomly_execute_with_sampling
 from vcon import Vcon
+from pydash import get as pydash_get
 
 logger = init_logger(__name__)
 
@@ -112,8 +113,8 @@ def run(
         return vcon_uuid
 
     client = get_openai_client(opts)
-    source_type = navigate_dict(opts, "source.analysis_type")
-    text_location = navigate_dict(opts, "source.text_location")
+    source_type = pydash_get(opts, "source.analysis_type")
+    text_location = pydash_get(opts, "source.text_location")
 
     logger.info("starting loop for vcon.dialog with %s dialogs", len(vCon.dialog))
 
@@ -131,7 +132,7 @@ def run(
             continue
         # Decode body so a dotted ``text_location`` like ``body.transcript``
         # can drill through a JSON-encoded body (spec-current shape).
-        source_text = navigate_dict(Vcon.with_decoded_body(source), text_location)
+        source_text = pydash_get(Vcon.with_decoded_body(source), text_location)
         if not source_text:
             logger.warning("No source_text found at %s for vCon: %s", text_location, vCon.uuid)
             continue
@@ -222,12 +223,3 @@ def run(
     return vcon_uuid
 
 
-def navigate_dict(dictionary, path):
-    keys = path.split(".")
-    current = dictionary
-    for key in keys:
-        if key in current:
-            current = current[key]
-        else:
-            return None
-    return current
diff --git a/conserver/links/check_and_tag/tests/test_check_and_tag.py b/conserver/links/check_and_tag/tests/test_check_and_tag.py
index d01f767..9530d0b 100644
--- a/conserver/links/check_and_tag/tests/test_check_and_tag.py
+++ b/conserver/links/check_and_tag/tests/test_check_and_tag.py
@@ -3,7 +3,9 @@
 
 import pytest
 
-from links.check_and_tag import get_analysis_for_type, navigate_dict, run
+from pydash import get as pydash_get
+
+from links.check_and_tag import get_analysis_for_type, run
 from vcon import Vcon
 
 
@@ -33,21 +35,16 @@ def test_get_analysis_for_type(sample_vcon):
     assert get_analysis_for_type(sample_vcon, 0, "missing") is None
 
 
-def test_navigate_dict():
-    assert navigate_dict({"body": {"text": "hello"}}, "body.text") == "hello"
-    assert navigate_dict({"body": {"text": "hello"}}, "body.missing") is None
-
-
-def test_navigate_dict_drills_into_json_encoded_body_via_helper():
+def test_dotted_lookup_drills_into_json_encoded_body_via_helper():
     # Spec-current shape: encoding=json, body is a stringified dict.
-    # navigate_dict cannot drill into a string, so the link feeds it the
+    # pydash.get cannot drill into a string, so the link feeds it the
     # output of Vcon.with_decoded_body first.
     source = {
         "type": "transcript",
         "body": json.dumps({"transcript": "the actual text"}),
         "encoding": "json",
     }
-    assert navigate_dict(Vcon.with_decoded_body(source), "body.transcript") == "the actual text"
+    assert pydash_get(Vcon.with_decoded_body(source), "body.transcript") == "the actual text"
 
 
 def test_run_requires_tag_name():
diff --git a/conserver/links/detect_engagement/__init__.py b/conserver/links/detect_engagement/__init__.py
index 7f918d3..0588a72 100644
--- a/conserver/links/detect_engagement/__init__.py
+++ b/conserver/links/detect_engagement/__init__.py
@@ -12,6 +12,7 @@
 import time
 from lib.links.filters import is_included, randomly_execute_with_sampling
 from vcon import Vcon
+from pydash import get as pydash_get
 import os
 logger = init_logger(__name__)
 
@@ -100,7 +101,7 @@ def run(
 
         # Decode body so a dotted ``text_location`` like ``body.transcript``
         # can drill through a JSON-encoded body (spec-current shape).
-        source_text = navigate_dict(Vcon.with_decoded_body(source), text_location)
+        source_text = pydash_get(Vcon.with_decoded_body(source), text_location)
         if not source_text:
             logger.warning("No source_text found at %s for vCon: %s", text_location, vCon.uuid)
             continue
@@ -186,12 +187,3 @@ def run(
     return vcon_uuid
 
 
-def navigate_dict(dictionary, path):
-    keys = path.split(".")
-    current = dictionary
-    for key in keys:
-        if key in current:
-            current = current[key]
-        else:
-            return None
-    return current 
\ No newline at end of file
diff --git a/conserver/links/detect_engagement/tests/test_detect_engagement.py b/conserver/links/detect_engagement/tests/test_detect_engagement.py
index d3b3ba7..5fa089f 100644
--- a/conserver/links/detect_engagement/tests/test_detect_engagement.py
+++ b/conserver/links/detect_engagement/tests/test_detect_engagement.py
@@ -17,7 +17,6 @@
 from links.detect_engagement import (
     check_engagement,
     get_analysis_for_type,
-    navigate_dict,
     run,
     default_options,
 )
@@ -90,23 +89,6 @@ def test_get_analysis_for_type():
     result = get_analysis_for_type(vcon, 0, "nonexistent")
     assert result is None
 
-def test_navigate_dict():
-    """
-    Test that navigate_dict can traverse nested dictionaries using dot notation.
-    """
-    test_dict = {
-        "level1": {
-            "level2": {
-                "level3": "value"
-            }
-        }
-    }
-    # Should return the value at the nested path
-    assert navigate_dict(test_dict, "level1.level2.level3") == "value"
-    # Should return None for a non-existent path
-    assert navigate_dict(test_dict, "level1.nonexistent") is None
-    assert navigate_dict(test_dict, "nonexistent") is None
-
 def skip_if_no_openai_key():
     """
     Skip the test if OPENAI_API_KEY is not set in the environment.