From ad91a529612d12f548d835635f54b4f06136c14e Mon Sep 17 00:00:00 2001 From: Pavan Kumar Date: Fri, 22 May 2026 14:18:20 +0530 Subject: [PATCH] fix(vcon): decode body per spec on read, canonicalize on write MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit draft-ietf-vcon-vcon-core-02 §2.3.2 mandates that attachment and analysis body fields are always a String, with encoding (json/none/base64url) deciding interpretation. The Redis storage normalizer started stringifying dict/list bodies on store, which broke every reader that assumed body was still the underlying Python value — the reported crash was Vcon.add_tag calling .append on a JSON-encoded string. - Add Vcon.decoded_body and Vcon.with_decoded_body helpers. - add_tag, add_attachment, add_analysis now write spec-correct shape (encoding=json + json.dumps(body)) at the boundary instead of relying on the storage normalizer to canonicalize. - add_tag writes the new attachment under the spec-current \`purpose\` key. - Readers in tag_router, filters, post_analysis_to_slack, hugging_llm_link, milvus, check_and_tag, detect_engagement decode body before dict/list access; attachment lookups accept either \`purpose\` or \`type\` for back-compat. - filters.is_included gains TypedDict annotations and accepts \`purpose\` as an alias for \`type\`. - Regression tests added across every touched path. Co-Authored-By: Claude Opus 4.7 --- common/lib/links/filters.py | 65 ++++++-- common/storage/milvus/__init__.py | 19 ++- common/vcon.py | 105 +++++++++--- conserver/links/check_and_tag/__init__.py | 5 +- .../check_and_tag/tests/test_check_and_tag.py | 17 +- conserver/links/detect_engagement/__init__.py | 5 +- conserver/links/hugging_llm_link/__init__.py | 8 +- .../links/post_analysis_to_slack/__init__.py | 23 ++- .../test_post_analysis_to_slack.py | 18 +++ conserver/links/tag_router/__init__.py | 55 ++++--- conserver/links/tag_router/test_tag_router.py | 120 +++++++++++++- tests/core/test_filters.py | 86 ++++++++++ tests/core/test_vcon.py | 151 +++++++++++++++++- 13 files changed, 594 insertions(+), 83 deletions(-) create mode 100644 tests/core/test_filters.py diff --git a/common/lib/links/filters.py b/common/lib/links/filters.py index 2127c50..8e6d73e 100644 --- a/common/lib/links/filters.py +++ b/common/lib/links/filters.py @@ -1,30 +1,77 @@ -import json import random +from typing import Literal, Optional, TypedDict from lib.logging_utils import init_logger +from vcon import Vcon logger = init_logger(__name__) -def is_included(options, _vcon): +class OnlyIfFilter(TypedDict, total=False): + """``only_if`` clause inside :class:`FilterOptions`. + + Exactly one of ``type`` or ``purpose`` should be supplied — they are + aliases. ``purpose`` is the draft-ietf-vcon-vcon-core-02 spelling for + attachments; ``type`` is the pre-0.4.0 name (still canonical for + analysis entries). Either works against either section so existing + configs keep matching, and spec-current configs can use ``purpose``. + + Keys: + section: vCon array to scan — ``"attachments"`` or ``"analysis"``. + type / purpose: identifier to match on each element. + includes: substring/membership token to look for inside the body. + """ + + section: Literal["attachments", "analysis"] + type: str + purpose: str + includes: str + + +class FilterOptions(TypedDict, total=False): + """Options envelope accepted by :func:`is_included`. + + Absent / empty ``only_if`` means "include everything", which is why + both ``options`` and ``options.only_if`` are optional. + """ + + only_if: OnlyIfFilter + + +def is_included(options: Optional[FilterOptions], _vcon) -> bool: if not options: return True if not options.get("only_if"): return True filter = options["only_if"] section = filter["section"] - type = filter["type"] + # Accept either the spec-current ``purpose`` or legacy ``type`` key + # as the target identifier. They're treated as aliases regardless of + # section so configs migrate at their own pace. + target = filter.get("purpose") or filter.get("type") includes = filter["includes"] try: for element in getattr(_vcon, section): - body_as_string = json.dumps(element["body"]) if element["encoding"] == "json" else element["body"] - if not element["type"] == type: + # draft-ietf-vcon-vcon-core-02 renamed attachment ``type`` → + # ``purpose``. Accept either on attachments so configs written + # against the legacy shape keep working alongside spec-current + # writers. Analysis kept the ``type`` field. + if section == "attachments": + if element.get("purpose") != target and element.get("type") != target: + continue + elif element.get("type") != target: continue - if type == "tags": - tags = element["body"] - if includes in tags: + if target == "tags": + # Tags body is a JSON-encoded list of "name:value" strings — + # the one case where we have to decode before checking. + tags = Vcon.decoded_body(element) + if isinstance(tags, list) and includes in tags: return True - elif includes in body_as_string: + continue + # Per spec §2.3.2 ``body`` is always a String regardless of + # encoding, so substring-match directly without any decode. + body = element.get("body") + if isinstance(body, str) and includes in body: return True except Exception as e: logger.error(f"Error checking inclusion: {e}") diff --git a/common/storage/milvus/__init__.py b/common/storage/milvus/__init__.py index 097e520..e06277d 100644 --- a/common/storage/milvus/__init__.py +++ b/common/storage/milvus/__init__.py @@ -11,6 +11,7 @@ from lib.logging_utils import init_logger from lib.metrics import record_histogram, increment_counter from lib.vcon_redis import VconRedis +from vcon import Vcon try: from pymilvus import connections, Collection, FieldSchema, CollectionSchema, DataType, utility @@ -131,21 +132,25 @@ def extract_text_from_vcon(vcon: dict) -> str: if "analysis" in vcon: transcript_analyses = [a for a in vcon["analysis"] if a.get("type") == "transcript"] for analysis in transcript_analyses: - if "body" in analysis and "text" in analysis["body"]: - text += analysis["body"]["text"] + " " + # Decode through the Vcon helper so a JSON-stringified body + # (post spec-enforcement on store) still surfaces as a dict. + body = Vcon.decoded_body(analysis) + if isinstance(body, dict) and "text" in body: + text += body["text"] + " " extracted_components.append(f"transcript analysis") logger.debug(f"Extracted transcript analysis from vCon {vcon_id}") - + # Extract summary analysis if "analysis" in vcon: summary_analyses = [a for a in vcon["analysis"] if a.get("type") == "summary"] for analysis in summary_analyses: - if "body" in analysis and isinstance(analysis["body"], str): - text += analysis["body"] + " " + body = Vcon.decoded_body(analysis) + if isinstance(body, str): + text += body + " " extracted_components.append(f"summary analysis") logger.debug(f"Extracted summary analysis from vCon {vcon_id}") - elif "body" in analysis and isinstance(analysis["body"], dict) and "text" in analysis["body"]: - text += analysis["body"]["text"] + " " + elif isinstance(body, dict) and "text" in body: + text += body["text"] + " " extracted_components.append(f"summary analysis") logger.debug(f"Extracted summary analysis from vCon {vcon_id}") diff --git a/common/vcon.py b/common/vcon.py index 5087c32..8c39c8a 100644 --- a/common/vcon.py +++ b/common/vcon.py @@ -42,28 +42,74 @@ def build_new(cls): def tags(self): return self.find_attachment_by_purpose("tags") + @staticmethod + def decoded_body(entry): + """Return an attachment/analysis ``body`` as a live Python value. + + Per draft-ietf-vcon-vcon-core-02 §2.3.2 ``body`` is *always* a String; + the ``encoding`` tells you how to interpret it: + + - ``json`` → body is a JSON-encoded object/array, parse with ``json.loads``. + - ``base64url`` → body is base64url-encoded bytes, returned verbatim + (binary decoding is caller-specific). + - ``none`` → body is a freeform string, returned verbatim. + + For backwards compatibility with legacy writers that placed a raw + dict/list under ``body`` with ``encoding: none``, the dict/list is + returned as-is. ``VconRedis._enforce_spec_on_write`` later normalises + that to spec-correct ``encoding: json`` + stringified body, after + which this helper still returns the same Python value on reload. + + Returns ``None`` if ``entry`` is falsy. + """ + if not entry: + return None + body = entry.get("body") + if entry.get("encoding") == "json" and isinstance(body, str): + return json.loads(body) + return body + + @staticmethod + def with_decoded_body(entry): + """Shallow copy of an attachment/analysis entry with body decoded. + + Returns a new dict identical to ``entry`` except ``body`` is replaced + with the live Python value parsed via :meth:`decoded_body`. Useful + when a caller wants to navigate into ``body`` with dict syntax (e.g. + via a dot-path navigator) without having to know whether body + arrived as a JSON-encoded string from storage. + + Returns ``None`` if ``entry`` is falsy. + """ + if not entry: + return None + return {**entry, "body": Vcon.decoded_body(entry)} + def get_tag(self, tag_name): tags_attachment = self.find_attachment_by_purpose("tags") if not tags_attachment: return None - tag = next( - (t for t in tags_attachment["body"] if t.startswith(f"{tag_name}:")), None - ) + tags = self.decoded_body(tags_attachment) or [] + tag = next((t for t in tags if t.startswith(f"{tag_name}:")), None) if not tag: return None - tag_value = tag.split(":")[1] - return tag_value + return tag.split(":", 1)[1] def add_tag(self, tag_name, tag_value): tags_attachment = self.find_attachment_by_purpose("tags") - if not tags_attachment: - tags_attachment = { - "type": "tags", - "body": [], - "encoding": "none", - } + if tags_attachment is None: + # Spec 0.4.0 renamed attachment ``type`` → ``purpose``. New + # writes use the spec-current key; lookup tolerates both. + tags_attachment = {"purpose": "tags"} self.vcon_dict["attachments"].append(tags_attachment) - tags_attachment["body"].append(f"{tag_name}:{tag_value}") + # Decode existing body so a prior add_tag (or a Redis round-trip + # via VconRedis._enforce_spec_on_write) round-trips cleanly. Write + # back as spec-correct ``encoding=json`` + stringified list, per + # draft-ietf-vcon-vcon-core-02 §2.3.2 (body is always a String). + tags = self.decoded_body(tags_attachment) or [] + tags.append(f"{tag_name}:{tag_value}") + tags_attachment["body"] = json.dumps(tags) + tags_attachment["encoding"] = "json" def find_attachment_by_purpose(self, purpose): # IETF vCon spec 0.4.0 attachment lookup. Matches `purpose` first @@ -97,55 +143,68 @@ def find_attachment_by_type(self, type): def add_attachment(self, *, body: Union[dict, list, str], type: str, encoding="none"): if encoding not in ['json', 'none', 'base64url']: raise Exception("Invalid encoding") - + + # Per draft-ietf-vcon-vcon-core-02 §2.3.2 ``body`` is always a String. + # If a caller hands us a dict/list as a convenience, JSON-encode it + # immediately so any reader that touches the attachment between now + # and storage sees the spec-correct shape. + if isinstance(body, (dict, list)): + body = json.dumps(body) + encoding = "json" + if encoding == "json": try: json.loads(body) except Exception as e: raise Exception("Invalid JSON body: ", e) - + if encoding == 'base64url': try: base64.urlsafe_b64decode(body) except Exception as e: raise Exception("Invalid base64url body: ", e) - attachment = { + self.vcon_dict["attachments"].append({ "type": type, "body": body, "encoding": encoding, - } - self.vcon_dict["attachments"].append(attachment) + }) def find_analysis_by_type(self, type): # TODO fix to search for specific dialog id if it's passed return next((a for a in self.vcon_dict["analysis"] if a["type"] == type), None) def add_analysis(self, *, type: str, dialog: Union[list, int], vendor: str, body: Union[dict, list, str], encoding="none", extra={}): - if encoding not in ['json', 'none', 'base64url']: raise Exception("Invalid encoding") - + + # Per draft-ietf-vcon-vcon-core-02 §2.3.2 ``body`` is always a String. + # If a caller hands us a dict/list as a convenience, JSON-encode it + # immediately so any reader that touches the analysis between now + # and storage sees the spec-correct shape. + if isinstance(body, (dict, list)): + body = json.dumps(body) + encoding = "json" + if encoding == "json": try: json.loads(body) except Exception as e: raise Exception("Invalid JSON body: ", e) - + if encoding == 'base64url': try: base64.urlsafe_b64decode(body) except Exception as e: raise Exception("Invalid base64url body: ", e) - analysis = { + self.vcon_dict["analysis"].append({ "type": type, "dialog": dialog, "vendor": vendor, "body": body, "encoding": encoding, **extra, - } - self.vcon_dict["analysis"].append(analysis) + }) def add_party(self, party: dict): self.vcon_dict["parties"].append(party) diff --git a/conserver/links/check_and_tag/__init__.py b/conserver/links/check_and_tag/__init__.py index 58262c1..a7dda2b 100644 --- a/conserver/links/check_and_tag/__init__.py +++ b/conserver/links/check_and_tag/__init__.py @@ -12,6 +12,7 @@ from lib.metrics import record_histogram, increment_counter import time from lib.links.filters import is_included, randomly_execute_with_sampling +from vcon import Vcon logger = init_logger(__name__) @@ -128,7 +129,9 @@ def run( if not source: logger.warning("No %s found for vCon: %s", source_type, vCon.uuid) continue - source_text = navigate_dict(source, text_location) + # Decode body so a dotted ``text_location`` like ``body.transcript`` + # can drill through a JSON-encoded body (spec-current shape). + source_text = navigate_dict(Vcon.with_decoded_body(source), text_location) if not source_text: logger.warning("No source_text found at %s for vCon: %s", text_location, vCon.uuid) continue diff --git a/conserver/links/check_and_tag/tests/test_check_and_tag.py b/conserver/links/check_and_tag/tests/test_check_and_tag.py index 6d861aa..d01f767 100644 --- a/conserver/links/check_and_tag/tests/test_check_and_tag.py +++ b/conserver/links/check_and_tag/tests/test_check_and_tag.py @@ -38,6 +38,18 @@ def test_navigate_dict(): assert navigate_dict({"body": {"text": "hello"}}, "body.missing") is None +def test_navigate_dict_drills_into_json_encoded_body_via_helper(): + # Spec-current shape: encoding=json, body is a stringified dict. + # navigate_dict cannot drill into a string, so the link feeds it the + # output of Vcon.with_decoded_body first. + source = { + "type": "transcript", + "body": json.dumps({"transcript": "the actual text"}), + "encoding": "json", + } + assert navigate_dict(Vcon.with_decoded_body(source), "body.transcript") == "the actual text" + + def test_run_requires_tag_name(): with pytest.raises(ValueError, match="tag_name is required"): run( @@ -81,7 +93,8 @@ def test_run_applies_tag_when_evaluation_is_positive( assert result == "test-uuid" assert sample_vcon.get_tag("topic") == "billing" analysis = get_analysis_for_type(sample_vcon, 0, "tag_evaluation") - assert analysis["body"]["applies"] is True + # add_analysis JSON-encodes a dict body at the boundary (spec). + assert Vcon.decoded_body(analysis)["applies"] is True mock_instance.store_vcon.assert_called_once_with(sample_vcon) mock_increment_counter.assert_any_call( "conserver.link.openai.tags_applied", @@ -130,7 +143,7 @@ def test_run_records_negative_evaluation_without_applying_tag( assert result == "test-uuid" assert sample_vcon.get_tag("topic") is None analysis = get_analysis_for_type(sample_vcon, 0, "tag_evaluation") - assert analysis["body"]["applies"] is False + assert Vcon.decoded_body(analysis)["applies"] is False mock_instance.store_vcon.assert_called_once_with(sample_vcon) mock_increment_counter.assert_not_called() mock_record_histogram.assert_called_once() diff --git a/conserver/links/detect_engagement/__init__.py b/conserver/links/detect_engagement/__init__.py index cedecb3..7f918d3 100644 --- a/conserver/links/detect_engagement/__init__.py +++ b/conserver/links/detect_engagement/__init__.py @@ -11,6 +11,7 @@ from lib.metrics import record_histogram, increment_counter import time from lib.links.filters import is_included, randomly_execute_with_sampling +from vcon import Vcon import os logger = init_logger(__name__) @@ -97,7 +98,9 @@ def run( logger.warning("No %s found for vCon: %s", source_type, vCon.uuid) continue - source_text = navigate_dict(source, text_location) + # Decode body so a dotted ``text_location`` like ``body.transcript`` + # can drill through a JSON-encoded body (spec-current shape). + source_text = navigate_dict(Vcon.with_decoded_body(source), text_location) if not source_text: logger.warning("No source_text found at %s for vCon: %s", text_location, vCon.uuid) continue diff --git a/conserver/links/hugging_llm_link/__init__.py b/conserver/links/hugging_llm_link/__init__.py index 8c48bc7..f345020 100644 --- a/conserver/links/hugging_llm_link/__init__.py +++ b/conserver/links/hugging_llm_link/__init__.py @@ -40,6 +40,7 @@ from lib.error_tracking import init_error_tracker from lib.metrics import record_histogram, increment_counter from lib.vcon_redis import VconRedis +from vcon import Vcon # Initialize services init_error_tracker() @@ -229,7 +230,12 @@ def _get_transcript_text(self, vcon) -> str: transcript_text = "" for analysis in vcon.analysis: if analysis["type"] == "transcript": - transcript = analysis["body"].get("transcript", "") + # Decode body so a JSON-encoded transcript (post-spec-normalize) + # still surfaces as a dict here. + body = Vcon.decoded_body(analysis) + if not isinstance(body, dict): + continue + transcript = body.get("transcript", "") if transcript: transcript_text += transcript + "\n" return transcript_text.strip() diff --git a/conserver/links/post_analysis_to_slack/__init__.py b/conserver/links/post_analysis_to_slack/__init__.py index dccbf4b..57dcd75 100644 --- a/conserver/links/post_analysis_to_slack/__init__.py +++ b/conserver/links/post_analysis_to_slack/__init__.py @@ -2,6 +2,7 @@ from lib.logging_utils import init_logger from lib.metrics import increment_counter from slack_sdk.web import WebClient +from vcon import Vcon logger = init_logger(__name__) @@ -31,12 +32,20 @@ def build_details_url(url_template: str, vcon_uuid: str) -> str: return f'{url_template}?_vcon_id="{vcon_uuid}"' +def _is_strolid_dealer_attachment(a): + # Spec 0.4.0 renamed attachment ``type`` → ``purpose``. Match either so + # legacy and spec-current writers both resolve. + return a.get("purpose") == "strolid_dealer" or a.get("type") == "strolid_dealer" + + def get_team(vcon): team_name = None for a in vcon.attachments: - if a["type"] == "strolid_dealer": - t_obj = a["body"] - team = t_obj.get("team", None) + if _is_strolid_dealer_attachment(a): + # Body may be a JSON string (encoding=json) post-spec-normalization + # or a raw dict from legacy writers — decode either way. + t_obj = Vcon.decoded_body(a) or {} + team = t_obj.get("team", None) if isinstance(t_obj, dict) else None if team: team_name = team["name"] team_name = team_name.split()[0].lower() @@ -46,9 +55,9 @@ def get_team(vcon): def get_dealer(vcon): dealer = None for a in vcon.attachments: - if a["type"] == "strolid_dealer": - d_obj = a["body"] - dealer = d_obj.get("name", None) + if _is_strolid_dealer_attachment(a): + d_obj = Vcon.decoded_body(a) or {} + dealer = d_obj.get("name", None) if isinstance(d_obj, dict) else None return dealer @@ -113,6 +122,8 @@ def run(vcon_id, link_name, opts=default_options): # we need to skip first one an only post the second one to slack if a["type"] != opts["only_if"]["analysis_type"]: continue + # Per draft-ietf-vcon-vcon-core-02 §2.3.2 body is always a String — + # substring-match directly without decoding. if opts["only_if"]["includes"] not in a["body"]: continue if a.get("was_posted_to_slack"): diff --git a/conserver/links/post_analysis_to_slack/test_post_analysis_to_slack.py b/conserver/links/post_analysis_to_slack/test_post_analysis_to_slack.py index 0dd0ea3..a5e3864 100644 --- a/conserver/links/post_analysis_to_slack/test_post_analysis_to_slack.py +++ b/conserver/links/post_analysis_to_slack/test_post_analysis_to_slack.py @@ -49,6 +49,24 @@ def test_helper_functions_extract_team_dealer_and_summary(): assert get_summary(vcon, 1) is None +def test_helper_functions_resolve_spec_current_purpose_key(): + # draft-ietf-vcon-vcon-core-02 renamed attachment ``type`` → ``purpose``. + # get_team / get_dealer must resolve attachments authored under the new + # key as well as the legacy one. + vcon = SimpleNamespace( + attachments=[ + { + "purpose": "strolid_dealer", + "body": {"name": "Dealer Two", "team": {"name": "Beta Honda"}}, + } + ], + analysis=[], + ) + + assert get_team(vcon) == "beta" + assert get_dealer(vcon) == "Dealer Two" + + def test_build_details_url_legacy_appends_quoted_query_param(): # Old config style with no placeholder — keep the legacy Hex-shaped suffix. assert ( diff --git a/conserver/links/tag_router/__init__.py b/conserver/links/tag_router/__init__.py index cc4a284..53ebe8d 100644 --- a/conserver/links/tag_router/__init__.py +++ b/conserver/links/tag_router/__init__.py @@ -2,6 +2,7 @@ from lib.vcon_redis import VconRedis from lib.metrics import increment_counter from lib.queue import VconQueue +from vcon import Vcon logger = init_logger(__name__) @@ -45,38 +46,44 @@ def run(vcon_uuid, link_name, opts=default_options): logger.warning(f"No tag routes configured for {link_name}, skipping") return vcon_uuid - # Extract all tags from attachments - tags = [] + # Collect tag names from every tags attachment on the vCon. + tag_names = [] for attachment in vcon.attachments: - # Handle only plural "tags" format - if attachment['type'] == "tags" and 'body' in attachment: - if isinstance(attachment['body'], list): - # Process each tag string in the list - for tag_str in attachment['body']: - if isinstance(tag_str, str) and ":" in tag_str: - # Split on first colon to get tag name - tag_name = tag_str.split(":", 1)[0] - if tag_name: - tags.append(tag_name) - elif isinstance(attachment['body'], dict): - # If body is a dict, use the keys as tags - for tag_name in attachment['body'].keys(): + # Handle only plural "tags" format. Spec 0.4.0 renamed the + # identifier from ``type`` to ``purpose`` — match either so old + # and new writers both resolve. + purpose = attachment.get('purpose') or attachment.get('type') + if purpose != "tags" or 'body' not in attachment: + continue + # Decode through Vcon helper so a body stringified by the spec + # normalizer (encoding=json) still presents as a list/dict here. + body = Vcon.decoded_body(attachment) + if isinstance(body, list): + # Each entry is a "name:value" string; we only route on name. + for tag_str in body: + if isinstance(tag_str, str) and ":" in tag_str: + tag_name = tag_str.split(":", 1)[0] if tag_name: - tags.append(tag_name) - - if not tags: + tag_names.append(tag_name) + elif isinstance(body, dict): + # Dict-shaped tags attachment — keys are the tag names. + for tag_name in body.keys(): + if tag_name: + tag_names.append(tag_name) + + if not tag_names: logger.debug(f"No tags found in vCon {vcon_uuid}") return vcon_uuid if opts.get("forward_original") else None attrs = {"link.name": link_name, "vcon.uuid": vcon_uuid} - # Route the vCon to the appropriate Redis lists based on tags + # Route the vCon to the configured Redis lists by tag name. queue = VconQueue() routed = False - for tag in tags: - if tag in opts["tag_routes"]: - target_list = opts["tag_routes"][tag] - logger.info(f"Routing vCon {vcon_uuid} to list '{target_list}' based on tag '{tag}'") + for tag_name in tag_names: + if tag_name in opts["tag_routes"]: + target_list = opts["tag_routes"][tag_name] + logger.info(f"Routing vCon {vcon_uuid} to list '{target_list}' based on tag '{tag_name}'") # Push the vCon UUID to the target Redis list queue.enqueue(target_list, vcon_uuid) increment_counter( @@ -85,7 +92,7 @@ def run(vcon_uuid, link_name, opts=default_options): ) routed = True else: - logger.debug(f"No route configured for tag '{tag}'") + logger.debug(f"No route configured for tag '{tag_name}'") if routed: increment_counter("conserver.link.tag_router.routed_count", attributes=attrs) diff --git a/conserver/links/tag_router/test_tag_router.py b/conserver/links/tag_router/test_tag_router.py index 9f48fe7..b9a3385 100644 --- a/conserver/links/tag_router/test_tag_router.py +++ b/conserver/links/tag_router/test_tag_router.py @@ -1,3 +1,4 @@ +import json import pytest from unittest.mock import patch, MagicMock from links.tag_router import run @@ -433,11 +434,124 @@ def test_tag_router_with_name_only_tags(mock_redis_with_name_tag_vcon): def test_tag_router_with_default_options(mock_redis_with_vcon): """Test link with default options""" _, mock_redis = mock_redis_with_vcon - + result = run("test-uuid", "test-link") - + # Check that no routing occurred (default tag_routes is empty) mock_redis.rpush.assert_not_called() - + # Check that it was forwarded (default forward_original is True) assert result == "test-uuid" + + +# --------------------------------------------------------------------------- +# Regression: VconRedis._enforce_spec_on_write rewrites attachment bodies to +# encoding="json" + a JSON-encoded string (per draft-ietf-vcon-vcon-core-02). +# tag_router used to fall through both isinstance branches and silently route +# nothing on the reload-and-process path. It now must extract tag names by +# decoding via Vcon.decoded_body. +# --------------------------------------------------------------------------- + +@pytest.fixture +def sample_vcon_with_stringified_list_body(): + """vCon whose tags attachment looks like one reloaded from Redis post-normalize.""" + return Vcon({ + "uuid": "test-uuid-stringified", + "vcon": "0.0.1", + "parties": [], + "dialog": [], + "analysis": [], + "attachments": [ + { + "type": "tags", + "body": json.dumps(["important:important", "followup:followup"]), + "encoding": "json", + } + ], + "redacted": {}, + }) + + +@pytest.fixture +def sample_vcon_with_stringified_dict_body(): + return Vcon({ + "uuid": "test-uuid-stringified-dict", + "vcon": "0.0.1", + "parties": [], + "dialog": [], + "analysis": [], + "attachments": [ + { + "type": "tags", + "body": json.dumps({"cdr_id": "abc", "call_type": "0"}), + "encoding": "json", + } + ], + "redacted": {}, + }) + + +def test_tag_router_with_json_stringified_list_body( + mock_vcon_redis, mock_redis, sample_vcon_with_stringified_list_body +): + mock_instance = MagicMock() + mock_instance.get_vcon.return_value = sample_vcon_with_stringified_list_body + mock_vcon_redis.return_value = mock_instance + + opts = {"tag_routes": {"important": "important_list", "followup": "followup_list"}} + + result = run("test-uuid-stringified", "test-link", opts) + + mock_redis.rpush.assert_any_call("important_list", "test-uuid-stringified") + mock_redis.rpush.assert_any_call("followup_list", "test-uuid-stringified") + assert mock_redis.rpush.call_count == 2 + assert result == "test-uuid-stringified" + + +def test_tag_router_with_json_stringified_dict_body( + mock_vcon_redis, mock_redis, sample_vcon_with_stringified_dict_body +): + mock_instance = MagicMock() + mock_instance.get_vcon.return_value = sample_vcon_with_stringified_dict_body + mock_vcon_redis.return_value = mock_instance + + opts = {"tag_routes": {"cdr_id": "cdr_list", "call_type": "call_type_list"}} + + result = run("test-uuid-stringified-dict", "test-link", opts) + + mock_redis.rpush.assert_any_call("cdr_list", "test-uuid-stringified-dict") + mock_redis.rpush.assert_any_call("call_type_list", "test-uuid-stringified-dict") + assert mock_redis.rpush.call_count == 2 + assert result == "test-uuid-stringified-dict" + + +def test_tag_router_matches_spec_current_purpose_key(mock_vcon_redis, mock_redis): + # Vcon.add_tag now writes ``purpose: tags`` per spec 0.4.0. tag_router + # must recognize that shape, not just the legacy ``type: tags`` one. + vcon = Vcon({ + "uuid": "test-uuid-purpose", + "vcon": "0.0.1", + "parties": [], + "dialog": [], + "analysis": [], + "attachments": [ + { + "purpose": "tags", + "body": json.dumps(["important:important"]), + "encoding": "json", + } + ], + "redacted": {}, + }) + mock_instance = MagicMock() + mock_instance.get_vcon.return_value = vcon + mock_vcon_redis.return_value = mock_instance + + result = run( + "test-uuid-purpose", + "test-link", + {"tag_routes": {"important": "important_list"}}, + ) + + mock_redis.rpush.assert_called_once_with("important_list", "test-uuid-purpose") + assert result == "test-uuid-purpose" diff --git a/tests/core/test_filters.py b/tests/core/test_filters.py new file mode 100644 index 0000000..753d8bb --- /dev/null +++ b/tests/core/test_filters.py @@ -0,0 +1,86 @@ +"""Regression tests for ``common/lib/links/filters.is_included``. + +Per draft-ietf-vcon-vcon-core-02 §2.3.2 ``body`` is always a String, so the +filter substring-matches directly without decoding — except for the ``tags`` +purpose, whose body is a JSON-encoded list of ``"name:value"`` strings. + +Attachments also accept either the spec-current ``purpose`` key or the +legacy ``type`` key, matching how :meth:`Vcon.find_attachment_by_purpose` +already tolerates both. +""" +import json + +from lib.links.filters import is_included +from vcon import Vcon + + +def _vcon_with_analysis(body, encoding="none"): + v = Vcon.build_new() + v.vcon_dict["analysis"].append( + {"type": "summary", "dialog": 0, "vendor": "x", "body": body, "encoding": encoding} + ) + return v + + +def _vcon_with_tags_attachment(body, encoding="json"): + v = Vcon.build_new() + v.vcon_dict["attachments"].append( + {"type": "tags", "body": body, "encoding": encoding} + ) + return v + + +def test_is_included_matches_string_analysis_body(): + v = _vcon_with_analysis("NEEDS REVIEW: customer escalated") + options = {"only_if": {"section": "analysis", "type": "summary", "includes": "NEEDS REVIEW"}} + assert is_included(options, v) is True + + +def test_is_included_substring_matches_json_encoded_body_as_string(): + # encoding=json means body is already a JSON-encoded *string* per spec; + # substring-matching against the raw string is the spec-compliant behavior. + v = _vcon_with_analysis(json.dumps({"text": "NEEDS REVIEW now"}), encoding="json") + options = {"only_if": {"section": "analysis", "type": "summary", "includes": "NEEDS REVIEW"}} + assert is_included(options, v) is True + + +def test_is_included_no_match(): + v = _vcon_with_analysis("everything fine") + options = {"only_if": {"section": "analysis", "type": "summary", "includes": "NEEDS REVIEW"}} + assert is_included(options, v) is False + + +def test_is_included_tags_against_json_stringified_list(): + v = _vcon_with_tags_attachment(json.dumps(["important:important", "vip:vip"])) + options = {"only_if": {"section": "attachments", "type": "tags", "includes": "important:important"}} + assert is_included(options, v) is True + + +def test_is_included_attachment_matched_via_spec_current_purpose_key(): + # Spec-0.4.0 writer: attachment uses ``purpose`` instead of ``type``. + v = Vcon.build_new() + v.vcon_dict["attachments"].append( + {"purpose": "tags", "body": json.dumps(["vip:vip"]), "encoding": "json"} + ) + options = {"only_if": {"section": "attachments", "type": "tags", "includes": "vip:vip"}} + assert is_included(options, v) is True + + +def test_is_included_attachment_matched_via_legacy_type_key(): + # Pre-0.4.0 writer: attachment uses ``type``. Must still resolve. + v = _vcon_with_tags_attachment(json.dumps(["vip:vip"])) + options = {"only_if": {"section": "attachments", "type": "tags", "includes": "vip:vip"}} + assert is_included(options, v) is True + + +def test_is_included_accepts_spec_current_purpose_key_in_filter(): + # Config written against spec 0.4.0 — uses ``purpose`` instead of ``type``. + v = _vcon_with_tags_attachment(json.dumps(["vip:vip"])) + options = {"only_if": {"section": "attachments", "purpose": "tags", "includes": "vip:vip"}} + assert is_included(options, v) is True + + +def test_is_included_returns_true_when_no_only_if(): + v = Vcon.build_new() + assert is_included({}, v) is True + assert is_included(None, v) is True diff --git a/tests/core/test_vcon.py b/tests/core/test_vcon.py index c591d10..331ba58 100644 --- a/tests/core/test_vcon.py +++ b/tests/core/test_vcon.py @@ -44,16 +44,30 @@ def test_add_attachment(): vcon = Vcon.build_new() vcon.add_attachment(body={"key": "value"}, type="test_type") attachment = vcon.find_attachment_by_purpose("test_type") - assert attachment["body"] == {"key": "value"} + # Per spec body is always String — a dict input is JSON-encoded at the + # boundary and ``encoding`` is forced to ``json``. The original Python + # value round-trips via Vcon.decoded_body. + assert attachment["body"] == json.dumps({"key": "value"}) + assert attachment["encoding"] == "json" + assert Vcon.decoded_body(attachment) == {"key": "value"} + + +def test_add_attachment_keeps_freeform_string_body_unchanged(): + vcon = Vcon.build_new() + vcon.add_attachment(body="just text", type="note", encoding="none") + attachment = vcon.find_attachment_by_purpose("note") + assert attachment == {"type": "note", "body": "just text", "encoding": "none"} def test_add_analysis(): vcon = Vcon.build_new() vcon.add_analysis(type="test_type", dialog=[1, 2], vendor="test_vendor", body={"key": "value"}) analysis = vcon.find_analysis_by_type("test_type") - assert analysis["body"] == {"key": "value"} + assert analysis["body"] == json.dumps({"key": "value"}) + assert analysis["encoding"] == "json" assert analysis["dialog"] == [1, 2] assert analysis["vendor"] == "test_vendor" + assert Vcon.decoded_body(analysis) == {"key": "value"} def test_add_party(): @@ -95,8 +109,8 @@ def test_find_attachment_by_type(): warnings.simplefilter("ignore", DeprecationWarning) assert vcon.find_attachment_by_type("test_type") == { "type": "test_type", - "body": {"key": "value"}, - "encoding": "none", + "body": json.dumps({"key": "value"}), + "encoding": "json", } assert vcon.find_attachment_by_type("nonexistent_type") is None @@ -143,7 +157,13 @@ def test_find_attachment_by_purpose_tolerates_missing_keys(): def test_find_analysis_by_type(): vcon = Vcon.build_new() vcon.add_analysis(type="test_type", dialog=[1, 2], vendor="test_vendor", body={"key": "value"}) - assert vcon.find_analysis_by_type("test_type") == {"type": "test_type", "dialog": [1, 2], "vendor": "test_vendor", "body": {"key": "value"}, "encoding": "none"} + assert vcon.find_analysis_by_type("test_type") == { + "type": "test_type", + "dialog": [1, 2], + "vendor": "test_vendor", + "body": json.dumps({"key": "value"}), + "encoding": "json", + } assert vcon.find_analysis_by_type("nonexistent_type") is None @@ -192,4 +212,123 @@ def test_dumps(): def test_error_handling(): with pytest.raises(json.JSONDecodeError): Vcon.build_from_json("invalid_json") - \ No newline at end of file + + +# --------------------------------------------------------------------------- +# Body decoding regression coverage. The Redis store path +# (VconRedis._enforce_spec_on_write) stringifies dict/list bodies to JSON and +# rewrites encoding to "json" per draft-ietf-vcon-vcon-core-02. Read-side +# callers must round-trip through Vcon.decoded_body so they don't see a string +# where they used to see a dict/list. +# --------------------------------------------------------------------------- + + +def test_decoded_body_parses_json_encoded_string(): + entry = {"body": json.dumps({"k": "v"}), "encoding": "json"} + assert Vcon.decoded_body(entry) == {"k": "v"} + + +def test_decoded_body_returns_freeform_string_for_encoding_none(): + # encoding=none means body is a freeform string, no parsing. + entry = {"body": "NEEDS REVIEW: trailing context", "encoding": "none"} + assert Vcon.decoded_body(entry) == "NEEDS REVIEW: trailing context" + + +def test_decoded_body_passes_through_legacy_dict_body(): + # Legacy writers placed dict/list directly under body with encoding=none. + # The helper returns it as-is so callers don't have to special-case. + entry = {"body": {"k": "v"}, "encoding": "none"} + assert Vcon.decoded_body(entry) == {"k": "v"} + + +def test_decoded_body_handles_none_entry(): + assert Vcon.decoded_body(None) is None + assert Vcon.decoded_body({}) is None + + +def test_with_decoded_body_returns_shallow_copy_with_parsed_body(): + entry = { + "type": "transcript", + "dialog": 0, + "body": json.dumps({"transcript": "hello"}), + "encoding": "json", + } + decoded = Vcon.with_decoded_body(entry) + + assert decoded == { + "type": "transcript", + "dialog": 0, + "body": {"transcript": "hello"}, + "encoding": "json", + } + # Source entry untouched — important for callers that don't want to + # mutate the underlying vCon. + assert entry["body"] == json.dumps({"transcript": "hello"}) + + +def test_with_decoded_body_returns_none_for_empty_input(): + assert Vcon.with_decoded_body(None) is None + assert Vcon.with_decoded_body({}) is None + + +def test_add_tag_writes_spec_correct_shape(): + # Per draft-ietf-vcon-vcon-core-02 §2.3.2 body is always a String. The + # tags purpose carries a JSON-encoded list of "name:value" strings. + vcon = Vcon.build_new() + vcon.add_tag("first", "1") + vcon.add_tag("second", "2") + + tags_attachment = vcon.find_attachment_by_purpose("tags") + assert tags_attachment["encoding"] == "json" + assert isinstance(tags_attachment["body"], str) + assert json.loads(tags_attachment["body"]) == ["first:1", "second:2"] + assert vcon.get_tag("first") == "1" + assert vcon.get_tag("second") == "2" + + +def test_add_tag_appends_to_legacy_unstringified_body(): + # A tags attachment in the legacy shape (encoding=none, body is a Python + # list) must still be appendable — add_tag decodes, appends, and rewrites + # in the spec-correct shape. + vcon = Vcon.build_new() + vcon.vcon_dict["attachments"].append( + {"type": "tags", "body": ["legacy:1"], "encoding": "none"} + ) + + vcon.add_tag("new", "2") + + tags_attachment = vcon.find_attachment_by_purpose("tags") + assert tags_attachment["encoding"] == "json" + assert json.loads(tags_attachment["body"]) == ["legacy:1", "new:2"] + + +def test_add_tag_appends_to_already_stringified_body(): + # Reproduces the reported crash path: a tags attachment carrying the + # spec-correct shape (encoding=json + stringified list) must be + # appendable without the previous AttributeError. + vcon = Vcon.build_new() + vcon.vcon_dict["attachments"].append( + {"type": "tags", "body": json.dumps(["first:1"]), "encoding": "json"} + ) + + vcon.add_tag("second", "2") + + assert vcon.get_tag("first") == "1" + assert vcon.get_tag("second") == "2" + + +def test_get_tag_reads_through_json_encoded_body(): + vcon = Vcon.build_new() + vcon.vcon_dict["attachments"].append( + {"type": "tags", "body": json.dumps(["alpha:a", "beta:b"]), "encoding": "json"} + ) + assert vcon.get_tag("alpha") == "a" + assert vcon.get_tag("beta") == "b" + assert vcon.get_tag("missing") is None + + +def test_get_tag_preserves_colons_in_value(): + # split(":", 1) — values containing ':' must not be truncated. + vcon = Vcon.build_new() + vcon.add_tag("url", "https://example.com/path") + assert vcon.get_tag("url") == "https://example.com/path"