From d2ab51ccd0b28456b86e198785a7ad4af8900f50 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 6 May 2026 03:51:39 +0000 Subject: [PATCH 1/3] Initial plan From 7d8cb9d520f9f5f52d31e54c13e86bef98308589 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 6 May 2026 03:59:10 +0000 Subject: [PATCH 2/3] Fix TaskNavigationEfficiencyEvaluator to accept JSON-string inputs from cloud runtime Agent-Logs-Url: https://github.com/Azure/azure-sdk-for-python/sessions/e644088f-cada-4fcf-a537-c3d6c26212eb Co-authored-by: m7md7sien <16615690+m7md7sien@users.noreply.github.com> --- .../azure-ai-evaluation/CHANGELOG.md | 1 + .../_task_navigation_efficiency_validator.py | 116 +++++---- .../_task_navigation_efficiency.py | 42 +++- ...ask_navigation_efficiency_string_inputs.py | 224 ++++++++++++++++++ 4 files changed, 339 insertions(+), 44 deletions(-) create mode 100644 sdk/evaluation/azure-ai-evaluation/tests/unittests/test_task_navigation_efficiency_string_inputs.py diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md index 91da8edbb732..2db2c7e22710 100644 --- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md +++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md @@ -14,6 +14,7 @@ ### Bugs Fixed +- Fixed `_TaskNavigationEfficiencyEvaluator` failing with `'response' must be a list of messages.` when invoked through the cloud Foundry / ACA evaluation runtime, which delivers list/object fields as JSON-encoded strings via dataMapping templating. The evaluator now transparently JSON-decodes string-typed `response` and `ground_truth` inputs before validation. The validator also now accepts the JSON round-tripped tuple form of `ground_truth` (a 2-element `[list, dict]`) as equivalent to the native `(list, dict)` tuple form. - Fixed row classification double-counting in `_calculate_aoai_evaluation_summary` where errored rows were counted separately and could also be counted as passed/failed. Rows are now classified into mutually exclusive buckets with priority: passed > failed > errored > skipped. - Fixed row classification where rows with empty or missing results lists were incorrectly counted as "passed" (the condition `passed_count == len(results) - error_count` evaluated `0 == 0` as True). - Fixed `_get_metric_result` prefix matching where shorter metric names (e.g., `xpia`) could match before longer, more-specific ones (e.g., `xpia_manipulated_content`). Now sorts by length descending for correct longest-prefix matching. diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_common/_validators/_task_navigation_efficiency_validator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_common/_validators/_task_navigation_efficiency_validator.py index dbdda1aa36f0..d9e0ddac7ba0 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_common/_validators/_task_navigation_efficiency_validator.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_common/_validators/_task_navigation_efficiency_validator.py @@ -115,76 +115,92 @@ def _validate_response(self, response: Any) -> Optional[EvaluationException]: return None - def _validate_ground_truth(self, ground_truth: Any) -> Optional[EvaluationException]: - """Validate the ground_truth parameter.""" - if not ground_truth: + def _validate_tool_names_and_params( + self, tool_names: Any, parameters: Any + ) -> Optional[EvaluationException]: + """Validate the (tool_names_list, parameters_dict) pair used in the tuple form of ground_truth. + + :param tool_names: The first element of the tuple/2-element list. + :type tool_names: Any + :param parameters: The second element of the tuple/2-element list. + :type parameters: Any + :return: An :class:`EvaluationException` if validation fails, or ``None`` on success. + :rtype: Optional[EvaluationException] + """ + # Validate tool names list + if not isinstance(tool_names, list): return EvaluationException( - message="'ground_truth' parameter is required and cannot be None or empty.", + message="First element of 'ground_truth' tuple must be a list of tool names.", blame=ErrorBlame.USER_ERROR, - category=ErrorCategory.MISSING_FIELD, + category=ErrorCategory.INVALID_VALUE, target=self.error_target, ) - # ground_truth can be either: - # 1. A list of tool names (strings) - # 2. A tuple of (list of tool names, dict of parameters) + if len(tool_names) == 0: + return EvaluationException( + message="Tool names list in 'ground_truth' cannot be empty.", + blame=ErrorBlame.USER_ERROR, + category=ErrorCategory.INVALID_VALUE, + target=self.error_target, + ) - if isinstance(ground_truth, tuple): - # Validate tuple format: (list, dict) - if len(ground_truth) != 2: + for idx, name in enumerate(tool_names): + if not isinstance(name, str): return EvaluationException( - message="When 'ground_truth' is a tuple, it must contain exactly 2 elements: (tool_names_list, parameters_dict).", + message=f"Tool name at index {idx} in 'ground_truth' must be a string, got {type(name).__name__}.", blame=ErrorBlame.USER_ERROR, category=ErrorCategory.INVALID_VALUE, target=self.error_target, ) - tool_names, parameters = ground_truth + # Validate parameters dict + if not isinstance(parameters, dict): + return EvaluationException( + message="Second element of 'ground_truth' tuple must be a dictionary of parameters.", + blame=ErrorBlame.USER_ERROR, + category=ErrorCategory.INVALID_VALUE, + target=self.error_target, + ) - # Validate tool names list - if not isinstance(tool_names, list): + # Validate parameter values are dicts + for tool_name, params in parameters.items(): + if not isinstance(params, dict): return EvaluationException( - message="First element of 'ground_truth' tuple must be a list of tool names.", + message=f"Parameters for tool '{tool_name}' in 'ground_truth' must be a dictionary, got {type(params).__name__}.", blame=ErrorBlame.USER_ERROR, category=ErrorCategory.INVALID_VALUE, target=self.error_target, ) - if len(tool_names) == 0: - return EvaluationException( - message="Tool names list in 'ground_truth' cannot be empty.", - blame=ErrorBlame.USER_ERROR, - category=ErrorCategory.INVALID_VALUE, - target=self.error_target, - ) + return None - for idx, name in enumerate(tool_names): - if not isinstance(name, str): - return EvaluationException( - message=f"Tool name at index {idx} in 'ground_truth' must be a string, got {type(name).__name__}.", - blame=ErrorBlame.USER_ERROR, - category=ErrorCategory.INVALID_VALUE, - target=self.error_target, - ) + def _validate_ground_truth(self, ground_truth: Any) -> Optional[EvaluationException]: + """Validate the ground_truth parameter.""" + if not ground_truth: + return EvaluationException( + message="'ground_truth' parameter is required and cannot be None or empty.", + blame=ErrorBlame.USER_ERROR, + category=ErrorCategory.MISSING_FIELD, + target=self.error_target, + ) - # Validate parameters dict - if not isinstance(parameters, dict): + # ground_truth can be either: + # 1. A list of tool names (strings) + # 2. A tuple of (list of tool names, dict of parameters) + # 3. A 2-element list [list, dict] — the JSON round-tripped form of (2) + + if isinstance(ground_truth, tuple): + # Validate tuple format: (list, dict) + if len(ground_truth) != 2: return EvaluationException( - message="Second element of 'ground_truth' tuple must be a dictionary of parameters.", + message="When 'ground_truth' is a tuple, it must contain exactly 2 elements: (tool_names_list, parameters_dict).", blame=ErrorBlame.USER_ERROR, category=ErrorCategory.INVALID_VALUE, target=self.error_target, ) - # Validate parameter values are dicts - for tool_name, params in parameters.items(): - if not isinstance(params, dict): - return EvaluationException( - message=f"Parameters for tool '{tool_name}' in 'ground_truth' must be a dictionary, got {type(params).__name__}.", - blame=ErrorBlame.USER_ERROR, - category=ErrorCategory.INVALID_VALUE, - target=self.error_target, - ) + tool_names, parameters = ground_truth + return self._validate_tool_names_and_params(tool_names, parameters) elif isinstance(ground_truth, list): # Validate list of tool names @@ -196,6 +212,20 @@ def _validate_ground_truth(self, ground_truth: Any) -> Optional[EvaluationExcept target=self.error_target, ) + if all(isinstance(name, str) for name in ground_truth): + # Plain list of tool name strings — nothing further to validate. + return None + + if ( + len(ground_truth) == 2 + and isinstance(ground_truth[0], list) + and isinstance(ground_truth[1], dict) + ): + # 2-element list [list, dict] — the JSON round-tripped form of a + # (tool_names_list, parameters_dict) tuple. Validate it the same way. + return self._validate_tool_names_and_params(ground_truth[0], ground_truth[1]) + + # Identify the first non-string element to give a helpful error. for idx, name in enumerate(ground_truth): if not isinstance(name, str): return EvaluationException( diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py index 6d9cb8a4234c..63bc2100e106 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py @@ -139,6 +139,36 @@ def __init__( super().__init__(threshold=1.0) + @staticmethod + def _maybe_json_decode(value: Any, field_name: str) -> Any: + """Decode a JSON-encoded string into a Python object. + + The cloud Foundry / ACA evaluation runtime delivers list/object fields + to code-type evaluators as JSON-encoded strings via dataMapping + templating (e.g. ``${data.response}``). Accept that shape transparently + so that callers using either the in-process Python SDK or the cloud + runtime get consistent behaviour. + + :param value: The value to potentially decode. + :type value: Any + :param field_name: The field name used in error messages. + :type field_name: str + :return: The decoded Python object, or the original value if not a string. + :rtype: Any + :raises EvaluationException: If ``value`` is a string but not valid JSON. + """ + if isinstance(value, str): + try: + return json.loads(value) + except json.JSONDecodeError as exc: + raise EvaluationException( + message=(f"'{field_name}' arrived as a string but is not valid JSON: {exc}"), + internal_message=str(exc), + target=ErrorTarget.TASK_NAVIGATION_EFFICIENCY_EVALUATOR, + category=ErrorCategory.INVALID_VALUE, + ) + return value + @override async def _real_call(self, **kwargs): """The asynchronous call where real end-to-end evaluation logic is performed. @@ -148,6 +178,10 @@ async def _real_call(self, **kwargs): :return: The evaluation result. :rtype: Dict[str, Union[float, str, Dict[str, float]]] """ + if "response" in kwargs: + kwargs["response"] = self._maybe_json_decode(kwargs["response"], "response") + if "ground_truth" in kwargs: + kwargs["ground_truth"] = self._maybe_json_decode(kwargs["ground_truth"], "ground_truth") self._validator.validate_eval_input(kwargs) return await super()._real_call(**kwargs) @@ -275,8 +309,14 @@ async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[float, str, Dict[s ground_truth_names = [] ground_truth_params_dict: Dict[str, Dict[str, Any]] = {} - if isinstance(ground_truth, tuple) and len(ground_truth) == 2: + if (isinstance(ground_truth, tuple) and len(ground_truth) == 2) or ( + isinstance(ground_truth, list) + and len(ground_truth) == 2 + and isinstance(ground_truth[0], list) + and isinstance(ground_truth[1], dict) + ): # Tuple format: (tool_names, parameters_dict) + # Also handles a 2-element list [list, dict] which is the JSON round-tripped form of a tuple. tool_names_list, params_dict = ground_truth if not isinstance(tool_names_list, list) or not all(isinstance(name, str) for name in tool_names_list): diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_task_navigation_efficiency_string_inputs.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_task_navigation_efficiency_string_inputs.py new file mode 100644 index 000000000000..7a310368ff1a --- /dev/null +++ b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_task_navigation_efficiency_string_inputs.py @@ -0,0 +1,224 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- +"""Unit tests for JSON-string input handling in _TaskNavigationEfficiencyEvaluator. + +The cloud Foundry / ACA evaluation runtime delivers list/object fields to code-type +evaluators as JSON-encoded strings via dataMapping templating (e.g. ${data.response}). +These tests verify that the evaluator transparently accepts and decodes those strings. +""" + +import json + +import pytest + +from azure.ai.evaluation._evaluators._task_navigation_efficiency import ( + _TaskNavigationEfficiencyEvaluator, + _TaskNavigationEfficiencyMatchingMode, +) +from azure.ai.evaluation._exceptions import ErrorCategory, EvaluationException + + +# --------------------------------------------------------------------------- +# Fixtures / shared data +# --------------------------------------------------------------------------- + +RESPONSE_LIST = [ + { + "role": "assistant", + "content": [ + { + "type": "tool_call", + "tool_call_id": "call_1", + "name": "search", + "arguments": {"query": "weather", "location": "NYC"}, + } + ], + }, + { + "role": "assistant", + "content": [ + { + "type": "tool_call", + "tool_call_id": "call_2", + "name": "format_result", + "arguments": {"format": "json"}, + } + ], + }, +] + +GROUND_TRUTH_NAMES = ["search", "format_result"] + +GROUND_TRUTH_TUPLE = ( + ["search", "format_result"], + { + "search": {"query": "weather", "location": "NYC"}, + "format_result": {"format": "json"}, + }, +) + +# JSON-round-tripped form of the tuple above — JSON has no tuple type, so it becomes a list. +GROUND_TRUTH_LIST_FORM = [ + ["search", "format_result"], + { + "search": {"query": "weather", "location": "NYC"}, + "format_result": {"format": "json"}, + }, +] + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + + +@pytest.mark.unittest +class TestTaskNavigationEfficiencyStringInputs: + """Tests covering JSON-string input acceptance in _TaskNavigationEfficiencyEvaluator.""" + + # ------------------------------------------------------------------ + # Helpers + # ------------------------------------------------------------------ + + @staticmethod + def _make_evaluator(**kwargs) -> _TaskNavigationEfficiencyEvaluator: + return _TaskNavigationEfficiencyEvaluator( + matching_mode=_TaskNavigationEfficiencyMatchingMode.EXACT_MATCH, **kwargs + ) + + # ------------------------------------------------------------------ + # Happy path — native Python objects (existing behaviour unchanged) + # ------------------------------------------------------------------ + + @pytest.mark.asyncio + async def test_native_list_inputs_still_work(self): + """Existing happy path: native list response and list ground_truth.""" + evaluator = self._make_evaluator() + result = await evaluator._real_call( + response=RESPONSE_LIST, + ground_truth=GROUND_TRUTH_NAMES, + ) + assert result["task_navigation_efficiency_passed"] is True + assert result["task_navigation_efficiency_result"] == "pass" + + @pytest.mark.asyncio + async def test_native_tuple_ground_truth_still_works(self): + """Existing happy path: native tuple ground_truth with parameter matching.""" + evaluator = self._make_evaluator() + result = await evaluator._real_call( + response=RESPONSE_LIST, + ground_truth=GROUND_TRUTH_TUPLE, + ) + assert result["task_navigation_efficiency_passed"] is True + + # ------------------------------------------------------------------ + # New: JSON-string inputs accepted transparently + # ------------------------------------------------------------------ + + @pytest.mark.asyncio + async def test_json_string_response_and_list_ground_truth(self): + """Cloud runtime path: response and ground_truth arrive as JSON-encoded strings.""" + evaluator = self._make_evaluator() + result = await evaluator._real_call( + response=json.dumps(RESPONSE_LIST), + ground_truth=json.dumps(GROUND_TRUTH_NAMES), + ) + assert result["task_navigation_efficiency_passed"] is True + assert result["task_navigation_efficiency_result"] == "pass" + + @pytest.mark.asyncio + async def test_json_string_inputs_match_native_result(self): + """JSON-string inputs produce identical result to native Python object inputs.""" + evaluator = self._make_evaluator() + + native_result = await evaluator._real_call( + response=RESPONSE_LIST, + ground_truth=GROUND_TRUTH_NAMES, + ) + string_result = await evaluator._real_call( + response=json.dumps(RESPONSE_LIST), + ground_truth=json.dumps(GROUND_TRUTH_NAMES), + ) + + assert native_result["task_navigation_efficiency_passed"] == string_result["task_navigation_efficiency_passed"] + assert native_result["task_navigation_efficiency_result"] == string_result["task_navigation_efficiency_result"] + + @pytest.mark.asyncio + async def test_json_string_tuple_form_ground_truth(self): + """JSON round-tripped tuple-form ground_truth (2-element list [list, dict]) is accepted.""" + evaluator = self._make_evaluator() + + # Simulate the JSON round-trip: tuple → JSON string → 2-element list + result = await evaluator._real_call( + response=json.dumps(RESPONSE_LIST), + ground_truth=json.dumps(GROUND_TRUTH_LIST_FORM), + ) + assert result["task_navigation_efficiency_passed"] is True + + @pytest.mark.asyncio + async def test_json_string_tuple_form_uses_parameter_matching(self): + """JSON round-tripped tuple form triggers parameter matching (same as native tuple).""" + evaluator = self._make_evaluator() + + # Native tuple form — parameter mismatch → should fail + wrong_params_tuple = ( + ["search", "format_result"], + { + "search": {"query": "WRONG_QUERY", "location": "NYC"}, + "format_result": {"format": "json"}, + }, + ) + native_result = await evaluator._real_call( + response=RESPONSE_LIST, + ground_truth=wrong_params_tuple, + ) + assert native_result["task_navigation_efficiency_passed"] is False + + # Same test via JSON-string path — must also fail + string_result = await evaluator._real_call( + response=json.dumps(RESPONSE_LIST), + ground_truth=json.dumps(list(wrong_params_tuple)), + ) + assert string_result["task_navigation_efficiency_passed"] is False + + # ------------------------------------------------------------------ + # Error cases + # ------------------------------------------------------------------ + + @pytest.mark.asyncio + async def test_invalid_json_in_response_raises_evaluation_exception(self): + """A non-JSON string in 'response' raises EvaluationException with INVALID_VALUE.""" + evaluator = self._make_evaluator() + with pytest.raises(EvaluationException) as exc_info: + await evaluator._real_call( + response="not valid json {{{{", + ground_truth=GROUND_TRUTH_NAMES, + ) + error = exc_info.value + assert error.category == ErrorCategory.INVALID_VALUE + assert "arrived as a string but is not valid JSON" in error.message + + @pytest.mark.asyncio + async def test_non_string_non_list_response_raises_original_error(self): + """A non-string, non-list value (e.g. 42) in 'response' raises the original error.""" + evaluator = self._make_evaluator() + with pytest.raises(EvaluationException) as exc_info: + await evaluator._real_call( + response=42, # type: ignore[arg-type] + ground_truth=GROUND_TRUTH_NAMES, + ) + assert "'response' must be a list of messages." in exc_info.value.message + + @pytest.mark.asyncio + async def test_invalid_json_in_ground_truth_raises_evaluation_exception(self): + """A non-JSON string in 'ground_truth' raises EvaluationException with INVALID_VALUE.""" + evaluator = self._make_evaluator() + with pytest.raises(EvaluationException) as exc_info: + await evaluator._real_call( + response=RESPONSE_LIST, + ground_truth="[not json", + ) + error = exc_info.value + assert error.category == ErrorCategory.INVALID_VALUE + assert "arrived as a string but is not valid JSON" in error.message From 8b90a2ee44f526c8813c0335295b311a541fc60e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 6 May 2026 04:00:05 +0000 Subject: [PATCH 3/3] Address code review: improve CHANGELOG and docstring clarity Agent-Logs-Url: https://github.com/Azure/azure-sdk-for-python/sessions/e644088f-cada-4fcf-a537-c3d6c26212eb Co-authored-by: m7md7sien <16615690+m7md7sien@users.noreply.github.com> --- sdk/evaluation/azure-ai-evaluation/CHANGELOG.md | 8 +++++++- .../_task_navigation_efficiency.py | 10 +++++----- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md index 2db2c7e22710..17825b2ae956 100644 --- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md +++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md @@ -14,7 +14,13 @@ ### Bugs Fixed -- Fixed `_TaskNavigationEfficiencyEvaluator` failing with `'response' must be a list of messages.` when invoked through the cloud Foundry / ACA evaluation runtime, which delivers list/object fields as JSON-encoded strings via dataMapping templating. The evaluator now transparently JSON-decodes string-typed `response` and `ground_truth` inputs before validation. The validator also now accepts the JSON round-tripped tuple form of `ground_truth` (a 2-element `[list, dict]`) as equivalent to the native `(list, dict)` tuple form. +- Fixed `_TaskNavigationEfficiencyEvaluator` failing with `'response' must be a list of messages.` + when invoked through the cloud Foundry / ACA evaluation runtime. The runtime serializes + list/object dataMapping fields to JSON-encoded strings before calling the Python evaluator; + the evaluator now transparently JSON-decodes such string inputs before validation. +- Fixed the validator rejecting the JSON round-tripped form of the `ground_truth` tuple. + JSON has no tuple type, so a `(list, dict)` tuple round-trips to a `[list, dict]` 2-element + list; both forms are now accepted equivalently. - Fixed row classification double-counting in `_calculate_aoai_evaluation_summary` where errored rows were counted separately and could also be counted as passed/failed. Rows are now classified into mutually exclusive buckets with priority: passed > failed > errored > skipped. - Fixed row classification where rows with empty or missing results lists were incorrectly counted as "passed" (the condition `passed_count == len(results) - error_count` evaluated `0 == 0` as True). - Fixed `_get_metric_result` prefix matching where shorter metric names (e.g., `xpia`) could match before longer, more-specific ones (e.g., `xpia_manipulated_content`). Now sorts by length descending for correct longest-prefix matching. diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py index 63bc2100e106..3509cf2b2dcb 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py @@ -143,11 +143,11 @@ def __init__( def _maybe_json_decode(value: Any, field_name: str) -> Any: """Decode a JSON-encoded string into a Python object. - The cloud Foundry / ACA evaluation runtime delivers list/object fields - to code-type evaluators as JSON-encoded strings via dataMapping - templating (e.g. ``${data.response}``). Accept that shape transparently - so that callers using either the in-process Python SDK or the cloud - runtime get consistent behaviour. + The cloud Foundry / ACA evaluation runtime serializes list/object fields + to JSON-encoded strings via ``dataMapping`` template substitution + (e.g. ``${data.response}``) before invoking the Python evaluator entry-point. + This method accepts that shape transparently so callers using either the + in-process Python SDK or the cloud runtime get consistent behaviour. :param value: The value to potentially decode. :type value: Any