From a0bb65c870d99bce9afe9e2c74d42339616cc32c Mon Sep 17 00:00:00 2001 From: Raman369AI Date: Sat, 9 May 2026 23:07:37 -0500 Subject: [PATCH 1/3] fix: terminate infinite retry loop in LoadSkillResourceTool on RESOURCE_NOT_FOUND When load_skill_resource returned RESOURCE_NOT_FOUND, the LLM treated it as a transient soft error and retried the same path indefinitely, producing runaway invocations and large API bills. Two complementary guards are added: 1. Code (LoadSkillResourceTool.run_async): an invocation-scoped retry guard tracks already-attempted (skill, path) pairs in tool_context.state under a temp:_adk_skill_resource_failed_paths_ key. The temp: prefix prevents persistence to durable session storage; the invocation_id suffix prevents leakage across invocations on in-memory session backends (where temp keys are not auto-cleared). A second call on the same path within the same invocation returns RESOURCE_NOT_FOUND_FATAL with an explicit stop instruction, giving the LLM an unambiguous terminal signal. 2. Prompt (DEFAULT_SKILL_SYSTEM_INSTRUCTION): adds a rule prohibiting retrying the same path after any error, and a scope boundary clarifying that load_skill_resource is for skill-bundled files only, not for runtime user input (a common source of hallucinated paths). Neither guard alone is sufficient: a code-only stop produces confusing downstream behavior when the LLM has no semantic understanding of why to stop; a prompt-only guard can be ignored under imperfect system prompts. Both layers are required for defense-in-depth. Tests cover: repeat-failure escalation, distinct-path soft errors not escalating, cross-invocation isolation with shared session state, and the temp: prefix on tracking keys. --- src/google/adk/tools/skill_toolset.py | 26 +++++- tests/unittests/tools/test_skill_toolset.py | 90 ++++++++++++++++++++- 2 files changed, 114 insertions(+), 2 deletions(-) diff --git a/src/google/adk/tools/skill_toolset.py b/src/google/adk/tools/skill_toolset.py index 08dc937d7b..0a95a263a5 100644 --- a/src/google/adk/tools/skill_toolset.py +++ b/src/google/adk/tools/skill_toolset.py @@ -80,10 +80,14 @@ "of them in order.\n" "3. The `load_skill_resource` tool is for viewing files within a " "skill's directory (e.g., `references/*`, `assets/*`, `scripts/*`). " - "Do NOT use other tools to access these files.\n" + "It is ONLY for skill-bundled files — do NOT use it to access " + "documents or files provided by the user at runtime. Do NOT use " + "other tools to access skill files.\n" "4. Use `run_skill_script` to run scripts from a skill's `scripts/` " "directory. Use `load_skill_resource` to view script content first if " "needed.\n" + "5. If `load_skill_resource` returns any error, do not retry the " + "same path. Report the error to the user and stop.\n" ) @@ -261,6 +265,26 @@ async def run_async( } if content is None: + # Invocation-scoped retry guard. The `temp:` prefix prevents persistence + # to durable session storage; appending invocation_id ensures the guard + # does not leak across invocations on in-memory session backends (where + # temp keys are not auto-cleared). + failed_key = ( + f"temp:_adk_skill_resource_failed_paths_{tool_context.invocation_id}" + ) + failed_paths = list(tool_context.state.get(failed_key) or []) + resource_id = f"{skill_name}:{file_path}" + if resource_id in failed_paths: + return { + "error": ( + f"Resource '{file_path}' not found in skill '{skill_name}'." + " This path was already attempted and failed. Do not retry" + " — report the error to the user and stop." + ), + "error_code": "RESOURCE_NOT_FOUND_FATAL", + } + failed_paths.append(resource_id) + tool_context.state[failed_key] = failed_paths return { "error": f"Resource '{file_path}' not found in skill '{skill_name}'.", "error_code": "RESOURCE_NOT_FOUND", diff --git a/tests/unittests/tools/test_skill_toolset.py b/tests/unittests/tools/test_skill_toolset.py index 7d60110177..e1a03ec8f9 100644 --- a/tests/unittests/tools/test_skill_toolset.py +++ b/tests/unittests/tools/test_skill_toolset.py @@ -473,10 +473,97 @@ async def test_scripts_resource_not_found(mock_skill1, tool_context_instance): assert result["error_code"] == "RESOURCE_NOT_FOUND" +@pytest.mark.asyncio +async def test_load_resource_repeated_failure_escalates_to_fatal(mock_skill1): + """Second call with same missing path within an invocation returns RESOURCE_NOT_FOUND_FATAL.""" + toolset = skill_toolset.SkillToolset([mock_skill1]) + tool = skill_toolset.LoadSkillResourceTool(toolset) + ctx = _make_tool_context_with_agent() + + args = {"skill_name": "skill1", "file_path": "references/nonexistent.md"} + + result1 = await tool.run_async(args=args, tool_context=ctx) + assert result1["error_code"] == "RESOURCE_NOT_FOUND" + + result2 = await tool.run_async(args=args, tool_context=ctx) + assert result2["error_code"] == "RESOURCE_NOT_FOUND_FATAL" + assert "Do not retry" in result2["error"] + assert "stop" in result2["error"].lower() + + +@pytest.mark.asyncio +async def test_load_resource_different_paths_each_soft_fail(mock_skill1): + """Different missing paths each get a soft error (no cross-path escalation).""" + toolset = skill_toolset.SkillToolset([mock_skill1]) + tool = skill_toolset.LoadSkillResourceTool(toolset) + ctx = _make_tool_context_with_agent() + + result1 = await tool.run_async( + args={"skill_name": "skill1", "file_path": "references/missing_a.md"}, + tool_context=ctx, + ) + assert result1["error_code"] == "RESOURCE_NOT_FOUND" + + result2 = await tool.run_async( + args={"skill_name": "skill1", "file_path": "references/missing_b.md"}, + tool_context=ctx, + ) + assert result2["error_code"] == "RESOURCE_NOT_FOUND" + + +@pytest.mark.asyncio +async def test_load_resource_failures_isolated_per_invocation(mock_skill1): + """Failed-path tracking does not leak across invocations. + + A path that hit a soft RESOURCE_NOT_FOUND in invocation A must still + return RESOURCE_NOT_FOUND (not _FATAL) on its first attempt in + invocation B, even when sharing session state. + """ + toolset = skill_toolset.SkillToolset([mock_skill1]) + tool = skill_toolset.LoadSkillResourceTool(toolset) + + shared_state = {} + ctx_a = _make_tool_context_with_agent(invocation_id="inv_a") + ctx_a.state = shared_state + ctx_b = _make_tool_context_with_agent(invocation_id="inv_b") + ctx_b.state = shared_state + + args = {"skill_name": "skill1", "file_path": "references/typo.md"} + + result_a = await tool.run_async(args=args, tool_context=ctx_a) + assert result_a["error_code"] == "RESOURCE_NOT_FOUND" + + result_b = await tool.run_async(args=args, tool_context=ctx_b) + assert result_b["error_code"] == "RESOURCE_NOT_FOUND" + + # And the fatal escalation still works within a single invocation. + result_b2 = await tool.run_async(args=args, tool_context=ctx_b) + assert result_b2["error_code"] == "RESOURCE_NOT_FOUND_FATAL" + + +@pytest.mark.asyncio +async def test_load_resource_failed_paths_use_temp_prefix(mock_skill1): + """Tracking key uses the `temp:` prefix so it is not persisted.""" + toolset = skill_toolset.SkillToolset([mock_skill1]) + tool = skill_toolset.LoadSkillResourceTool(toolset) + ctx = _make_tool_context_with_agent() + + await tool.run_async( + args={"skill_name": "skill1", "file_path": "references/missing.md"}, + tool_context=ctx, + ) + + # Every key written by the retry guard must start with `temp:` so it gets + # trimmed from the event delta and never reaches durable storage. + guard_keys = [k for k in ctx.state if "skill_resource_failed_paths" in k] + assert guard_keys, "Retry guard did not write a tracking key." + assert all(k.startswith("temp:") for k in guard_keys) + + # RunSkillScriptTool tests -def _make_tool_context_with_agent(agent=None): +def _make_tool_context_with_agent(agent=None, invocation_id="test_invocation"): """Creates a mock ToolContext with _invocation_context.agent.""" ctx = mock.MagicMock(spec=tool_context.ToolContext) ctx._invocation_context = mock.MagicMock() @@ -484,6 +571,7 @@ def _make_tool_context_with_agent(agent=None): ctx._invocation_context.agent.name = "test_agent" ctx._invocation_context.agent_states = {} ctx.agent_name = "test_agent" + ctx.invocation_id = invocation_id ctx.state = {} return ctx From f89e4b6d9af6a76759ebb20334bcebe6e63ba999 Mon Sep 17 00:00:00 2001 From: Raman369AI Date: Tue, 12 May 2026 21:54:57 -0500 Subject: [PATCH 2/3] fix: switch retry guard to invocation-wide counter; fix logger pattern MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace per-path failed-paths list with a total RESOURCE_NOT_FOUND counter (temp:_adk_skill_resource_not_found_count_). Live tracing showed the LLM hallucinates a different path on every retry, so a per-path guard never fired. The counter catches any second failure regardless of path. - Fix forbidden logger pattern in oauth_mcp_server.py (getLogger(__name__) → getLogger('google_adk.' + __name__)). - Update tests to match counter-based logic: rename/rewrite guard tests, add test_load_resource_different_path_also_escalates_to_fatal to cover the hallucinated-variant pattern, fix mock state issue in parametrized test and test_scripts_resource_not_found. --- .../mcp_toolset_auth/oauth_mcp_server.py | 2 +- src/google/adk/tools/skill_toolset.py | 25 +++-- tests/unittests/tools/test_skill_toolset.py | 92 ++++++++++++------- 3 files changed, 72 insertions(+), 47 deletions(-) diff --git a/contributing/samples/mcp_toolset_auth/oauth_mcp_server.py b/contributing/samples/mcp_toolset_auth/oauth_mcp_server.py index 0eeab51c6a..b48046e20e 100644 --- a/contributing/samples/mcp_toolset_auth/oauth_mcp_server.py +++ b/contributing/samples/mcp_toolset_auth/oauth_mcp_server.py @@ -33,7 +33,7 @@ import uvicorn logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) +logger = logging.getLogger('google_adk.' + __name__) # Expected OAuth token for testing VALID_TOKEN = 'test_access_token_12345' diff --git a/src/google/adk/tools/skill_toolset.py b/src/google/adk/tools/skill_toolset.py index 0a95a263a5..b56211af2c 100644 --- a/src/google/adk/tools/skill_toolset.py +++ b/src/google/adk/tools/skill_toolset.py @@ -265,26 +265,25 @@ async def run_async( } if content is None: - # Invocation-scoped retry guard. The `temp:` prefix prevents persistence - # to durable session storage; appending invocation_id ensures the guard - # does not leak across invocations on in-memory session backends (where - # temp keys are not auto-cleared). - failed_key = ( - f"temp:_adk_skill_resource_failed_paths_{tool_context.invocation_id}" + # Invocation-scoped failure counter. Counts RESOURCE_NOT_FOUND across ALL + # paths so the guard fires even when the LLM hallucinates a different path + # on each retry. The `temp:` prefix prevents persistence to durable + # session storage; invocation_id isolates in-memory backends. + counter_key = ( + f"temp:_adk_skill_resource_not_found_count_{tool_context.invocation_id}" ) - failed_paths = list(tool_context.state.get(failed_key) or []) - resource_id = f"{skill_name}:{file_path}" - if resource_id in failed_paths: + fail_count = int(tool_context.state.get(counter_key) or 0) + 1 + tool_context.state[counter_key] = fail_count + if fail_count > 1: return { "error": ( f"Resource '{file_path}' not found in skill '{skill_name}'." - " This path was already attempted and failed. Do not retry" - " — report the error to the user and stop." + f" This is resource lookup failure #{fail_count} this" + " invocation. Do not retry any path — report the error to" + " the user and stop." ), "error_code": "RESOURCE_NOT_FOUND_FATAL", } - failed_paths.append(resource_id) - tool_context.state[failed_key] = failed_paths return { "error": f"Resource '{file_path}' not found in skill '{skill_name}'.", "error_code": "RESOURCE_NOT_FOUND", diff --git a/tests/unittests/tools/test_skill_toolset.py b/tests/unittests/tools/test_skill_toolset.py index e1a03ec8f9..523feab35d 100644 --- a/tests/unittests/tools/test_skill_toolset.py +++ b/tests/unittests/tools/test_skill_toolset.py @@ -331,16 +331,10 @@ async def test_load_skill_run_async_state_none( "error_code": "SKILL_NOT_FOUND", }, ), - ( - {"skill_name": "skill1", "file_path": "references/other.md"}, - { - "error": ( - "Resource 'references/other.md' not found in skill" - " 'skill1'." - ), - "error_code": "RESOURCE_NOT_FOUND", - }, - ), + # RESOURCE_NOT_FOUND is tested separately in + # test_load_resource_first_missing_returns_soft_error because the + # counter guard requires a real state dict (mock state.get() returns a + # truthy MagicMock that int() coerces to 1, skipping the soft path). ( {"skill_name": "skill1", "file_path": "invalid/path.txt"}, { @@ -463,19 +457,36 @@ def test_duplicate_skill_name_raises(mock_skill1): @pytest.mark.asyncio -async def test_scripts_resource_not_found(mock_skill1, tool_context_instance): +async def test_scripts_resource_not_found(mock_skill1): toolset = skill_toolset.SkillToolset([mock_skill1]) tool = skill_toolset.LoadSkillResourceTool(toolset) + ctx = _make_tool_context_with_agent() result = await tool.run_async( args={"skill_name": "skill1", "file_path": "scripts/nonexistent.sh"}, - tool_context=tool_context_instance, + tool_context=ctx, ) assert result["error_code"] == "RESOURCE_NOT_FOUND" +@pytest.mark.asyncio +async def test_load_resource_first_missing_returns_soft_error(mock_skill1): + """First RESOURCE_NOT_FOUND in an invocation returns the soft error code.""" + toolset = skill_toolset.SkillToolset([mock_skill1]) + tool = skill_toolset.LoadSkillResourceTool(toolset) + ctx = _make_tool_context_with_agent() + result = await tool.run_async( + args={"skill_name": "skill1", "file_path": "references/other.md"}, + tool_context=ctx, + ) + assert result["error_code"] == "RESOURCE_NOT_FOUND" + assert result["error"] == ( + "Resource 'references/other.md' not found in skill 'skill1'." + ) + + @pytest.mark.asyncio async def test_load_resource_repeated_failure_escalates_to_fatal(mock_skill1): - """Second call with same missing path within an invocation returns RESOURCE_NOT_FOUND_FATAL.""" + """Any second RESOURCE_NOT_FOUND within an invocation returns RESOURCE_NOT_FOUND_FATAL.""" toolset = skill_toolset.SkillToolset([mock_skill1]) tool = skill_toolset.LoadSkillResourceTool(toolset) ctx = _make_tool_context_with_agent() @@ -489,11 +500,16 @@ async def test_load_resource_repeated_failure_escalates_to_fatal(mock_skill1): assert result2["error_code"] == "RESOURCE_NOT_FOUND_FATAL" assert "Do not retry" in result2["error"] assert "stop" in result2["error"].lower() + assert "failure #2" in result2["error"] @pytest.mark.asyncio -async def test_load_resource_different_paths_each_soft_fail(mock_skill1): - """Different missing paths each get a soft error (no cross-path escalation).""" +async def test_load_resource_different_path_also_escalates_to_fatal(mock_skill1): + """A different missing path on the second call still escalates to RESOURCE_NOT_FOUND_FATAL. + + The counter is path-agnostic: any second not-found within the same invocation + is fatal, even when the LLM hallucinates a different path on each retry. + """ toolset = skill_toolset.SkillToolset([mock_skill1]) tool = skill_toolset.LoadSkillResourceTool(toolset) ctx = _make_tool_context_with_agent() @@ -508,16 +524,17 @@ async def test_load_resource_different_paths_each_soft_fail(mock_skill1): args={"skill_name": "skill1", "file_path": "references/missing_b.md"}, tool_context=ctx, ) - assert result2["error_code"] == "RESOURCE_NOT_FOUND" + assert result2["error_code"] == "RESOURCE_NOT_FOUND_FATAL" + assert "Do not retry" in result2["error"] @pytest.mark.asyncio async def test_load_resource_failures_isolated_per_invocation(mock_skill1): - """Failed-path tracking does not leak across invocations. + """Failure counter does not leak across invocations. - A path that hit a soft RESOURCE_NOT_FOUND in invocation A must still - return RESOURCE_NOT_FOUND (not _FATAL) on its first attempt in - invocation B, even when sharing session state. + A RESOURCE_NOT_FOUND in invocation A must not increment invocation B's + counter; invocation B's first missing-resource call must still return the + soft error, even when both invocations share the same session state dict. """ toolset = skill_toolset.SkillToolset([mock_skill1]) tool = skill_toolset.LoadSkillResourceTool(toolset) @@ -528,22 +545,31 @@ async def test_load_resource_failures_isolated_per_invocation(mock_skill1): ctx_b = _make_tool_context_with_agent(invocation_id="inv_b") ctx_b.state = shared_state - args = {"skill_name": "skill1", "file_path": "references/typo.md"} - - result_a = await tool.run_async(args=args, tool_context=ctx_a) + # invocation A: one failure — counter for inv_a reaches 1 (soft). + result_a = await tool.run_async( + args={"skill_name": "skill1", "file_path": "references/typo.md"}, + tool_context=ctx_a, + ) assert result_a["error_code"] == "RESOURCE_NOT_FOUND" - result_b = await tool.run_async(args=args, tool_context=ctx_b) - assert result_b["error_code"] == "RESOURCE_NOT_FOUND" + # invocation B, first attempt (different path) — counter for inv_b = 1 (soft). + result_b1 = await tool.run_async( + args={"skill_name": "skill1", "file_path": "references/typo.md"}, + tool_context=ctx_b, + ) + assert result_b1["error_code"] == "RESOURCE_NOT_FOUND" - # And the fatal escalation still works within a single invocation. - result_b2 = await tool.run_async(args=args, tool_context=ctx_b) + # invocation B, second attempt (different path) — counter for inv_b = 2 (fatal). + result_b2 = await tool.run_async( + args={"skill_name": "skill1", "file_path": "references/other.md"}, + tool_context=ctx_b, + ) assert result_b2["error_code"] == "RESOURCE_NOT_FOUND_FATAL" @pytest.mark.asyncio -async def test_load_resource_failed_paths_use_temp_prefix(mock_skill1): - """Tracking key uses the `temp:` prefix so it is not persisted.""" +async def test_load_resource_counter_uses_temp_prefix(mock_skill1): + """Failure-counter key uses the `temp:` prefix so it is not persisted.""" toolset = skill_toolset.SkillToolset([mock_skill1]) tool = skill_toolset.LoadSkillResourceTool(toolset) ctx = _make_tool_context_with_agent() @@ -553,10 +579,10 @@ async def test_load_resource_failed_paths_use_temp_prefix(mock_skill1): tool_context=ctx, ) - # Every key written by the retry guard must start with `temp:` so it gets - # trimmed from the event delta and never reaches durable storage. - guard_keys = [k for k in ctx.state if "skill_resource_failed_paths" in k] - assert guard_keys, "Retry guard did not write a tracking key." + # The counter key must start with `temp:` so it is trimmed from the event + # delta and never reaches durable storage. + guard_keys = [k for k in ctx.state if "skill_resource_not_found_count" in k] + assert guard_keys, "Failure counter did not write a tracking key." assert all(k.startswith("temp:") for k in guard_keys) From 755b2387fce34fca90dd3c187f7656dce03e2e4f Mon Sep 17 00:00:00 2001 From: Raman369AI Date: Tue, 12 May 2026 22:06:25 -0500 Subject: [PATCH 3/3] style: apply pyink formatting --- src/google/adk/tools/skill_toolset.py | 4 +--- tests/unittests/tools/test_skill_toolset.py | 4 +++- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/google/adk/tools/skill_toolset.py b/src/google/adk/tools/skill_toolset.py index b56211af2c..d9bf2da705 100644 --- a/src/google/adk/tools/skill_toolset.py +++ b/src/google/adk/tools/skill_toolset.py @@ -269,9 +269,7 @@ async def run_async( # paths so the guard fires even when the LLM hallucinates a different path # on each retry. The `temp:` prefix prevents persistence to durable # session storage; invocation_id isolates in-memory backends. - counter_key = ( - f"temp:_adk_skill_resource_not_found_count_{tool_context.invocation_id}" - ) + counter_key = f"temp:_adk_skill_resource_not_found_count_{tool_context.invocation_id}" fail_count = int(tool_context.state.get(counter_key) or 0) + 1 tool_context.state[counter_key] = fail_count if fail_count > 1: diff --git a/tests/unittests/tools/test_skill_toolset.py b/tests/unittests/tools/test_skill_toolset.py index 523feab35d..1a01587560 100644 --- a/tests/unittests/tools/test_skill_toolset.py +++ b/tests/unittests/tools/test_skill_toolset.py @@ -504,7 +504,9 @@ async def test_load_resource_repeated_failure_escalates_to_fatal(mock_skill1): @pytest.mark.asyncio -async def test_load_resource_different_path_also_escalates_to_fatal(mock_skill1): +async def test_load_resource_different_path_also_escalates_to_fatal( + mock_skill1, +): """A different missing path on the second call still escalates to RESOURCE_NOT_FOUND_FATAL. The counter is path-agnostic: any second not-found within the same invocation