Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions src/google/adk/tools/skill_toolset.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@
"4. Use `run_skill_script` to run scripts from a skill's `scripts/` "
"directory. Use `load_skill_resource` to view script content first if "
"needed.\n"
"5. If `run_skill_script` returns an error (for example "
"`SCRIPT_NOT_FOUND`), do not retry the same script or guess a different "
"script path. Report the error to the user and stop.\n"
)


Expand Down Expand Up @@ -840,6 +843,25 @@ async def run_async(
script = skill.resources.get_script(file_path)

if script is None:
# Invocation-scoped failure counter. Counts SCRIPT_NOT_FOUND across ALL
# paths so the guard fires even when the LLM hallucinates a different
# script path on each retry. The `temp:` prefix prevents persistence to
# durable session storage; invocation_id isolates in-memory backends.
counter_key = (
f"temp:_adk_skill_script_not_found_count_{tool_context.invocation_id}"
)
fail_count = int(tool_context.state.get(counter_key) or 0) + 1
tool_context.state[counter_key] = fail_count
if fail_count > 1:
return {
"error": (
f"Script '{file_path}' not found in skill '{skill_name}'."
f" This is script lookup failure #{fail_count} this"
" invocation. Do not retry any script path — report the"
" error to the user and stop."
),
"error_code": "SCRIPT_NOT_FOUND_FATAL",
}
return {
"error": f"Script '{file_path}' not found in skill '{skill_name}'.",
"error_code": "SCRIPT_NOT_FOUND",
Expand Down
115 changes: 114 additions & 1 deletion tests/unittests/tools/test_skill_toolset.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,14 +499,15 @@ async def test_scripts_resource_not_found(mock_skill1, tool_context_instance):
# RunSkillScriptTool tests


def _make_tool_context_with_agent(agent=None):
def _make_tool_context_with_agent(agent=None, invocation_id="test_invocation"):
  """Creates a mock ToolContext with _invocation_context.agent."""
  # A falsy `agent` argument falls back to a fresh MagicMock, matching the
  # historical `agent or MagicMock()` behavior of this helper.
  resolved_agent = agent or mock.MagicMock()
  ctx = mock.MagicMock(spec=tool_context.ToolContext)
  ctx._invocation_context = mock.MagicMock()
  ctx._invocation_context.agent = resolved_agent
  ctx._invocation_context.agent.name = "test_agent"
  ctx._invocation_context.agent_states = {}
  ctx.agent_name = "test_agent"
  ctx.invocation_id = invocation_id
  # Plain dict so tests can inspect (and share) state across contexts.
  ctx.state = {}
  return ctx

Expand Down Expand Up @@ -577,6 +578,118 @@ async def test_execute_script_script_not_found(mock_skill1):
tool_context=ctx,
)
assert result["error_code"] == "SCRIPT_NOT_FOUND"
assert result["error"] == (
"Script 'nonexistent.py' not found in skill 'skill1'."
)


@pytest.mark.asyncio
async def test_execute_script_repeated_failure_escalates_to_fatal(mock_skill1):
  """Any second SCRIPT_NOT_FOUND within an invocation returns SCRIPT_NOT_FOUND_FATAL."""
  toolset = skill_toolset.SkillToolset(
      [mock_skill1], code_executor=_make_mock_executor()
  )
  run_tool = skill_toolset.RunSkillScriptTool(toolset)
  ctx = _make_tool_context_with_agent()

  call_args = {"skill_name": "skill1", "file_path": "scripts/nonexistent.py"}

  # First miss is the soft, recoverable error.
  first = await run_tool.run_async(args=call_args, tool_context=ctx)
  assert first["error_code"] == "SCRIPT_NOT_FOUND"

  # Second miss in the same invocation must escalate and tell the model
  # to stop retrying.
  second = await run_tool.run_async(args=call_args, tool_context=ctx)
  assert second["error_code"] == "SCRIPT_NOT_FOUND_FATAL"
  assert "Do not retry" in second["error"]
  assert "stop" in second["error"].lower()
  assert "failure #2" in second["error"]


@pytest.mark.asyncio
async def test_execute_script_different_path_also_escalates_to_fatal(
    mock_skill1,
):
  """A different missing script on the second call still escalates to SCRIPT_NOT_FOUND_FATAL.

  The counter is path-agnostic: any second not-found within the same invocation
  is fatal, even when the LLM hallucinates a different script path on each
  retry.
  """
  toolset = skill_toolset.SkillToolset(
      [mock_skill1], code_executor=_make_mock_executor()
  )
  run_tool = skill_toolset.RunSkillScriptTool(toolset)
  ctx = _make_tool_context_with_agent()

  # Two different missing paths: soft on the first, fatal on the second.
  for path, expected_code in (
      ("scripts/missing_a.py", "SCRIPT_NOT_FOUND"),
      ("scripts/missing_b.py", "SCRIPT_NOT_FOUND_FATAL"),
  ):
    result = await run_tool.run_async(
        args={"skill_name": "skill1", "file_path": path},
        tool_context=ctx,
    )
    assert result["error_code"] == expected_code

  # The final (fatal) result carries the stop instruction.
  assert "Do not retry" in result["error"]


@pytest.mark.asyncio
async def test_execute_script_failures_isolated_per_invocation(mock_skill1):
  """Failure counter does not leak across invocations.

  A SCRIPT_NOT_FOUND in invocation A must not increment invocation B's
  counter; invocation B's first missing-script call must still return the
  soft error, even when both invocations share the same session state dict.
  """
  toolset = skill_toolset.SkillToolset(
      [mock_skill1], code_executor=_make_mock_executor()
  )
  run_tool = skill_toolset.RunSkillScriptTool(toolset)

  # Both contexts see the exact same state mapping, as a shared in-memory
  # session backend would provide.
  state = {}
  ctx_a = _make_tool_context_with_agent(invocation_id="inv_a")
  ctx_b = _make_tool_context_with_agent(invocation_id="inv_b")
  ctx_a.state = state
  ctx_b.state = state

  async def _attempt(ctx, path):
    return await run_tool.run_async(
        args={"skill_name": "skill1", "file_path": path},
        tool_context=ctx,
    )

  # Invocation A records one miss; its counter reaches 1 (soft).
  result_a = await _attempt(ctx_a, "scripts/typo.py")
  assert result_a["error_code"] == "SCRIPT_NOT_FOUND"

  # Invocation B's first miss (same path) must also stay soft.
  result_b1 = await _attempt(ctx_b, "scripts/typo.py")
  assert result_b1["error_code"] == "SCRIPT_NOT_FOUND"

  # Invocation B's second miss (different path) escalates to fatal.
  result_b2 = await _attempt(ctx_b, "scripts/other.py")
  assert result_b2["error_code"] == "SCRIPT_NOT_FOUND_FATAL"


@pytest.mark.asyncio
async def test_execute_script_counter_uses_temp_prefix(mock_skill1):
  """Failure-counter key uses the `temp:` prefix so it is not persisted."""
  toolset = skill_toolset.SkillToolset(
      [mock_skill1], code_executor=_make_mock_executor()
  )
  run_tool = skill_toolset.RunSkillScriptTool(toolset)
  ctx = _make_tool_context_with_agent()

  await run_tool.run_async(
      args={"skill_name": "skill1", "file_path": "scripts/missing.py"},
      tool_context=ctx,
  )

  # `temp:`-prefixed keys are trimmed from the event delta, so the counter
  # must never be written without the prefix or it would reach durable
  # storage.
  tracked = [key for key in ctx.state if "skill_script_not_found_count" in key]
  assert tracked, "Failure counter did not write a tracking key."
  for key in tracked:
    assert key.startswith("temp:")


@pytest.mark.asyncio
Expand Down
Loading