diff --git a/tests/cloud/test_harness_app_contract.py b/tests/cloud/test_harness_app_contract.py index cca27ef8..b8c87ba1 100644 --- a/tests/cloud/test_harness_app_contract.py +++ b/tests/cloud/test_harness_app_contract.py @@ -24,6 +24,8 @@ import time, so it is intentionally left out to keep these tests offline. """ +import pytest + from veadk.cloud.harness_app.types import ( HarnessConfig, HarnessOverrides, @@ -31,7 +33,11 @@ InvokeHarnessResponse, RunAgentRequest, ) -from veadk.cloud.harness_app.utils import split_csv +from veadk.cloud.harness_app.utils import ( + _download_and_extract_skill, + _resolve_skill_slug, + split_csv, +) from veadk.consts import DEFAULT_MODEL_AGENT_NAME from veadk.prompts.agent_default_prompt import DEFAULT_INSTRUCTION @@ -142,3 +148,79 @@ def test_empty_string_is_empty_list(self): def test_drops_blank_segments(self): assert split_csv("a,, ,b") == ["a", "b"] + + +class _FakeResp: + """Minimal stand-in for an ``httpx.Response``.""" + + def __init__(self, *, status_code=200, json_data=None, content=b"", headers=None): + self.status_code = status_code + self._json = json_data + self.content = content + self.headers = headers or {} + + def json(self): + return self._json + + +class TestResolveSkillSlug: + def test_exact_name_match_wins_even_when_not_first(self, monkeypatch): + # The hub ranks "smart-web-scraper" above the exact "web-scraper"; the + # exact Name match must still win (we scan the whole page, not just [0]). + def fake_get(url, params=None, **kwargs): + assert params["query"] == "web-scraper" + return _FakeResp( + json_data={ + "Skills": [ + { + "Name": "smart-web-scraper", + "Slug": "clawhub/m/smart-web-scraper", + }, + {"Name": "web-scraper", "Slug": "clawhub/y/web-scraper"}, + ] + } + ) + + monkeypatch.setattr("veadk.cloud.harness_app.utils.httpx.get", fake_get) + assert _resolve_skill_slug("web-scraper") == "clawhub/y/web-scraper" + + def test_value_with_slash_is_treated_as_explicit_slug(self, monkeypatch): + def boom(*args, **kwargs): + raise AssertionError("must not search when given an explicit slug") + + monkeypatch.setattr("veadk.cloud.harness_app.utils.httpx.get", boom) + assert _resolve_skill_slug("clawhub/org/my-skill") == "clawhub/org/my-skill" + + def test_no_exact_match_raises(self, monkeypatch): + monkeypatch.setattr( + "veadk.cloud.harness_app.utils.httpx.get", + lambda *a, **k: _FakeResp( + json_data={"Skills": [{"Name": "other", "Slug": "clawhub/o/other"}]} + ), + ) + with pytest.raises(RuntimeError, match="not found in the skill hub"): + _resolve_skill_slug("nonexistent-skill") + + def test_search_http_error_raises(self, monkeypatch): + monkeypatch.setattr( + "veadk.cloud.harness_app.utils.httpx.get", + lambda *a, **k: _FakeResp(status_code=500, json_data={}), + ) + with pytest.raises(RuntimeError, match="failed: HTTP 500"): + _resolve_skill_slug("whatever") + + +class TestDownloadNonZip: + def test_non_zip_200_gives_clear_error(self, monkeypatch, tmp_path): + # An explicit slug skips the search; a 200 JSON error body (not a zip) + # must surface clearly rather than as a BadZipFile. + monkeypatch.setattr( + "veadk.cloud.harness_app.utils.httpx.get", + lambda *a, **k: _FakeResp( + status_code=200, + content=b'{"Error":{"Code":"InternalError"}}', + headers={"content-type": "application/json"}, + ), + ) + with pytest.raises(RuntimeError, match="did not return a zip"): + _download_and_extract_skill("clawhub/org/my-skill", tmp_path) diff --git a/veadk/cloud/harness_app/utils.py b/veadk/cloud/harness_app/utils.py index 3e7ec26a..0d6838a6 100644 --- a/veadk/cloud/harness_app/utils.py +++ b/veadk/cloud/harness_app/utils.py @@ -79,11 +79,20 @@ def _load_builtin_tool(name: str) -> Any: ) from e -# Skill hub download endpoint. A skill name in a harness is the path after -# `/download/`, e.g. "clawhub/lgwventrue/system-file-handler". +# Skill hub endpoints. A harness lists a skill by its human *name* (e.g. +# "web-scraper"); the hub downloads by *slug* (e.g. +# "clawhub/yinanping-cpu/web-scraper"). We resolve name -> slug via the search +# API, then download by slug. Both are env-overridable. SKILL_HUB_DOWNLOAD_URL = os.getenv( "SKILL_HUB_DOWNLOAD_URL", "https://skills.volces.com/v1/skills/download" ) +SKILL_HUB_SEARCH_URL = os.getenv( + "SKILL_HUB_SEARCH_URL", "https://skills.volces.com/v1/skills" +) +# Top-N search results scanned for an exact name match. The top hit is not always +# the exact one (e.g. "web-scraper" ranks below "smart-web-scraper"), so we scan +# the whole (small) page rather than trusting the first result. +_SKILL_SEARCH_PAGE_SIZE = 3 # Maps HarnessConfig field names to their environment variables. ``app_name`` is # populated via its "name" alias. Only variables that are set are passed, so the @@ -110,12 +119,49 @@ def split_csv(value: str) -> list[str]: return [item.strip() for item in value.split(",") if item.strip()] +def _resolve_skill_slug(skill: str) -> str: + """Resolve a skill *name* to its hub *slug* via the search API. + + A harness lists skills by name (e.g. ``"web-scraper"``) but the hub downloads + by slug (e.g. ``"clawhub/yinanping-cpu/web-scraper"``). A value already given + as a slug (contains ``"/"``) is used as-is. Otherwise the hub is searched and + the result whose ``Name`` matches exactly is taken — the top hit is not always + the exact one, so all results on the page are scanned. + + Raises: + RuntimeError: the search failed, or no result's ``Name`` matched exactly. + """ + name = skill.strip("/") + if "/" in name: + return name # already a slug (e.g. "clawhub/org/skill") + + response = httpx.get( + SKILL_HUB_SEARCH_URL, + params={"query": name, "pageNumber": 1, "pageSize": _SKILL_SEARCH_PAGE_SIZE}, + timeout=60, + follow_redirects=True, + ) + if response.status_code != 200: + raise RuntimeError( + f"Skill search for '{skill}' failed: HTTP {response.status_code}" + ) + results = response.json().get("Skills") or [] + for entry in results: + if entry.get("Name") == name and entry.get("Slug"): + return str(entry["Slug"]) + seen = ", ".join(repr(e.get("Name")) for e in results) or "no results" + raise RuntimeError( + f"Skill '{skill}' not found in the skill hub (search returned: {seen}). " + f"Check the skill name, or pass its full slug (e.g. 'clawhub//')." + ) + + def _download_and_extract_skill(skill: str, dest_dir: Path) -> Path: - """Download a skill zip from the skill hub and extract it. + """Resolve a skill name to its hub slug, download the zip, and extract it. Args: - skill: Skill identifier — the hub path after ``/download/`` - (e.g. ``"clawhub/lgwventrue/system-file-handler"``). + skill: Skill name (e.g. ``"web-scraper"``) or full hub slug (e.g. + ``"clawhub/lgwventrue/system-file-handler"``). dest_dir: Base directory to extract into; the skill is placed in a subdirectory named after its declared name in ``SKILL.md``. @@ -123,19 +169,27 @@ def _download_and_extract_skill(skill: str, dest_dir: Path) -> Path: The directory the skill was extracted to. Its name matches the skill's declared name in ``SKILL.md`` (required by ``load_skill_from_dir``). """ - name = skill.strip("/") - url = f"{SKILL_HUB_DOWNLOAD_URL.rstrip('/')}/{name}" - logger.info(f"Downloading skill '{skill}' from {url}") + slug = _resolve_skill_slug(skill) + url = f"{SKILL_HUB_DOWNLOAD_URL.rstrip('/')}/{slug}" + logger.info(f"Downloading skill '{skill}' (slug='{slug}') from {url}") response = httpx.get(url, timeout=60, follow_redirects=True) if response.status_code != 200: raise RuntimeError( f"Failed to download skill '{skill}': HTTP {response.status_code}" ) + # The hub may answer 200 with a JSON error body instead of a zip; surface that + # clearly rather than letting zipfile fail with "File is not a zip file". + if response.content[:2] != b"PK": + ct = response.headers.get("content-type", "") + raise RuntimeError( + f"Skill '{skill}' (slug='{slug}') download did not return a zip " + f"(content-type={ct!r}, {len(response.content)} bytes)" + ) # Extract to a staging dir first; the final directory must be named after # the skill's declared name (ADK's load_skill_from_dir enforces this). - staging = dest_dir / f"{name.split('/')[-1]}__staging" + staging = dest_dir / f"{slug.split('/')[-1]}__staging" if staging.exists(): shutil.rmtree(staging) staging.mkdir(parents=True)