Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 83 additions & 1 deletion tests/cloud/test_harness_app_contract.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,20 @@
import time, so it is intentionally left out to keep these tests offline.
"""

import pytest

from veadk.cloud.harness_app.types import (
HarnessConfig,
HarnessOverrides,
InvokeHarnessRequest,
InvokeHarnessResponse,
RunAgentRequest,
)
from veadk.cloud.harness_app.utils import split_csv
from veadk.cloud.harness_app.utils import (
_download_and_extract_skill,
_resolve_skill_slug,
split_csv,
)
from veadk.consts import DEFAULT_MODEL_AGENT_NAME
from veadk.prompts.agent_default_prompt import DEFAULT_INSTRUCTION

Expand Down Expand Up @@ -142,3 +148,79 @@ def test_empty_string_is_empty_list(self):

def test_drops_blank_segments(self):
assert split_csv("a,, ,b") == ["a", "b"]


class _FakeResp:
"""Minimal stand-in for an ``httpx.Response``."""

def __init__(self, *, status_code=200, json_data=None, content=b"", headers=None):
self.status_code = status_code
self._json = json_data
self.content = content
self.headers = headers or {}

def json(self):
return self._json


class TestResolveSkillSlug:
def test_exact_name_match_wins_even_when_not_first(self, monkeypatch):
# The hub ranks "smart-web-scraper" above the exact "web-scraper"; the
# exact Name match must still win (we scan the whole page, not just [0]).
def fake_get(url, params=None, **kwargs):
assert params["query"] == "web-scraper"
return _FakeResp(
json_data={
"Skills": [
{
"Name": "smart-web-scraper",
"Slug": "clawhub/m/smart-web-scraper",
},
{"Name": "web-scraper", "Slug": "clawhub/y/web-scraper"},
]
}
)

monkeypatch.setattr("veadk.cloud.harness_app.utils.httpx.get", fake_get)
assert _resolve_skill_slug("web-scraper") == "clawhub/y/web-scraper"

def test_value_with_slash_is_treated_as_explicit_slug(self, monkeypatch):
def boom(*args, **kwargs):
raise AssertionError("must not search when given an explicit slug")

monkeypatch.setattr("veadk.cloud.harness_app.utils.httpx.get", boom)
assert _resolve_skill_slug("clawhub/org/my-skill") == "clawhub/org/my-skill"

def test_no_exact_match_raises(self, monkeypatch):
monkeypatch.setattr(
"veadk.cloud.harness_app.utils.httpx.get",
lambda *a, **k: _FakeResp(
json_data={"Skills": [{"Name": "other", "Slug": "clawhub/o/other"}]}
),
)
with pytest.raises(RuntimeError, match="not found in the skill hub"):
_resolve_skill_slug("nonexistent-skill")

def test_search_http_error_raises(self, monkeypatch):
monkeypatch.setattr(
"veadk.cloud.harness_app.utils.httpx.get",
lambda *a, **k: _FakeResp(status_code=500, json_data={}),
)
with pytest.raises(RuntimeError, match="failed: HTTP 500"):
_resolve_skill_slug("whatever")


class TestDownloadNonZip:
def test_non_zip_200_gives_clear_error(self, monkeypatch, tmp_path):
# An explicit slug skips the search; a 200 JSON error body (not a zip)
# must surface clearly rather than as a BadZipFile.
monkeypatch.setattr(
"veadk.cloud.harness_app.utils.httpx.get",
lambda *a, **k: _FakeResp(
status_code=200,
content=b'{"Error":{"Code":"InternalError"}}',
headers={"content-type": "application/json"},
),
)
with pytest.raises(RuntimeError, match="did not return a zip"):
_download_and_extract_skill("clawhub/org/my-skill", tmp_path)
72 changes: 63 additions & 9 deletions veadk/cloud/harness_app/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,20 @@ def _load_builtin_tool(name: str) -> Any:
) from e


# Skill hub download endpoint. A skill name in a harness is the path after
# `/download/`, e.g. "clawhub/lgwventrue/system-file-handler".
# Skill hub endpoints. A harness lists a skill by its human *name* (e.g.
# "web-scraper"); the hub downloads by *slug* (e.g.
# "clawhub/yinanping-cpu/web-scraper"). We resolve name -> slug via the search
# API, then download by slug. Both are env-overridable.
SKILL_HUB_DOWNLOAD_URL = os.getenv(
"SKILL_HUB_DOWNLOAD_URL", "https://skills.volces.com/v1/skills/download"
)
SKILL_HUB_SEARCH_URL = os.getenv(
"SKILL_HUB_SEARCH_URL", "https://skills.volces.com/v1/skills"
)
# Top-N search results scanned for an exact name match. The top hit is not always
# the exact one (e.g. "web-scraper" ranks below "smart-web-scraper"), so we scan
# the whole (small) page rather than trusting the first result.
_SKILL_SEARCH_PAGE_SIZE = 3

# Maps HarnessConfig field names to their environment variables. ``app_name`` is
# populated via its "name" alias. Only variables that are set are passed, so the
Expand All @@ -110,32 +119,77 @@ def split_csv(value: str) -> list[str]:
return [item.strip() for item in value.split(",") if item.strip()]


def _resolve_skill_slug(skill: str) -> str:
"""Resolve a skill *name* to its hub *slug* via the search API.

A harness lists skills by name (e.g. ``"web-scraper"``) but the hub downloads
by slug (e.g. ``"clawhub/yinanping-cpu/web-scraper"``). A value already given
as a slug (contains ``"/"``) is used as-is. Otherwise the hub is searched and
the result whose ``Name`` matches exactly is taken — the top hit is not always
the exact one, so all results on the page are scanned.

Raises:
RuntimeError: the search failed, or no result's ``Name`` matched exactly.
"""
name = skill.strip("/")
if "/" in name:
return name # already a slug (e.g. "clawhub/org/skill")

response = httpx.get(
SKILL_HUB_SEARCH_URL,
params={"query": name, "pageNumber": 1, "pageSize": _SKILL_SEARCH_PAGE_SIZE},
timeout=60,
follow_redirects=True,
)
if response.status_code != 200:
raise RuntimeError(
f"Skill search for '{skill}' failed: HTTP {response.status_code}"
)
results = response.json().get("Skills") or []
for entry in results:
if entry.get("Name") == name and entry.get("Slug"):
return str(entry["Slug"])
seen = ", ".join(repr(e.get("Name")) for e in results) or "no results"
raise RuntimeError(
f"Skill '{skill}' not found in the skill hub (search returned: {seen}). "
f"Check the skill name, or pass its full slug (e.g. 'clawhub/<org>/<name>')."
)


def _download_and_extract_skill(skill: str, dest_dir: Path) -> Path:
"""Download a skill zip from the skill hub and extract it.
"""Resolve a skill name to its hub slug, download the zip, and extract it.

Args:
skill: Skill identifier — the hub path after ``/download/``
(e.g. ``"clawhub/lgwventrue/system-file-handler"``).
skill: Skill name (e.g. ``"web-scraper"``) or full hub slug (e.g.
``"clawhub/lgwventrue/system-file-handler"``).
dest_dir: Base directory to extract into; the skill is placed in a
subdirectory named after its declared name in ``SKILL.md``.

Returns:
The directory the skill was extracted to. Its name matches the skill's
declared name in ``SKILL.md`` (required by ``load_skill_from_dir``).
"""
name = skill.strip("/")
url = f"{SKILL_HUB_DOWNLOAD_URL.rstrip('/')}/{name}"
logger.info(f"Downloading skill '{skill}' from {url}")
slug = _resolve_skill_slug(skill)
url = f"{SKILL_HUB_DOWNLOAD_URL.rstrip('/')}/{slug}"
logger.info(f"Downloading skill '{skill}' (slug='{slug}') from {url}")

response = httpx.get(url, timeout=60, follow_redirects=True)
if response.status_code != 200:
raise RuntimeError(
f"Failed to download skill '{skill}': HTTP {response.status_code}"
)
# The hub may answer 200 with a JSON error body instead of a zip; surface that
# clearly rather than letting zipfile fail with "File is not a zip file".
if response.content[:2] != b"PK":
ct = response.headers.get("content-type", "")
raise RuntimeError(
f"Skill '{skill}' (slug='{slug}') download did not return a zip "
f"(content-type={ct!r}, {len(response.content)} bytes)"
)

# Extract to a staging dir first; the final directory must be named after
# the skill's declared name (ADK's load_skill_from_dir enforces this).
staging = dest_dir / f"{name.split('/')[-1]}__staging"
staging = dest_dir / f"{slug.split('/')[-1]}__staging"
if staging.exists():
shutil.rmtree(staging)
staging.mkdir(parents=True)
Expand Down
Loading