diff --git a/.github/workflows/auto_llama_cpp_update.yml b/.github/workflows/auto_llama_cpp_update.yml
new file mode 100644
index 0000000..3f57a8a
--- /dev/null
+++ b/.github/workflows/auto_llama_cpp_update.yml
@@ -0,0 +1,197 @@
+# Keeps llama_cpp.version pinned to the newest ggml-org/llama.cpp release by
+# maintaining a single pull request on a stable automation branch.
+name: Auto-update llama.cpp Pin
+
+on:
+  schedule:
+    # Weekday morning UTC, after upstream llama.cpp usually publishes release tags.
+    - cron: '17 10 * * 1-5'
+  workflow_dispatch:
+
+# actions: write lets the last step dispatch ci.yml; contents and pull-requests
+# write let create-pull-request push the branch and open or update the PR.
+permissions:
+  actions: write
+  contents: write
+  pull-requests: write
+
+# Only one update run at a time; a newer run cancels one still in progress.
+concurrency:
+  group: llama-cpp-auto-update
+  cancel-in-progress: true
+
+env:
+  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"
+  VERSION_FILE: llama_cpp.version
+  UPDATE_BRANCH: automation/bump-llama-cpp
+
+jobs:
+  update-llama-cpp-pin:
+    name: Check for newer llama.cpp release
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      # Compares the pinned tag with the newest upstream release, rewrites
+      # VERSION_FILE when they differ, and renders the PR body. Step outputs:
+      # current_tag, latest_tag, update_needed, skip_reason, release_url,
+      # compare_url.
+      - name: Resolve update metadata
+        id: meta
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: |
+          set -euo pipefail
+
+          CURRENT_TAG="$(tr -d '[:space:]' < "$VERSION_FILE")"
+          if [ -z "$CURRENT_TAG" ]; then
+            echo "error: $VERSION_FILE is empty"
+            exit 1
+          fi
+
+          LATEST_TAG="$(gh release list --repo ggml-org/llama.cpp --limit 1 --json tagName --jq '.[0].tagName')"
+          if [ -z "$LATEST_TAG" ] || [ "$LATEST_TAG" = "null" ]; then
+            echo "error: could not resolve latest ggml-org/llama.cpp release tag"
+            exit 1
+          fi
+
+          echo "current_tag=$CURRENT_TAG" >> "$GITHUB_OUTPUT"
+          echo "latest_tag=$LATEST_TAG" >> "$GITHUB_OUTPUT"
+
+          if [ "$CURRENT_TAG" = "$LATEST_TAG" ]; then
+            echo "update_needed=false" >> "$GITHUB_OUTPUT"
+            echo "skip_reason=$VERSION_FILE already points at $LATEST_TAG" >> "$GITHUB_OUTPUT"
+            exit 0
+          fi
+
+          # Defer to any open PR from another branch that already edits VERSION_FILE.
+          for pr in $(gh pr list --repo "$GITHUB_REPOSITORY" --state open --json number,headRefName --jq ".[] | select(.headRefName != \"$UPDATE_BRANCH\") | .number"); do
+            # Capture the file list before matching: under pipefail, piping gh
+            # straight into `grep -q` turns a gh error or an early grep exit
+            # (SIGPIPE) into a silent "no match".
+            if ! changed_files="$(gh pr diff "$pr" --repo "$GITHUB_REPOSITORY" --name-only)"; then
+              echo "::warning::could not list files changed by PR #$pr; not treating it as a conflict"
+              continue
+            fi
+            # -F: match the path literally so the dot in the name is not a regex wildcard.
+            if grep -Fqx -- "$VERSION_FILE" <<< "$changed_files"; then
+              echo "update_needed=false" >> "$GITHUB_OUTPUT"
+              echo "skip_reason=PR #$pr already changes $VERSION_FILE; not racing a non-automation PR" >> "$GITHUB_OUTPUT"
+              exit 0
+            fi
+          done
+
+          printf '%s\n' "$LATEST_TAG" > "$VERSION_FILE"
+
+          RELEASE_URL="$(gh release view "$LATEST_TAG" --repo ggml-org/llama.cpp --json url --jq '.url')"
+          COMPARE_URL="https://github.com/ggml-org/llama.cpp/compare/${CURRENT_TAG}...${LATEST_TAG}"
+          echo "release_url=$RELEASE_URL" >> "$GITHUB_OUTPUT"
+          echo "compare_url=$COMPARE_URL" >> "$GITHUB_OUTPUT"
+          echo "update_needed=true" >> "$GITHUB_OUTPUT"
+
+          gh release view "$LATEST_TAG" --repo ggml-org/llama.cpp --json body --jq '.body // ""' > /tmp/llama_cpp_release_notes.md
+          # The commit list is best-effort; the body falls back to the compare link.
+          gh api "repos/ggml-org/llama.cpp/compare/${CURRENT_TAG}...${LATEST_TAG}" \
+            --jq '.commits[:80][] | "- " + (.commit.message | split("\n")[0]) + " (`" + (.sha[0:7]) + "`)"' \
+            > /tmp/llama_cpp_commits.md || true
+
+          {
+            echo "## llama.cpp update"
+            echo
+            echo "- Previous pin: \`$CURRENT_TAG\`"
+            echo "- New pin: \`$LATEST_TAG\`"
+            echo "- Upstream release: $RELEASE_URL"
+            echo "- Compare: $COMPARE_URL"
+            echo
+            echo "## Upstream changelog"
+            echo
+            echo "<details>"
+            echo "<summary>Release notes for $LATEST_TAG</summary>"
+            echo
+            if [ -s /tmp/llama_cpp_release_notes.md ]; then
+              cat /tmp/llama_cpp_release_notes.md
+            else
+              echo "No GitHub release notes were published for this tag. Use the compare link above as the source of truth."
+            fi
+            echo
+            echo "</details>"
+            echo
+            echo "## Commit range"
+            echo
+            echo "<details>"
+            echo "<summary>Commits from $CURRENT_TAG to $LATEST_TAG (first 80)</summary>"
+            echo
+            if [ -s /tmp/llama_cpp_commits.md ]; then
+              cat /tmp/llama_cpp_commits.md
+            else
+              echo "Commit list unavailable; see $COMPARE_URL."
+            fi
+            echo
+            echo "</details>"
+            echo
+            echo "## Web bridge review focus"
+            echo
+            echo "Please pay extra attention to upstream changes touching:"
+            echo
+            echo "- WebGPU, WASM, Emscripten, pthreads, or memory64 build behavior"
+            echo "- ggml backend APIs used by the bridge"
+            echo "- model loading, tokenizer, chat template, context/state persistence, or cache semantics"
+            echo "- CMake/build flags that can affect the generated JS/WASM artifacts"
+            echo
+            echo "## Validation"
+            echo
+            echo "- [ ] Emscripten build passed"
+            echo "- [ ] Browser WebGPU/state-persistence smoke passed"
+            echo "- [ ] Generated bridge artifacts include wasm32 and memory64 outputs"
+            echo "- [ ] No stale hard-coded llama.cpp tag remains in CI/publish defaults"
+            echo
+            echo "## Automation behavior"
+            echo
+            echo "This PR is managed from the stable branch \`$UPDATE_BRANCH\`. If another llama.cpp release appears before merge, the scheduled workflow updates this same PR instead of opening a duplicate. The workflow skips if a non-automation PR already changes \`$VERSION_FILE\`."
+          } > /tmp/llama_cpp_update_pr.md
+
+      # The reason is passed through env rather than interpolated into the script,
+      # so text derived from upstream tag names is never parsed as shell.
+      - name: Report skip reason
+        if: steps.meta.outputs.update_needed == 'false'
+        env:
+          SKIP_REASON: ${{ steps.meta.outputs.skip_reason }}
+        run: echo "$SKIP_REASON"
+
+      - name: Create or update llama.cpp bump PR
+        id: create-pr
+        if: steps.meta.outputs.update_needed == 'true'
+        uses: peter-evans/create-pull-request@v7
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          branch: ${{ env.UPDATE_BRANCH }}
+          base: main
+          delete-branch: true
+          title: 'chore: bump llama.cpp to ${{ steps.meta.outputs.latest_tag }}'
+          commit-message: 'chore: bump llama.cpp to ${{ steps.meta.outputs.latest_tag }}'
+          body-path: /tmp/llama_cpp_update_pr.md
+          labels: |
+            dependencies
+            automated
+
+      # Explicitly dispatches ci.yml on the update branch (retrying up to three
+      # times) whenever the previous step reports a pull request number.
+      - name: Dispatch CI for automation PR
+        if: steps.create-pr.outputs.pull-request-number != ''
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: |
+          set -euo pipefail
+          for attempt in 1 2 3; do
+            if gh workflow run ci.yml --repo "$GITHUB_REPOSITORY" --ref "$UPDATE_BRANCH"; then
+              break
+            fi
+            if [ "$attempt" = 3 ]; then
+              echo "error: failed to dispatch CI for $UPDATE_BRANCH after $attempt attempts"
+              exit 1
+            fi
+            sleep $((attempt * 5))
+          done
+          echo "Dispatched CI for #${{ steps.create-pr.outputs.pull-request-number }} on $UPDATE_BRANCH."
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bc8533b..e52a1e0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,6 +1,7 @@ name: CI on: + workflow_dispatch: push: branches: [main] pull_request: @@ -12,7 +13,6 @@ jobs: runs-on: ubuntu-latest env: FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true" - LLAMA_CPP_TAG: b9116 LLAMA_WEBGPU_SMOKE_MODEL_URL: https://huggingface.co/aladar/llama-2-tiny-random-GGUF/resolve/main/llama-2-tiny-random.gguf LLAMA_WEBGPU_SMOKE_MODEL_SHA256: 81f226c62d28ed4a1a9b9fa080fcd9f0cc40e0f9d5680036583ff98fbcd035cb LLAMA_WEBGPU_SMOKE_MODEL_CACHE: ~/.cache/llama-web-bridge/state-smoke-models @@ -20,6 +20,16 @@ jobs: steps: - uses: actions/checkout@v4 + - name: Resolve llama.cpp pin + run: | + LLAMA_CPP_TAG="$(tr -d '[:space:]' < llama_cpp.version)" + if [ -z "$LLAMA_CPP_TAG" ]; then + echo "error: llama_cpp.version is empty" + exit 1 + fi + echo "LLAMA_CPP_TAG=$LLAMA_CPP_TAG" >> "$GITHUB_ENV" + echo "Using llama.cpp ${LLAMA_CPP_TAG}" + - name: Validate state persistence API contract run: python3 scripts/verify_state_persistence_api.py diff --git a/.github/workflows/publish_assets.yml b/.github/workflows/publish_assets.yml index 2118959..c2480eb 100644 --- a/.github/workflows/publish_assets.yml +++ b/.github/workflows/publish_assets.yml @@ -11,9 +11,9 @@ on: required: true default: leehack/llama-web-bridge-assets llama_cpp_tag: - description: llama.cpp tag to build from - required: true - default: b9116 + description: Optional llama.cpp tag override; defaults to llama_cpp.version + required: false + default: '' push: tags: - 'v*' @@ -22,7 +22,7 @@ env: FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true" ASSETS_TAG: ${{ github.event_name == 'workflow_dispatch' && inputs.assets_tag || github.ref_name }} ASSETS_REPO: ${{ github.event_name == 'workflow_dispatch' && inputs.assets_repo || 'leehack/llama-web-bridge-assets' }} - LLAMA_CPP_TAG: ${{ github.event_name == 'workflow_dispatch' && inputs.llama_cpp_tag || 'b9116' 
}} + REQUESTED_LLAMA_CPP_TAG: ${{ github.event_name == 'workflow_dispatch' && inputs.llama_cpp_tag || '' }} permissions: contents: read @@ -31,11 +31,25 @@ jobs: build-bridge-assets: name: Build bridge assets runs-on: ubuntu-latest + outputs: + llama_cpp_tag: ${{ steps.resolve-publish-parameters.outputs.llama_cpp_tag }} steps: - uses: actions/checkout@v4 - name: Resolve publish parameters + id: resolve-publish-parameters run: | + if [ -n "$REQUESTED_LLAMA_CPP_TAG" ]; then + LLAMA_CPP_TAG="$REQUESTED_LLAMA_CPP_TAG" + else + LLAMA_CPP_TAG="$(tr -d '[:space:]' < llama_cpp.version)" + fi + if [ -z "$LLAMA_CPP_TAG" ]; then + echo "error: llama.cpp tag is empty" + exit 1 + fi + echo "LLAMA_CPP_TAG=$LLAMA_CPP_TAG" >> "$GITHUB_ENV" + echo "llama_cpp_tag=$LLAMA_CPP_TAG" >> "$GITHUB_OUTPUT" echo "event=${{ github.event_name }}" echo "assets_tag=${ASSETS_TAG}" echo "assets_repo=${ASSETS_REPO}" @@ -60,7 +74,6 @@ jobs: env: OUT_DIR: ${{ runner.temp }}/webgpu_bridge_dist ASSETS_TAG: ${{ env.ASSETS_TAG }} - LLAMA_CPP_TAG: ${{ env.LLAMA_CPP_TAG }} SOURCE_REPO: ${{ github.repository }} SOURCE_COMMIT: ${{ github.sha }} run: | @@ -191,9 +204,9 @@ jobs: GH_TOKEN: ${{ secrets.WEBGPU_BRIDGE_ASSETS_PAT }} ASSETS_TAG: ${{ env.ASSETS_TAG }} ASSETS_REPO: ${{ env.ASSETS_REPO }} + LLAMA_CPP_TAG: ${{ needs.build-bridge-assets.outputs.llama_cpp_tag }} SOURCE_REPO: ${{ github.repository }} SOURCE_COMMIT: ${{ github.sha }} - LLAMA_CPP_TAG: ${{ env.LLAMA_CPP_TAG }} run: | NOTES="$(cat < int: smoke = read_required("scripts/state_persistence_browser_smoke.py", errors) ci = read_required(".github/workflows/ci.yml", errors) publish = read_required(".github/workflows/publish_assets.yml", errors) + auto_update = read_required(".github/workflows/auto_llama_cpp_update.yml", errors) + version = read_required("llama_cpp.version", errors).strip() agents = read_required("AGENTS.md", errors) readme = read_required("README.md", errors) contributing = read_required("CONTRIBUTING.md", errors) - for name, workflow 
in (("ci.yml", ci), ("publish_assets.yml", publish)): + for name, workflow in ( + ("ci.yml", ci), + ("publish_assets.yml", publish), + ("auto_llama_cpp_update.yml", auto_update), + ): require( "FORCE_JAVASCRIPT_ACTIONS_TO_NODE24" in workflow, f"{name} must opt into Node 24 action runtime to catch Node 20 deprecation breakage early", @@ -44,6 +50,46 @@ def main() -> int: errors, ) + require( + version.startswith("b") and version[1:].isdigit(), + "llama_cpp.version must contain a llama.cpp release tag like b9165", + errors, + ) + require( + "tr -d '[:space:]' < llama_cpp.version" in ci + and "Resolve llama.cpp pin" in ci + and "workflow_dispatch:" in ci + and "LLAMA_CPP_TAG: b9116" not in ci, + "ci.yml must resolve the llama.cpp tag from llama_cpp.version, support explicit dispatch, and avoid hard-coded stale defaults", + errors, + ) + require( + "REQUESTED_LLAMA_CPP_TAG" in publish + and "tr -d '[:space:]' < llama_cpp.version" in publish + and "outputs:" in publish + and "llama_cpp_tag: ${{ steps.resolve-publish-parameters.outputs.llama_cpp_tag }}" in publish + and "LLAMA_CPP_TAG: ${{ needs.build-bridge-assets.outputs.llama_cpp_tag }}" in publish + and "default: b9116" not in publish + and "|| 'b9116'" not in publish, + "publish_assets.yml must default to llama_cpp.version, pass the resolved tag across jobs, and still allow a manual override", + errors, + ) + require( + "concurrency:" in auto_update + and "group: llama-cpp-auto-update" in auto_update + and "UPDATE_BRANCH: automation/bump-llama-cpp" in auto_update + and "ggml-org/llama.cpp" in auto_update + and "create-pull-request" in auto_update + and "actions: write" in auto_update + and "id: create-pr" in auto_update + and "gh workflow run ci.yml --repo \"$GITHUB_REPOSITORY\" --ref \"$UPDATE_BRANCH\"" in auto_update + and "body-path: /tmp/llama_cpp_update_pr.md" in auto_update + and "Upstream changelog" in auto_update + and "not racing a non-automation PR" in auto_update, + "auto_llama_cpp_update.yml must update 
one stable PR branch with upstream changelog context, dispatch CI for bot updates, and avoid racing human PRs", + errors, + ) + require( "--model-url" in smoke and "--model-sha256" in smoke, "browser smoke must support an integrity-checked model-backed state round-trip", @@ -97,22 +143,27 @@ def main() -> int: require( "Agent PR Workflow" in agents and "independent review" in agents - and "state_persistence_browser_smoke.py" in agents, - "AGENTS.md must document the agent PR workflow and browser smoke expectations", + and "state_persistence_browser_smoke.py" in agents + and "llama_cpp.version" in agents + and "auto_llama_cpp_update.yml" in agents, + "AGENTS.md must document the agent PR workflow, browser smoke expectations, and llama.cpp auto-update policy", errors, ) require( "FORCE_JAVASCRIPT_ACTIONS_TO_NODE24" in readme and "state-persistence-smoke-artifacts" in readme - and "scripts/verify_ci_reliability.py" in readme, - "README.md must document CI reliability, diagnostics, and Node 24 action-runtime coverage", + and "scripts/verify_ci_reliability.py" in readme + and "llama_cpp.version" in readme + and "auto_llama_cpp_update.yml" in readme, + "README.md must document CI reliability, diagnostics, Node 24 action-runtime coverage, and llama.cpp pin automation", errors, ) require( "Agent Workflow Guardrails" in contributing and "scripts/verify_ci_reliability.py" in contributing - and "--model-sha256" in contributing, - "CONTRIBUTING.md must document maintainer/agent workflow guardrails and checksum-pinned smoke usage", + and "--model-sha256" in contributing + and "llama_cpp.version" in contributing, + "CONTRIBUTING.md must document maintainer/agent workflow guardrails, checksum-pinned smoke usage, and llama.cpp pin handling", errors, )