diff --git a/.github/workflows/auto_llama_cpp_update.yml b/.github/workflows/auto_llama_cpp_update.yml
new file mode 100644
index 0000000..3f57a8a
--- /dev/null
+++ b/.github/workflows/auto_llama_cpp_update.yml
@@ -0,0 +1,192 @@
+name: Auto-update llama.cpp Pin
+
+# Opens or refreshes one pull request that bumps the tag stored in
+# llama_cpp.version to the newest ggml-org/llama.cpp release.
+on:
+  schedule:
+    # Weekday morning UTC, after upstream llama.cpp usually publishes release tags.
+    - cron: '17 10 * * 1-5'
+  workflow_dispatch:
+
+# contents + pull-requests: push the bump branch and manage its PR.
+# actions: dispatch ci.yml in the final step.
+permissions:
+  actions: write
+  contents: write
+  pull-requests: write
+
+concurrency:
+  group: llama-cpp-auto-update
+  cancel-in-progress: true
+
+env:
+  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"
+  VERSION_FILE: llama_cpp.version
+  UPDATE_BRANCH: automation/bump-llama-cpp
+
+jobs:
+  update-llama-cpp-pin:
+    name: Check for newer llama.cpp release
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      # Compares the pinned tag with the newest upstream release, rewrites
+      # VERSION_FILE when they differ, and drafts the PR body under /tmp.
+      - name: Resolve update metadata
+        id: meta
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: |
+          set -euo pipefail
+
+          CURRENT_TAG="$(tr -d '[:space:]' < "$VERSION_FILE")"
+          if [ -z "$CURRENT_TAG" ]; then
+            echo "error: $VERSION_FILE is empty"
+            exit 1
+          fi
+
+          LATEST_TAG="$(gh release list --repo ggml-org/llama.cpp --limit 1 --json tagName --jq '.[0].tagName')"
+          if [ -z "$LATEST_TAG" ] || [ "$LATEST_TAG" = "null" ]; then
+            echo "error: could not resolve latest ggml-org/llama.cpp release tag"
+            exit 1
+          fi
+
+          echo "current_tag=$CURRENT_TAG" >> "$GITHUB_OUTPUT"
+          echo "latest_tag=$LATEST_TAG" >> "$GITHUB_OUTPUT"
+
+          if [ "$CURRENT_TAG" = "$LATEST_TAG" ]; then
+            echo "update_needed=false" >> "$GITHUB_OUTPUT"
+            echo "skip_reason=$VERSION_FILE already points at $LATEST_TAG" >> "$GITHUB_OUTPUT"
+            exit 0
+          fi
+
+          # Defer to any open PR outside the automation branch that already edits the pin.
+          for pr in $(gh pr list --repo "$GITHUB_REPOSITORY" --state open --json number,headRefName --jq ".[] | select(.headRefName != \"$UPDATE_BRANCH\") | .number"); do
+            # Capture the file list before matching: piping straight into `grep -q`
+            # under pipefail can report failure when grep exits on its first match.
+            if ! changed_files="$(gh pr diff "$pr" --repo "$GITHUB_REPOSITORY" --name-only)"; then
+              echo "::warning::could not list files changed by PR #$pr; ignoring it"
+              continue
+            fi
+            # -F matches the file name literally instead of as a regular expression.
+            if grep -Fqx -- "$VERSION_FILE" <<< "$changed_files"; then
+              echo "update_needed=false" >> "$GITHUB_OUTPUT"
+              echo "skip_reason=PR #$pr already changes $VERSION_FILE; not racing a non-automation PR" >> "$GITHUB_OUTPUT"
+              exit 0
+            fi
+          done
+
+          printf '%s\n' "$LATEST_TAG" > "$VERSION_FILE"
+
+          RELEASE_URL="$(gh release view "$LATEST_TAG" --repo ggml-org/llama.cpp --json url --jq '.url')"
+          COMPARE_URL="https://github.com/ggml-org/llama.cpp/compare/${CURRENT_TAG}...${LATEST_TAG}"
+          echo "release_url=$RELEASE_URL" >> "$GITHUB_OUTPUT"
+          echo "compare_url=$COMPARE_URL" >> "$GITHUB_OUTPUT"
+          echo "update_needed=true" >> "$GITHUB_OUTPUT"
+
+          gh release view "$LATEST_TAG" --repo ggml-org/llama.cpp --json body --jq '.body // ""' > /tmp/llama_cpp_release_notes.md
+          # Best effort: the PR body falls back to the compare link if this call fails.
+          gh api "repos/ggml-org/llama.cpp/compare/${CURRENT_TAG}...${LATEST_TAG}" \
+            --jq '.commits[:80][] | "- " + (.commit.message | split("\n")[0]) + " (`" + (.sha[0:7]) + "`)"' \
+            > /tmp/llama_cpp_commits.md || true
+
+          {
+            echo "## llama.cpp update"
+            echo
+            echo "- Previous pin: \`$CURRENT_TAG\`"
+            echo "- New pin: \`$LATEST_TAG\`"
+            echo "- Upstream release: $RELEASE_URL"
+            echo "- Compare: $COMPARE_URL"
+            echo
+            echo "## Upstream changelog"
+            echo
+            echo "<details>"
+            echo "<summary>Release notes for $LATEST_TAG</summary>"
+            echo
+            if [ -s /tmp/llama_cpp_release_notes.md ]; then
+              cat /tmp/llama_cpp_release_notes.md
+            else
+              echo "No GitHub release notes were published for this tag. Use the compare link above as the source of truth."
+            fi
+            echo
+            echo "</details>"
+            echo
+            echo "## Commit range"
+            echo
+            echo "<details>"
+            echo "<summary>Commits from $CURRENT_TAG to $LATEST_TAG (first 80)</summary>"
+            echo
+            if [ -s /tmp/llama_cpp_commits.md ]; then
+              cat /tmp/llama_cpp_commits.md
+            else
+              echo "Commit list unavailable; see $COMPARE_URL."
+            fi
+            echo
+            echo "</details>"
+            echo
+            echo "## Web bridge review focus"
+            echo
+            echo "Please pay extra attention to upstream changes touching:"
+            echo
+            echo "- WebGPU, WASM, Emscripten, pthreads, or memory64 build behavior"
+            echo "- ggml backend APIs used by the bridge"
+            echo "- model loading, tokenizer, chat template, context/state persistence, or cache semantics"
+            echo "- CMake/build flags that can affect the generated JS/WASM artifacts"
+            echo
+            echo "## Validation"
+            echo
+            echo "- [ ] Emscripten build passed"
+            echo "- [ ] Browser WebGPU/state-persistence smoke passed"
+            echo "- [ ] Generated bridge artifacts include wasm32 and memory64 outputs"
+            echo "- [ ] No stale hard-coded llama.cpp tag remains in CI/publish defaults"
+            echo
+            echo "## Automation behavior"
+            echo
+            echo "This PR is managed from the stable branch \`$UPDATE_BRANCH\`. If another llama.cpp release appears before merge, the scheduled workflow updates this same PR instead of opening a duplicate. The workflow skips if a non-automation PR already changes \`$VERSION_FILE\`."
+          } > /tmp/llama_cpp_update_pr.md
+
+      - name: Report skip reason
+        if: steps.meta.outputs.update_needed == 'false'
+        env:
+          # Passed via the environment so the text is never spliced into the script source.
+          SKIP_REASON: ${{ steps.meta.outputs.skip_reason }}
+        run: echo "$SKIP_REASON"
+
+      - name: Create or update llama.cpp bump PR
+        id: create-pr
+        if: steps.meta.outputs.update_needed == 'true'
+        uses: peter-evans/create-pull-request@v7
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          branch: ${{ env.UPDATE_BRANCH }}
+          base: main
+          delete-branch: true
+          title: 'chore: bump llama.cpp to ${{ steps.meta.outputs.latest_tag }}'
+          commit-message: 'chore: bump llama.cpp to ${{ steps.meta.outputs.latest_tag }}'
+          body-path: /tmp/llama_cpp_update_pr.md
+          labels: |
+            dependencies
+            automated
+
+      # Runs ci.yml on the bump branch explicitly, retrying the dispatch up to three times.
+      - name: Dispatch CI for automation PR
+        if: steps.create-pr.outputs.pull-request-number != ''
+        env:
+          GH_TOKEN: ${{ github.token }}
+          PR_NUMBER: ${{ steps.create-pr.outputs.pull-request-number }}
+        run: |
+          set -euo pipefail
+          for attempt in 1 2 3; do
+            if gh workflow run ci.yml --repo "$GITHUB_REPOSITORY" --ref "$UPDATE_BRANCH"; then
+              break
+            fi
+            if [ "$attempt" = 3 ]; then
+              echo "error: failed to dispatch CI for $UPDATE_BRANCH after $attempt attempts"
+              exit 1
+            fi
+            sleep $((attempt * 5))
+          done
+          echo "Dispatched CI for #$PR_NUMBER on $UPDATE_BRANCH."
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index bc8533b..e52a1e0 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,6 +1,7 @@
name: CI
on:
+ workflow_dispatch:
push:
branches: [main]
pull_request:
@@ -12,7 +13,6 @@ jobs:
runs-on: ubuntu-latest
env:
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"
- LLAMA_CPP_TAG: b9116
LLAMA_WEBGPU_SMOKE_MODEL_URL: https://huggingface.co/aladar/llama-2-tiny-random-GGUF/resolve/main/llama-2-tiny-random.gguf
LLAMA_WEBGPU_SMOKE_MODEL_SHA256: 81f226c62d28ed4a1a9b9fa080fcd9f0cc40e0f9d5680036583ff98fbcd035cb
LLAMA_WEBGPU_SMOKE_MODEL_CACHE: ~/.cache/llama-web-bridge/state-smoke-models
@@ -20,6 +20,16 @@ jobs:
steps:
- uses: actions/checkout@v4
+ - name: Resolve llama.cpp pin
+ run: |
+ LLAMA_CPP_TAG="$(tr -d '[:space:]' < llama_cpp.version)"
+ if [ -z "$LLAMA_CPP_TAG" ]; then
+ echo "error: llama_cpp.version is empty"
+ exit 1
+ fi
+ echo "LLAMA_CPP_TAG=$LLAMA_CPP_TAG" >> "$GITHUB_ENV"
+ echo "Using llama.cpp ${LLAMA_CPP_TAG}"
+
- name: Validate state persistence API contract
run: python3 scripts/verify_state_persistence_api.py
diff --git a/.github/workflows/publish_assets.yml b/.github/workflows/publish_assets.yml
index 2118959..c2480eb 100644
--- a/.github/workflows/publish_assets.yml
+++ b/.github/workflows/publish_assets.yml
@@ -11,9 +11,9 @@ on:
required: true
default: leehack/llama-web-bridge-assets
llama_cpp_tag:
- description: llama.cpp tag to build from
- required: true
- default: b9116
+ description: Optional llama.cpp tag override; defaults to llama_cpp.version
+ required: false
+ default: ''
push:
tags:
- 'v*'
@@ -22,7 +22,7 @@ env:
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"
ASSETS_TAG: ${{ github.event_name == 'workflow_dispatch' && inputs.assets_tag || github.ref_name }}
ASSETS_REPO: ${{ github.event_name == 'workflow_dispatch' && inputs.assets_repo || 'leehack/llama-web-bridge-assets' }}
- LLAMA_CPP_TAG: ${{ github.event_name == 'workflow_dispatch' && inputs.llama_cpp_tag || 'b9116' }}
+ REQUESTED_LLAMA_CPP_TAG: ${{ github.event_name == 'workflow_dispatch' && inputs.llama_cpp_tag || '' }}
permissions:
contents: read
@@ -31,11 +31,25 @@ jobs:
build-bridge-assets:
name: Build bridge assets
runs-on: ubuntu-latest
+ outputs:
+ llama_cpp_tag: ${{ steps.resolve-publish-parameters.outputs.llama_cpp_tag }}
steps:
- uses: actions/checkout@v4
- name: Resolve publish parameters
+ id: resolve-publish-parameters
run: |
+ if [ -n "$REQUESTED_LLAMA_CPP_TAG" ]; then
+ LLAMA_CPP_TAG="$REQUESTED_LLAMA_CPP_TAG"
+ else
+ LLAMA_CPP_TAG="$(tr -d '[:space:]' < llama_cpp.version)"
+ fi
+ if [ -z "$LLAMA_CPP_TAG" ]; then
+ echo "error: llama.cpp tag is empty"
+ exit 1
+ fi
+ echo "LLAMA_CPP_TAG=$LLAMA_CPP_TAG" >> "$GITHUB_ENV"
+ echo "llama_cpp_tag=$LLAMA_CPP_TAG" >> "$GITHUB_OUTPUT"
echo "event=${{ github.event_name }}"
echo "assets_tag=${ASSETS_TAG}"
echo "assets_repo=${ASSETS_REPO}"
@@ -60,7 +74,6 @@ jobs:
env:
OUT_DIR: ${{ runner.temp }}/webgpu_bridge_dist
ASSETS_TAG: ${{ env.ASSETS_TAG }}
- LLAMA_CPP_TAG: ${{ env.LLAMA_CPP_TAG }}
SOURCE_REPO: ${{ github.repository }}
SOURCE_COMMIT: ${{ github.sha }}
run: |
@@ -191,9 +204,9 @@ jobs:
GH_TOKEN: ${{ secrets.WEBGPU_BRIDGE_ASSETS_PAT }}
ASSETS_TAG: ${{ env.ASSETS_TAG }}
ASSETS_REPO: ${{ env.ASSETS_REPO }}
+ LLAMA_CPP_TAG: ${{ needs.build-bridge-assets.outputs.llama_cpp_tag }}
SOURCE_REPO: ${{ github.repository }}
SOURCE_COMMIT: ${{ github.sha }}
- LLAMA_CPP_TAG: ${{ env.LLAMA_CPP_TAG }}
run: |
NOTES="$(cat < int:
smoke = read_required("scripts/state_persistence_browser_smoke.py", errors)
ci = read_required(".github/workflows/ci.yml", errors)
publish = read_required(".github/workflows/publish_assets.yml", errors)
+ auto_update = read_required(".github/workflows/auto_llama_cpp_update.yml", errors)
+ version = read_required("llama_cpp.version", errors).strip()
agents = read_required("AGENTS.md", errors)
readme = read_required("README.md", errors)
contributing = read_required("CONTRIBUTING.md", errors)
- for name, workflow in (("ci.yml", ci), ("publish_assets.yml", publish)):
+ for name, workflow in (
+ ("ci.yml", ci),
+ ("publish_assets.yml", publish),
+ ("auto_llama_cpp_update.yml", auto_update),
+ ):
require(
"FORCE_JAVASCRIPT_ACTIONS_TO_NODE24" in workflow,
f"{name} must opt into Node 24 action runtime to catch Node 20 deprecation breakage early",
@@ -44,6 +50,46 @@ def main() -> int:
errors,
)
+ require(
+ version.startswith("b") and version[1:].isdigit(),
+ "llama_cpp.version must contain a llama.cpp release tag like b9165",
+ errors,
+ )
+ require(
+ "tr -d '[:space:]' < llama_cpp.version" in ci
+ and "Resolve llama.cpp pin" in ci
+ and "workflow_dispatch:" in ci
+ and "LLAMA_CPP_TAG: b9116" not in ci,
+ "ci.yml must resolve the llama.cpp tag from llama_cpp.version, support explicit dispatch, and avoid hard-coded stale defaults",
+ errors,
+ )
+ require(
+ "REQUESTED_LLAMA_CPP_TAG" in publish
+ and "tr -d '[:space:]' < llama_cpp.version" in publish
+ and "outputs:" in publish
+ and "llama_cpp_tag: ${{ steps.resolve-publish-parameters.outputs.llama_cpp_tag }}" in publish
+ and "LLAMA_CPP_TAG: ${{ needs.build-bridge-assets.outputs.llama_cpp_tag }}" in publish
+ and "default: b9116" not in publish
+ and "|| 'b9116'" not in publish,
+ "publish_assets.yml must default to llama_cpp.version, pass the resolved tag across jobs, and still allow a manual override",
+ errors,
+ )
+ require(
+ "concurrency:" in auto_update
+ and "group: llama-cpp-auto-update" in auto_update
+ and "UPDATE_BRANCH: automation/bump-llama-cpp" in auto_update
+ and "ggml-org/llama.cpp" in auto_update
+ and "create-pull-request" in auto_update
+ and "actions: write" in auto_update
+ and "id: create-pr" in auto_update
+ and "gh workflow run ci.yml --repo \"$GITHUB_REPOSITORY\" --ref \"$UPDATE_BRANCH\"" in auto_update
+ and "body-path: /tmp/llama_cpp_update_pr.md" in auto_update
+ and "Upstream changelog" in auto_update
+ and "not racing a non-automation PR" in auto_update,
+ "auto_llama_cpp_update.yml must update one stable PR branch with upstream changelog context, dispatch CI for bot updates, and avoid racing human PRs",
+ errors,
+ )
+
require(
"--model-url" in smoke and "--model-sha256" in smoke,
"browser smoke must support an integrity-checked model-backed state round-trip",
@@ -97,22 +143,27 @@ def main() -> int:
require(
"Agent PR Workflow" in agents
and "independent review" in agents
- and "state_persistence_browser_smoke.py" in agents,
- "AGENTS.md must document the agent PR workflow and browser smoke expectations",
+ and "state_persistence_browser_smoke.py" in agents
+ and "llama_cpp.version" in agents
+ and "auto_llama_cpp_update.yml" in agents,
+ "AGENTS.md must document the agent PR workflow, browser smoke expectations, and llama.cpp auto-update policy",
errors,
)
require(
"FORCE_JAVASCRIPT_ACTIONS_TO_NODE24" in readme
and "state-persistence-smoke-artifacts" in readme
- and "scripts/verify_ci_reliability.py" in readme,
- "README.md must document CI reliability, diagnostics, and Node 24 action-runtime coverage",
+ and "scripts/verify_ci_reliability.py" in readme
+ and "llama_cpp.version" in readme
+ and "auto_llama_cpp_update.yml" in readme,
+ "README.md must document CI reliability, diagnostics, Node 24 action-runtime coverage, and llama.cpp pin automation",
errors,
)
require(
"Agent Workflow Guardrails" in contributing
and "scripts/verify_ci_reliability.py" in contributing
- and "--model-sha256" in contributing,
- "CONTRIBUTING.md must document maintainer/agent workflow guardrails and checksum-pinned smoke usage",
+ and "--model-sha256" in contributing
+ and "llama_cpp.version" in contributing,
+ "CONTRIBUTING.md must document maintainer/agent workflow guardrails, checksum-pinned smoke usage, and llama.cpp pin handling",
errors,
)