leehack · leehack · May 15, 2026 · May 15, 2026 · May 15, 2026
diff --git a/.github/workflows/auto_llama_cpp_update.yml b/.github/workflows/auto_llama_cpp_update.yml
@@ -0,0 +1,172 @@
+# Compares the tag pinned in llama_cpp.version with the newest
+# ggml-org/llama.cpp release and opens or refreshes a single bump PR.
+name: Auto-update llama.cpp Pin
+
+on:
+  schedule:
+    # Weekday morning UTC, after upstream llama.cpp usually publishes release tags.
+    # The expression below fires at 10:17 UTC, Monday through Friday.
+    - cron: '17 10 * * 1-5'
+  # Also runnable by hand (Actions tab or `gh workflow run`).
+  workflow_dispatch:
+
+# actions: write is used by the final step, which dispatches ci.yml.
+# contents / pull-requests: write let the create-pull-request step push the
+# update branch and open or edit the PR.
+permissions:
+  actions: write
+  contents: write
+  pull-requests: write
+
+# All runs share one group; a newly started run cancels one still in progress.
+concurrency:
+  group: llama-cpp-auto-update
+  cancel-in-progress: true
+
+env:
+  # Opts JavaScript actions into the Node 24 runtime; AGENTS.md asks for this
+  # flag to be preserved so action-runtime regressions surface early.
+  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"
+  # File whose entire content is the pinned llama.cpp tag.
+  VERSION_FILE: llama_cpp.version
+  # Head branch reused for every automated bump PR.
+  UPDATE_BRANCH: automation/bump-llama-cpp
+
+jobs:
+  # Single job: resolve tags, rewrite the version file, open/update the PR,
+  # then dispatch CI for the PR branch.
+  update-llama-cpp-pin:
+    name: Check for newer llama.cpp release
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          # Full history instead of the default shallow clone.
+          # NOTE(review): no later step visibly needs full history - confirm
+          # whether this can be dropped.
+          fetch-depth: 0
+
+      # Decides whether a bump is needed and, if so, rewrites $VERSION_FILE and
+      # renders the PR body to /tmp/llama_cpp_update_pr.md.
+      # Outputs: current_tag, latest_tag, update_needed, plus either skip_reason
+      # (update_needed=false) or release_url and compare_url (update_needed=true).
+      - name: Resolve update metadata
+        id: meta
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: |
+          set -euo pipefail
+
+          CURRENT_TAG="$(tr -d '[:space:]' < "$VERSION_FILE")"
+          if [ -z "$CURRENT_TAG" ]; then
+            echo "error: $VERSION_FILE is empty"
+            exit 1
+          fi
+
+          # Only published, non-prerelease releases are candidates, so the pin
+          # never moves to a draft or pre-release tag.
+          LATEST_TAG="$(gh release list --repo ggml-org/llama.cpp --exclude-drafts --exclude-pre-releases --limit 1 --json tagName --jq '.[0].tagName')"
+          if [ -z "$LATEST_TAG" ] || [ "$LATEST_TAG" = "null" ]; then
+            echo "error: could not resolve latest ggml-org/llama.cpp release tag"
+            exit 1
+          fi
+
+          echo "current_tag=$CURRENT_TAG" >> "$GITHUB_OUTPUT"
+          echo "latest_tag=$LATEST_TAG" >> "$GITHUB_OUTPUT"
+
+          if [ "$CURRENT_TAG" = "$LATEST_TAG" ]; then
+            echo "update_needed=false" >> "$GITHUB_OUTPUT"
+            echo "skip_reason=$VERSION_FILE already points at $LATEST_TAG" >> "$GITHUB_OUTPUT"
+            exit 0
+          fi
+
+          # Capture the PR list in a variable first: a failing command
+          # substitution inside a `for ... in $(...)` word list is not caught by
+          # `set -e`, which would silently skip the conflict check. The explicit
+          # limit avoids missing PRs beyond gh's default page of 30.
+          OTHER_PRS="$(gh pr list --repo "$GITHUB_REPOSITORY" --state open --limit 200 --json number,headRefName --jq ".[] | select(.headRefName != \"$UPDATE_BRANCH\") | .number")"
+          for pr in $OTHER_PRS; do
+            # Read the changed-file list into a variable instead of piping it
+            # into `grep -q`: under pipefail, an early grep exit (SIGPIPE) or a
+            # gh failure would otherwise be indistinguishable from "not touched".
+            if ! CHANGED_FILES="$(gh pr diff "$pr" --repo "$GITHUB_REPOSITORY" --name-only)"; then
+              echo "::warning::could not list files changed by PR #$pr; skipping its $VERSION_FILE conflict check"
+              continue
+            fi
+            # -F: match the file name literally (the "." is not a wildcard).
+            if grep -qxF -- "$VERSION_FILE" <<< "$CHANGED_FILES"; then
+              echo "update_needed=false" >> "$GITHUB_OUTPUT"
+              echo "skip_reason=PR #$pr already changes $VERSION_FILE; not racing a non-automation PR" >> "$GITHUB_OUTPUT"
+              exit 0
+            fi
+          done
+
+          printf '%s\n' "$LATEST_TAG" > "$VERSION_FILE"
+
+          RELEASE_URL="$(gh release view "$LATEST_TAG" --repo ggml-org/llama.cpp --json url --jq '.url')"
+          COMPARE_URL="https://github.com/ggml-org/llama.cpp/compare/${CURRENT_TAG}...${LATEST_TAG}"
+          echo "release_url=$RELEASE_URL" >> "$GITHUB_OUTPUT"
+          echo "compare_url=$COMPARE_URL" >> "$GITHUB_OUTPUT"
+          echo "update_needed=true" >> "$GITHUB_OUTPUT"
+
+          gh release view "$LATEST_TAG" --repo ggml-org/llama.cpp --json body --jq '.body // ""' > /tmp/llama_cpp_release_notes.md
+          # Best effort: when the compare call fails the body falls back to the
+          # "Commit list unavailable" line below.
+          gh api "repos/ggml-org/llama.cpp/compare/${CURRENT_TAG}...${LATEST_TAG}" \
+            --jq '.commits[:80][] | "- " + (.commit.message | split("\n")[0]) + " (`" + (.sha[0:7]) + "`)"' \
+            > /tmp/llama_cpp_commits.md || true
+
+          {
+            echo "## llama.cpp update"
+            echo
+            echo "- Previous pin: \`$CURRENT_TAG\`"
+            echo "- New pin: \`$LATEST_TAG\`"
+            echo "- Upstream release: $RELEASE_URL"
+            echo "- Compare: $COMPARE_URL"
+            echo
+            echo "## Upstream changelog"
+            echo
+            echo "<details>"
+            echo "<summary>Release notes for $LATEST_TAG</summary>"
+            echo
+            if [ -s /tmp/llama_cpp_release_notes.md ]; then
+              cat /tmp/llama_cpp_release_notes.md
+            else
+              echo "No GitHub release notes were published for this tag. Use the compare link above as the source of truth."
+            fi
+            echo
+            echo "</details>"
+            echo
+            echo "## Commit range"
+            echo
+            echo "<details>"
+            echo "<summary>Commits from $CURRENT_TAG to $LATEST_TAG (first 80)</summary>"
+            echo
+            if [ -s /tmp/llama_cpp_commits.md ]; then
+              cat /tmp/llama_cpp_commits.md
+            else
+              echo "Commit list unavailable; see $COMPARE_URL."
+            fi
+            echo
+            echo "</details>"
+            echo
+            echo "## Web bridge review focus"
+            echo
+            echo "Please pay extra attention to upstream changes touching:"
+            echo
+            echo "- WebGPU, WASM, Emscripten, pthreads, or memory64 build behavior"
+            echo "- ggml backend APIs used by the bridge"
+            echo "- model loading, tokenizer, chat template, context/state persistence, or cache semantics"
+            echo "- CMake/build flags that can affect the generated JS/WASM artifacts"
+            echo
+            echo "## Validation"
+            echo
+            echo "- [ ] Emscripten build passed"
+            echo "- [ ] Browser WebGPU/state-persistence smoke passed"
+            echo "- [ ] Generated bridge artifacts include wasm32 and memory64 outputs"
+            echo "- [ ] No stale hard-coded llama.cpp tag remains in CI/publish defaults"
+            echo
+            echo "## Automation behavior"
+            echo
+            echo "This PR is managed from the stable branch \`$UPDATE_BRANCH\`. If another llama.cpp release appears before merge, the scheduled workflow updates this same PR instead of opening a duplicate. The workflow skips if a non-automation PR already changes \`$VERSION_FILE\`."
+          } > /tmp/llama_cpp_update_pr.md
+
+      # Prints why no bump PR was produced (runs only when update_needed is
+      # 'false'). The reason is handed to the shell through an environment
+      # variable instead of an inline expression: inline expressions are expanded
+      # into the script text before bash parses it, and this text can contain the
+      # upstream tag name, so quotes, backticks or $(...) in it would be
+      # interpreted by the shell.
+      - name: Report skip reason
+        if: steps.meta.outputs.update_needed == 'false'
+        env:
+          SKIP_REASON: ${{ steps.meta.outputs.skip_reason }}
+        run: echo "$SKIP_REASON"
+
+      # Runs only when the metadata step rewrote the version file. Commits the
+      # working-tree change to the fixed update branch and opens (or refreshes)
+      # the PR against main, using the body rendered by the metadata step.
+      - name: Create or update llama.cpp bump PR
+        id: create-pr
+        if: steps.meta.outputs.update_needed == 'true'
+        uses: peter-evans/create-pull-request@v7
+        with:
+          # The workflow's own token; the later dispatch step exists so that CI
+          # is still started for the branch this token pushes.
+          token: ${{ secrets.GITHUB_TOKEN }}
+          # Fixed branch name so a later run reuses the same PR.
+          branch: ${{ env.UPDATE_BRANCH }}
+          base: main
+          # NOTE(review): see the create-pull-request docs for exactly when the
+          # branch is removed - confirm it matches the intended cleanup.
+          delete-branch: true
+          title: 'chore: bump llama.cpp to ${{ steps.meta.outputs.latest_tag }}'
+          commit-message: 'chore: bump llama.cpp to ${{ steps.meta.outputs.latest_tag }}'
+          body-path: /tmp/llama_cpp_update_pr.md
+          labels: |
+            dependencies
+            automated
+
+      # Starts ci.yml on the update branch through workflow_dispatch. Runs only
+      # when the create-PR step reported a PR number. Makes up to three attempts,
+      # pausing 5s after the first failure and 10s after the second; a third
+      # failure fails the job.
+      - name: Dispatch CI for automation PR
+        if: steps.create-pr.outputs.pull-request-number != ''
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: |
+          set -euo pipefail
+          for attempt in 1 2 3; do
+            if gh workflow run ci.yml --repo "$GITHUB_REPOSITORY" --ref "$UPDATE_BRANCH"; then
+              break
+            fi
+            if [ "$attempt" = 3 ]; then
+              echo "error: failed to dispatch CI for $UPDATE_BRANCH after $attempt attempts"
+              exit 1
+            fi
+            sleep $((attempt * 5))
+          done
+          echo "Dispatched CI for #${{ steps.create-pr.outputs.pull-request-number }} on $UPDATE_BRANCH."
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -1,6 +1,7 @@
 name: CI
 
 on:
+  workflow_dispatch:
   push:
     branches: [main]
   pull_request:
@@ -12,14 +13,23 @@ jobs:
     runs-on: ubuntu-latest
     env:
       FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"
-      LLAMA_CPP_TAG: b9116
       LLAMA_WEBGPU_SMOKE_MODEL_URL: https://huggingface.co/aladar/llama-2-tiny-random-GGUF/resolve/main/llama-2-tiny-random.gguf
       LLAMA_WEBGPU_SMOKE_MODEL_SHA256: 81f226c62d28ed4a1a9b9fa080fcd9f0cc40e0f9d5680036583ff98fbcd035cb
       LLAMA_WEBGPU_SMOKE_MODEL_CACHE: ~/.cache/llama-web-bridge/state-smoke-models
       LLAMA_WEBGPU_SMOKE_ARTIFACTS_DIR: /tmp/state-persistence-smoke-artifacts
     steps:
       - uses: actions/checkout@v4
 
+      # Reads the pinned tag from llama_cpp.version (all whitespace removed),
+      # fails the job when the result is empty, and exports it as LLAMA_CPP_TAG
+      # through GITHUB_ENV so later steps in this job see it.
+      - name: Resolve llama.cpp pin
+        run: |
+          LLAMA_CPP_TAG="$(tr -d '[:space:]' < llama_cpp.version)"
+          if [ -z "$LLAMA_CPP_TAG" ]; then
+            echo "error: llama_cpp.version is empty"
+            exit 1
+          fi
+          echo "LLAMA_CPP_TAG=$LLAMA_CPP_TAG" >> "$GITHUB_ENV"
+          echo "Using llama.cpp ${LLAMA_CPP_TAG}"
+
       - name: Validate state persistence API contract
         run: python3 scripts/verify_state_persistence_api.py
 

diff --git a/.github/workflows/publish_assets.yml b/.github/workflows/publish_assets.yml
@@ -11,9 +11,9 @@ on:
         required: true
         default: leehack/llama-web-bridge-assets
       llama_cpp_tag:
-        description: llama.cpp tag to build from
-        required: true
-        default: b9116
+        description: Optional llama.cpp tag override; defaults to llama_cpp.version
+        required: false
+        default: ''
   push:
     tags:
       - 'v*'
@@ -22,7 +22,7 @@ env:
   FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"
   ASSETS_TAG: ${{ github.event_name == 'workflow_dispatch' && inputs.assets_tag || github.ref_name }}
   ASSETS_REPO: ${{ github.event_name == 'workflow_dispatch' && inputs.assets_repo || 'leehack/llama-web-bridge-assets' }}
-  LLAMA_CPP_TAG: ${{ github.event_name == 'workflow_dispatch' && inputs.llama_cpp_tag || 'b9116' }}
+  REQUESTED_LLAMA_CPP_TAG: ${{ github.event_name == 'workflow_dispatch' && inputs.llama_cpp_tag || '' }}
 
 permissions:
   contents: read
@@ -31,11 +31,25 @@ jobs:
   build-bridge-assets:
     name: Build bridge assets
     runs-on: ubuntu-latest
+    outputs:
+      llama_cpp_tag: ${{ steps.resolve-publish-parameters.outputs.llama_cpp_tag }}
     steps:
       - uses: actions/checkout@v4
 
       - name: Resolve publish parameters
+        id: resolve-publish-parameters
         run: |
+          if [ -n "$REQUESTED_LLAMA_CPP_TAG" ]; then
+            LLAMA_CPP_TAG="$REQUESTED_LLAMA_CPP_TAG"
+          else
+            LLAMA_CPP_TAG="$(tr -d '[:space:]' < llama_cpp.version)"
+          fi
+          if [ -z "$LLAMA_CPP_TAG" ]; then
+            echo "error: llama.cpp tag is empty"
+            exit 1
+          fi
+          echo "LLAMA_CPP_TAG=$LLAMA_CPP_TAG" >> "$GITHUB_ENV"
+          echo "llama_cpp_tag=$LLAMA_CPP_TAG" >> "$GITHUB_OUTPUT"
           echo "event=${{ github.event_name }}"
           echo "assets_tag=${ASSETS_TAG}"
           echo "assets_repo=${ASSETS_REPO}"
@@ -60,7 +74,6 @@ jobs:
         env:
           OUT_DIR: ${{ runner.temp }}/webgpu_bridge_dist
           ASSETS_TAG: ${{ env.ASSETS_TAG }}
-          LLAMA_CPP_TAG: ${{ env.LLAMA_CPP_TAG }}
           SOURCE_REPO: ${{ github.repository }}
           SOURCE_COMMIT: ${{ github.sha }}
         run: |
@@ -191,9 +204,9 @@ jobs:
           GH_TOKEN: ${{ secrets.WEBGPU_BRIDGE_ASSETS_PAT }}
           ASSETS_TAG: ${{ env.ASSETS_TAG }}
           ASSETS_REPO: ${{ env.ASSETS_REPO }}
+          LLAMA_CPP_TAG: ${{ needs.build-bridge-assets.outputs.llama_cpp_tag }}
           SOURCE_REPO: ${{ github.repository }}
           SOURCE_COMMIT: ${{ github.sha }}
-          LLAMA_CPP_TAG: ${{ env.LLAMA_CPP_TAG }}
         run: |
           NOTES="$(cat <<EOF
           Bridge assets published from ${SOURCE_REPO}@${SOURCE_COMMIT} (llama.cpp ${LLAMA_CPP_TAG}).

diff --git a/AGENTS.md b/AGENTS.md
@@ -27,7 +27,8 @@ Common maintainer sibling layout:
 
 Useful environment overrides:
 
-- `LLAMA_CPP_DIR`
+- `LLAMA_CPP_DIR` (defaults to `third_party/llama_cpp`; CI clones the tag from
+  `llama_cpp.version`)
 - `BUILD_DIR`
 - `OUT_DIR`
 - `CMAKE_BUILD_TYPE`
@@ -87,6 +88,18 @@ python3 scripts/state_persistence_browser_smoke.py \
 ## CI / Release
 
 - CI build gate: `.github/workflows/ci.yml`
+  - Resolves the default llama.cpp checkout from `llama_cpp.version`.
+- Automated llama.cpp bump PR: `.github/workflows/auto_llama_cpp_update.yml`
+  - Runs on a schedule/manual dispatch, compares `llama_cpp.version` against the
+    latest `ggml-org/llama.cpp` release, and manages one stable
+    `automation/bump-llama-cpp` PR.
+  - The PR body must include the upstream release notes, compare URL, commit
+    range, and WebGPU/WASM review focus. If a newer upstream release appears
+    while the PR is still open, update the same PR instead of opening a duplicate.
+  - Dispatch the CI workflow on the automation branch after creating/updating
+    the PR. Pushes made with `GITHUB_TOKEN` do not trigger `push` or
+    `pull_request` workflows, so without the explicit dispatch the PR head SHA
+    would never be validated.
+  - Skip instead of racing when a non-automation PR already changes
+    `llama_cpp.version`.
 - CI reliability contract: `scripts/verify_ci_reliability.py`
   - Keep this script updated when changing browser smoke behavior, action
     versions, or workflow diagnostics.
@@ -98,6 +111,10 @@ python3 scripts/state_persistence_browser_smoke.py \
     `FORCE_JAVASCRIPT_ACTIONS_TO_NODE24` so action-runtime regressions are caught
     before Node 20 deprecation becomes a hard failure.
 - Publish workflow: `.github/workflows/publish_assets.yml`
+  - Defaults to `llama_cpp.version`; workflow-dispatch `llama_cpp_tag` is only a
+    temporary explicit override.
+  - Passes the resolved `llama.cpp` tag as a job output so asset release notes
+    match the generated manifest.
   - Requires `WEBGPU_BRIDGE_ASSETS_PAT`
   - Pushes assets + tag to `llama-web-bridge-assets`
 

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -11,7 +11,7 @@ Published artifacts are consumed from `llama-web-bridge-assets`.
 
 - Emscripten SDK (`emcmake`, `emcc`)
 - CMake toolchain
-- Access to a llama.cpp checkout
+- Access to a llama.cpp checkout matching `llama_cpp.version`
 
 ## Setup
 
@@ -80,8 +80,16 @@ query strings, and fragments before printing the location.
 ## Agent Workflow Guardrails
 
 - Keep workflow reliability rules in `scripts/verify_ci_reliability.py` when
-  changing `.github/workflows/ci.yml`, `.github/workflows/publish_assets.yml`, or
+  changing `.github/workflows/ci.yml`, `.github/workflows/publish_assets.yml`,
+  `.github/workflows/auto_llama_cpp_update.yml`, or
   `scripts/state_persistence_browser_smoke.py`.
+- Preserve `llama_cpp.version` as the single source of truth for default CI and
+  publish builds. Manual publish overrides are allowed for temporary validation,
+  but tag-triggered publishes should use the pinned file.
+- The auto-update workflow manages the stable `automation/bump-llama-cpp` branch
+  and updates an existing PR instead of opening duplicates. It should include the
+  upstream release notes, compare link, and commit range in the PR body, then
+  dispatch CI on the automation branch, because pushes made with the workflow's
+  `GITHUB_TOKEN` do not trigger CI on their own.
 - Preserve `FORCE_JAVASCRIPT_ACTIONS_TO_NODE24` in CI and publish workflows so
   GitHub Action runtime changes are detected before they become mandatory.
 - Upload state-persistence smoke diagnostics only on failure; successful CI runs
@@ -94,10 +102,12 @@ query strings, and fragments before printing the location.
 Use workflow `.github/workflows/publish_assets.yml`:
 
 1. Set input `assets_tag` (new tag).
-2. Optionally set `assets_repo` and `llama_cpp_tag`.
+2. Optionally set `assets_repo`; leave `llama_cpp_tag` empty to use
+   `llama_cpp.version`, or set it only for an explicit temporary override.
 3. Ensure `WEBGPU_BRIDGE_ASSETS_PAT` secret is configured.
 4. Workflow builds, generates `manifest.json`/`sha256sums.txt`, pushes to
-   assets repo, and creates matching tag there.
+   assets repo, creates matching tag there, and uses the build job's resolved
+   `llama.cpp` tag output in release notes.
 
 ## Repository Boundaries