diff --git a/.github/workflows/collectivex-sweep.yml b/.github/workflows/collectivex-sweep.yml
index 7ddaca285c..489bca7888 100644
--- a/.github/workflows/collectivex-sweep.yml
+++ b/.github/workflows/collectivex-sweep.yml
@@ -1,71 +1,300 @@
 # CollectiveX Sweep — one structured run instead of thousands of dispatches.
 #
-# Shape (mirrors the InferenceX CI tracker): setup -> sweep (a MATRIX job = "a job with other jobs
-# in it") -> aggregate (the collector "at the end"). The matrix unit is a SHARD = one allocation that
-# sweeps many cases sharing (sku, backend, mode, resource) — generate_matrix's own grouping, chunked
-# so no cell exceeds the job budget. Each cell emits a handful of per-case JSONs; the aggregate job
-# collects every shard into ONE line-delimited file (results/aggregate/*.ndjson) so there aren't
-# thousands of individual result files. Run once per backend (deepep / uccl / flashinfer /
-# deepep-hybrid / nccl-ep, + deepep_v2) for full parity.
+# Shape: setup -> sweep; a non-blank diagnostic_execution input runs only the diagnostic job instead.
+# The matrix unit is a shard: one allocation that sweeps cases sharing (sku, backend, nodes).
+# Each cell uploads its privacy-checked raw result JSONs. The isolated v1 publisher consumes downloaded shards separately.
 name: CollectiveX Sweep
+permissions:
+  contents: read
 on:
   workflow_dispatch:
     inputs:
       backend:
-        description: EP library to sweep (deepep matrix is remapped onto the others, capability-filtered)
+        description: "EP library to sweep — 'all' runs every EP backend in one matrix"
         type: choice
-        default: deepep
-        options: [deepep, uccl, flashinfer, deepep-hybrid, nccl-ep]
-      deepep_v2:
-        description: DeepEP V2 from-source kernels (kernel_gen=v2; deepep backend only)
-        type: boolean
-        default: false
+        default: all
+        options: [all, deepep, deepep-v2, uccl, deepep-hybrid, mori, nccl-ep]
       suites:
         description: "'all' or comma-list of suite names"
         type: string
         default: all
       only_sku:
-        description: Restrict to one SKU (h100-dgxc|h200|b300|b200-dgxc|gb200|gb300|mi355x); blank = all
+        description: Restrict to one GHA runner pool (h100-dgxc|h200-dgxc|b300|b200-dgxc|gb200|gb300|mi325x|mi355x); blank = all
+        type: string
+        default: ''
+      min_nodes:
+        description: Keep only shards with >= this tray count (2 = rack-scale EP8 only; blank = all)
+        type: string
+        default: ''
+      max_nodes:
+        description: Keep only shards with <= this tray count (1 = single-tray EP4 only; blank = all)
         type: string
         default: ''
       max_cases:
-        description: Max cases per shard cell (chunk larger shards)
+        description: Max cases per shard cell before chunking into another GHA job (128 = no chunking for current suites)
         type: string
-        default: '14'
-
+        default: '128'
+      diagnostic_execution:
+        description: Temporary retained-log execution identity; blank runs the benchmark
+        type: string
+        default: ''
 concurrency:
-  group: cx-sweep-${{ github.ref }}-${{ inputs.backend }}-${{ inputs.deepep_v2 }}-${{ inputs.only_sku }}
+  group: ${{ inputs.diagnostic_execution != '' && format('cx-diagnostic-{0}', inputs.diagnostic_execution) || format('cx-sweep-{0}-{1}-{2}', github.ref, inputs.backend, inputs.only_sku) }}
   cancel-in-progress: false
 
 jobs:
+  diagnostic:
+    if: ${{ inputs.diagnostic_execution != '' }}
+    runs-on: ${{ 'h100-dgxc' }}
+    timeout-minutes: 5
+    env:
+      EXECUTION_ID: ${{ inputs.diagnostic_execution }}
+    steps:
+      - name: Classify retained private log without disclosing it
+        run: |
+          python3 - <<'PY'
+          import hashlib
+          import json
+          import os
+          import re
+          import stat
+
+          execution = os.environ.get("EXECUTION_ID", "")
+          expected = "28706865182_1_h100-dgxc-deepep-v2-n1"
+          if execution != expected:
+              raise SystemExit("invalid diagnostic request")
+          root = f"/tmp/inferencex-collectivex-{os.getuid()}/{expected}"
+          try:
+              root_fd = os.open(
+                  root, os.O_RDONLY | os.O_DIRECTORY | os.O_CLOEXEC | os.O_NOFOLLOW
+              )
+          except OSError:
+              raise SystemExit("retained diagnostic unavailable") from None
+          metadata = os.fstat(root_fd)
+          if (
+              not stat.S_ISDIR(metadata.st_mode)
+              or metadata.st_uid != os.getuid()
+              or stat.S_IMODE(metadata.st_mode) != 0o700
+          ):
+              raise SystemExit("private diagnostic directory is unsafe")
+
+          native_status = {}
+          native_sites = {}
+          exceptions = {}
+          trace_sites = {}
+          terms = {}
+          digests = []
+          total = 0
+          logs = 0
+          for name in sorted(os.listdir(root_fd)):
+              if not re.fullmatch(r"[A-Za-z0-9][A-Za-z0-9._-]{0,127}[.]log", name):
+                  continue
+              fd = -1
+              try:
+                  fd = os.open(
+                      name,
+                      os.O_RDONLY | os.O_CLOEXEC | os.O_NOFOLLOW,
+                      dir_fd=root_fd,
+                  )
+                  item = os.fstat(fd)
+                  if (
+                      not stat.S_ISREG(item.st_mode)
+                      or item.st_uid != os.getuid()
+                      or stat.S_IMODE(item.st_mode) & 0o077
+                      or item.st_nlink != 1
+                      or item.st_size > 64 * 1024 * 1024
+                  ):
+                      raise RuntimeError
+                  chunks = []
+                  remaining = item.st_size
+                  while remaining:
+                      chunk = os.read(fd, min(1024 * 1024, remaining))
+                      if not chunk:
+                          raise RuntimeError
+                      chunks.append(chunk)
+                      remaining -= len(chunk)
+                  if os.read(fd, 1):
+                      raise RuntimeError
+                  payload = b"".join(chunks)
+              except (OSError, RuntimeError):
+                  raise SystemExit("retained diagnostic validation failed") from None
+              finally:
+                  if fd >= 0:
+                      os.close(fd)
+              logs += 1
+              total += len(payload)
+              digests.append(hashlib.sha256(payload).digest())
+              for line in payload.splitlines():
+                  lower = line.lower()
+                  match = re.search(rb" exception \([^()\n]*:[0-9]{1,6}\):\s*([0-9]{1,6})", line)
+                  if match:
+                      key = match.group(1).decode("ascii")
+                      native_status[key] = native_status.get(key, 0) + 1
+                  native_site = re.search(
+                      rb" exception \([^()\n]*/(nccl[.]cu):([0-9]{1,6})\):\s*([0-9]{1,6})",
+                      line,
+                  )
+                  if native_site:
+                      key = ":".join(
+                          part.decode("ascii") for part in native_site.groups()
+                      )
+                      native_sites[key] = native_sites.get(key, 0) + 1
+                  for found in re.finditer(
+                      rb"(?<![A-Za-z0-9_])([A-Za-z_][A-Za-z0-9_]*(?:Error|Exception))(?=[:\s]|$)",
+                      line,
+                  ):
+                      exception = found.group(1).decode("ascii", errors="ignore")
+                      if exception not in {"ChildFailedError", "RuntimeError"}:
+                          exception = "other"
+                      exceptions[exception] = exceptions.get(exception, 0) + 1
+                  trace_site = re.search(
+                      rb"File ['\"][^'\"]*/([A-Za-z_][A-Za-z0-9_.-]*[.]py)['\"], line ([0-9]{1,6})",
+                      line,
+                  )
+                  if trace_site:
+                      source = trace_site.group(1).decode("ascii")
+                      if source not in {
+                          "elastic.py", "ep_deepep_v2.py", "ep_harness.py", "run_ep.py"
+                      }:
+                          source = "other"
+                      key = f"{source}:{trace_site.group(2).decode('ascii')}"
+                      trace_sites[key] = trace_sites.get(key, 0) + 1
+                  for label, pattern in {
+                      "driver-insufficient": rb"driver version is insufficient",
+                      "invalid-argument": rb"invalid argument",
+                      "invalid-device-function": rb"invalid device function",
+                      "invalid-usage": rb"invalid usage",
+                      "not-supported": rb"not supported|operation not supported",
+                      "unhandled-cuda": rb"unhandled cuda error",
+                      "illegal-memory": rb"illegal memory access|misaligned address",
+                      "timeout": rb"timed out|timeout",
+                      "process-killed": rb"\bkilled\b|signal 9|signal 15",
+                  }.items():
+                      if re.search(pattern, lower):
+                          terms[label] = terms.get(label, 0) + 1
+          os.close(root_fd)
+          if logs == 0:
+              raise SystemExit("retained diagnostic validation failed")
+          print(json.dumps({
+              "exception_types": exceptions,
+              "log_count": logs,
+              "native_sites": native_sites,
+              "native_status": native_status,
+              "payload_bytes": total,
+              "payload_set_sha256": hashlib.sha256(b"".join(sorted(digests))).hexdigest(),
+              "runtime_terms": terms,
+              "trace_sites": trace_sites,
+          }, sort_keys=True))
+          PY
+
   # ---- setup: resolve the suites into the shard matrix (the "pending jobs" node) ----
   setup:
+    if: ${{ inputs.diagnostic_execution == '' }}
     runs-on: ubuntu-latest
     outputs:
       matrix: ${{ steps.gen.outputs.matrix }}
       n: ${{ steps.gen.outputs.n }}
     steps:
       - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v5.0.0
-        with: { clean: true }
-      - run: pip install --quiet pyyaml
+        with: { clean: true, persist-credentials: false }
+      - name: Install matrix dependencies
+        run: python3 -m pip install --quiet PyYAML==6.0.2
       - id: gen
         working-directory: experimental/CollectiveX
+        env:
+          INPUT_BACKEND: ${{ inputs.backend }}
+          INPUT_SUITES: ${{ inputs.suites }}
+          INPUT_ONLY_SKU: ${{ inputs.only_sku }}
+          INPUT_MIN_NODES: ${{ inputs.min_nodes }}
+          INPUT_MAX_NODES: ${{ inputs.max_nodes }}
+          INPUT_MAX_CASES: ${{ inputs.max_cases }}
+          COLLECTIVEX_SOURCE_SHA: ${{ github.sha }}
+          COLLECTIVEX_ARTIFACT_NAME: cxunsupported-${{ github.run_id }}-${{ github.run_attempt }}
+          COLLECTIVEX_EXECUTION_ID: ${{ github.run_id }}_${{ github.run_attempt }}_unsupported
         run: |
           set -euo pipefail
-          ov=""; [ "${{ inputs.backend }}" != "deepep" ] && ov="--backend ${{ inputs.backend }}"
-          v2=""; [ "${{ inputs.deepep_v2 }}" = "true" ] && v2="--deepep-v2"
-          os=""; [ -n "${{ inputs.only_sku }}" ] && os="--only-sku ${{ inputs.only_sku }}"
-          # full matrix (with cases) -> artifact for the cells; slim (no cases) -> the strategy output.
-          python3 sweep_matrix.py --suites "${{ inputs.suites }}" --max-cases "${{ inputs.max_cases }}" $ov $v2 $os --out matrix_full.json >/dev/null
-          SLIM=$(python3 -c "import json;m=json.load(open('matrix_full.json'));print(json.dumps({'include':[{k:v for k,v in x.items() if k!='cases'} for x in m['include']]}))")
-          echo "matrix=$SLIM" >> "$GITHUB_OUTPUT"
-          echo "n=$(python3 -c "import json;print(len(json.load(open('matrix_full.json'))['include']))")" >> "$GITHUB_OUTPUT"
-          python3 -c "import json;m=json.load(open('matrix_full.json'));print('shard-cells:',len(m['include']),'cases:',sum(x['n'] for x in m['include']))"
+          args=(--suites "$INPUT_SUITES" --max-cases "$INPUT_MAX_CASES")
+          case "$INPUT_BACKEND" in
+            all) args+=(--backends all) ;;
+            *) args+=(--backend "$INPUT_BACKEND") ;;
+          esac
+          [ -n "$INPUT_ONLY_SKU" ] && args+=(--only-sku "$INPUT_ONLY_SKU")
+          [ -n "$INPUT_MIN_NODES" ] && args+=(--min-nodes "$INPUT_MIN_NODES")
+          [ -n "$INPUT_MAX_NODES" ] && args+=(--max-nodes "$INPUT_MAX_NODES")
+          python3 sweep_matrix.py "${args[@]}" --out matrix_full.json >/dev/null
+          python3 artifact_safety.py matrix_full.json
+          SLIM=$(python3 -c "import json;m=json.load(open('matrix_full.json'));print(json.dumps({'include':[{k:v for k,v in x.items() if k!='case_ids'} for x in m['include']]}))")
+          {
+            echo "matrix=$SLIM"
+            echo "n=$(python3 -c "import json;print(len(json.load(open('matrix_full.json'))['include']))")"
+            echo "source_backends=$(python3 -c "import json;m=json.load(open('matrix_full.json'));print(' '.join(sorted({x['backend'] for x in m['include']} & {'deepep-v2','deepep-hybrid'})))")"
+          } >> "$GITHUB_OUTPUT"
+          unsupported_n=$(python3 -c "import json;m=json.load(open('matrix_full.json'));print(sum(x['disposition']=='unsupported' for x in m['requested_cases']))")
+          echo "unsupported_n=$unsupported_n" >> "$GITHUB_OUTPUT"
+          if [ "$unsupported_n" -gt 0 ]; then
+            python3 sweep_matrix.py --emit-unsupported-from matrix_full.json \
+              --out-dir unsupported
+          fi
+          python3 -c "import json;m=json.load(open('matrix_full.json'));r=m['requested_cases'];print('shard-cells:',len(m['include']),'runnable:',sum(x['disposition']=='runnable' for x in r),'unsupported:',sum(x['disposition']=='unsupported' for x in r))"
+      - name: Prepare pinned backend source archive
+        if: ${{ steps.gen.outputs.source_backends != '' }}
+        working-directory: experimental/CollectiveX
+        env:
+          SOURCE_BACKENDS: ${{ steps.gen.outputs.source_backends }}
+          COLLECTIVEX_EXECUTION_ID: ${{ github.run_id }}_${{ github.run_attempt }}_sources
+        run: |
+          set -euo pipefail
+          source runtime/common.sh
+          work="$RUNNER_TEMP/collectivex-backend-sources"
+          archive="$RUNNER_TEMP/collectivex-backend-sources.tar"
+          rm -rf -- "$work" "$archive"
+          umask 077
+          mkdir -m 700 "$work"
+          mkdir -p "$work/experimental/CollectiveX"
+          read -r -a backends <<< "$SOURCE_BACKENDS"
+          [ "${#backends[@]}" -gt 0 ]
+          for backend in "${backends[@]}"; do
+            cx_prepare_backend_source "$work" "$backend"
+          done
+          cx_cleanup_private_logs 0
+          tar --sort=name --mtime='@1' --owner=0 --group=0 --numeric-owner \
+            -C "$work/experimental/CollectiveX" -cf "$archive" .cx_sources
+          sha256sum "$archive"
+          rm -rf -- "$work"
+      - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+        if: ${{ steps.gen.outputs.source_backends != '' }}
+        with:
+          name: cxbackend-sources-${{ github.run_id }}-${{ github.run_attempt }}
+          path: ${{ runner.temp }}/collectivex-backend-sources.tar
+          if-no-files-found: error
+          retention-days: 3
       - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
         with:
-          name: cxsweep-matrix-${{ github.run_id }}
+          name: cxsweep-matrix-${{ github.run_id }}-${{ github.run_attempt }}
           path: experimental/CollectiveX/matrix_full.json
           if-no-files-found: error
+      - name: Validate unsupported artifact safety
+        id: unsupported_safety
+        if: ${{ always() && steps.gen.outputs.unsupported_n != '' && fromJSON(steps.gen.outputs.unsupported_n) > 0 }}  # empty output (gen failed early) must skip, not break fromJSON
+        run: |
+          python3 experimental/CollectiveX/artifact_safety.py experimental/CollectiveX/unsupported/*.json
+      - name: Validate unsupported outcomes
+        id: unsupported_contracts
+        if: ${{ always() && steps.unsupported_safety.outcome == 'success' && fromJSON(steps.gen.outputs.unsupported_n) > 0 }}  # outcome first: short-circuits before fromJSON when the count is unset
+        env:
+          COLLECTIVEX_ARTIFACT_NAME: cxunsupported-${{ github.run_id }}-${{ github.run_attempt }}
+          COLLECTIVEX_EXECUTION_ID: ${{ github.run_id }}_${{ github.run_attempt }}_unsupported
+        run: |
+          python3 experimental/CollectiveX/contracts.py validate-delivery \
+            --source experimental/CollectiveX/matrix_full.json \
+            --disposition unsupported \
+            experimental/CollectiveX/unsupported/*.json
+      - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+        if: ${{ always() && steps.unsupported_safety.outcome == 'success' && steps.unsupported_contracts.outcome == 'success' && fromJSON(steps.gen.outputs.unsupported_n) > 0 }}
+        with:
+          name: cxunsupported-${{ github.run_id }}-${{ github.run_attempt }}
+          path: experimental/CollectiveX/unsupported/*.json
+          if-no-files-found: error
 
   # ---- sweep: ONE matrix cell per shard (the parent job with child jobs) ----
   sweep:
@@ -73,82 +302,248 @@ jobs:
     if: ${{ fromJSON(needs.setup.outputs.n) > 0 }}
     strategy:
       fail-fast: false
-      max-parallel: 10            # don't saturate the ~20-runner fleet; cells queue as slots free
+      max-parallel: 10
       matrix: ${{ fromJSON(needs.setup.outputs.matrix) }}
-    # h200 label spans two clusters; pin to the validated dgxc pool (mirrors collectivex-experimental).
-    runs-on: ${{ matrix.sku == 'h200' && 'h200-dgxc' || matrix.sku }}
+    runs-on: ${{ matrix.sku }}
     timeout-minutes: 350
     env:
       CX_BENCH: ${{ matrix.backend }}
-      CX_DEEPEP_V2: ${{ matrix.deepep_v2 && '1' || '' }}
       CX_NODES: ${{ matrix.nodes }}
-      CX_SHARD_FILE: results/.shard_${{ matrix.id }}.json
+      CX_GPUS_PER_NODE: ${{ matrix.gpus_per_node }}
+      CX_SCALE_UP_DOMAIN: ${{ matrix.scale_up_domain }}
+      CX_SHARD_FILE: .shards/${{ matrix.id }}.json
+      CX_SHARD_SKU: ${{ matrix.sku }}
+      COLLECTIVEX_CANONICAL_GHA: '1'
       COLLECTIVEX_SOURCE_SHA: ${{ github.sha }}
-      CX_NODELIST: ${{ matrix.sku == 'mi355x' && 'mia1-p01-g10,mia1-p01-g15' || '' }}
-      CX_STAGE_DIR: ${{ matrix.sku == 'gb200' && '/mnt/lustre01/users-public/sa-shared/cx-stage' || '' }}
+      COLLECTIVEX_ARTIFACT_NAME: cxshard-${{ matrix.id }}-${{ github.run_id }}-${{ github.run_attempt }}
+      # Consolidated shards run one bounded build-group in one Slurm allocation, so
+      # the launcher's default 45-min --time is too short. 300 min covers a cold
+      # compute-node image import plus the shard. The allocation releases early
+      # when the shard finishes, so short shards don't waste it.
+      CX_TIME: '300'
+      COLLECTIVEX_EXECUTION_ID: ${{ github.run_id }}_${{ github.run_attempt }}_${{ matrix.id }}
+      CX_JOB_ROOT: /tmp/inferencex-collectivex-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.id }}
+      CX_SOURCE_ROOT: /tmp/inferencex-collectivex-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.id }}/source
+      HOME: /tmp/inferencex-collectivex-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.id }}/home
     steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v5.0.0
-        with: { clean: true }
+      - name: Prepare isolated source
+        id: source
+        env:
+          COLLECTIVEX_REPOSITORY: ${{ github.repository }}
+        run: |
+          set -euo pipefail
+          python3 - <<'PY'
+          import os
+          import re
+          import shutil
+          import stat
+          import time
+
+          pattern = re.compile(r"inferencex-collectivex-[0-9]+-[0-9]+-[A-Za-z0-9._-]+")
+          cutoff = time.time() - 86400  # only roots whose mtime is older than 24 hours are candidates
+          for entry in os.scandir("/tmp"):
+              if not pattern.fullmatch(entry.name):
+                  continue
+              try:
+                  metadata = entry.stat(follow_symlinks=False)
+              except FileNotFoundError:
+                  continue
+              if (
+                  not stat.S_ISDIR(metadata.st_mode)
+                  or metadata.st_uid != os.getuid()
+                  or stat.S_IMODE(metadata.st_mode) != 0o700
+                  or metadata.st_mtime >= cutoff
+              ):
+                  continue
+              marked = False
+              for marker_name in ("cleanup-safe", "cleanup-unsafe"):
+                  try:
+                      marker = os.stat(
+                          os.path.join(entry.path, marker_name), follow_symlinks=False
+                      )
+                  except FileNotFoundError:
+                      continue
+                  marked = (
+                      stat.S_ISREG(marker.st_mode)
+                      and marker.st_uid == os.getuid()
+                      and stat.S_IMODE(marker.st_mode) == 0o600
+                  )
+                  if marked:
+                      break
+              if marked:
+                  shutil.rmtree(entry.path, ignore_errors=True)  # best-effort: another job may be removing the same stale root
+          PY
+          [[ "$CX_JOB_ROOT" =~ ^/tmp/inferencex-collectivex-[0-9]+-[0-9]+-[A-Za-z0-9._-]+$ ]] \
+            || { echo "CollectiveX isolated root is invalid" >&2; exit 1; }
+          [ "$CX_SOURCE_ROOT" = "$CX_JOB_ROOT/source" ] \
+            || { echo "CollectiveX source root is invalid" >&2; exit 1; }
+          if [ -e "$CX_JOB_ROOT" ] || [ -L "$CX_JOB_ROOT" ]; then
+            echo "CollectiveX isolated root already exists" >&2
+            exit 1
+          fi
+          umask 077
+          mkdir -m 700 -- "$CX_JOB_ROOT"
+          trap 'rc=$?; [ "$rc" = 0 ] || rm -rf -- "$CX_JOB_ROOT"; exit "$rc"' EXIT  # remove the new root if any later command fails
+          mkdir -m 700 -- "$HOME" "$CX_JOB_ROOT/control" "$CX_JOB_ROOT/artifact" "$CX_SOURCE_ROOT"
+          : > "$CX_JOB_ROOT/cleanup-safe"
+          if ! {
+            GIT_CONFIG_NOSYSTEM=1 GIT_CONFIG_GLOBAL=/dev/null git init -q "$CX_SOURCE_ROOT"
+            GIT_CONFIG_NOSYSTEM=1 GIT_CONFIG_GLOBAL=/dev/null \
+              git -C "$CX_SOURCE_ROOT" remote add origin \
+                "https://github.com/${COLLECTIVEX_REPOSITORY}.git"
+            GIT_CONFIG_NOSYSTEM=1 GIT_CONFIG_GLOBAL=/dev/null \
+              git -C "$CX_SOURCE_ROOT" -c credential.helper= -c protocol.version=2 \
+                fetch -q --no-tags --depth=1 origin "$COLLECTIVEX_SOURCE_SHA"
+            GIT_CONFIG_NOSYSTEM=1 GIT_CONFIG_GLOBAL=/dev/null \
+              git -C "$CX_SOURCE_ROOT" -c advice.detachedHead=false \
+                checkout -q --detach FETCH_HEAD
+            [ "$(git -C "$CX_SOURCE_ROOT" rev-parse HEAD)" = "$COLLECTIVEX_SOURCE_SHA" ]
+          } </dev/null >/dev/null 2>&1; then
+            echo "CollectiveX source preparation failed" >&2
+            exit 1
+          fi
+          [ "$(stat -c '%a' "$CX_JOB_ROOT")" = 700 ] \
+            || { echo "CollectiveX isolated root has unsafe permissions" >&2; exit 1; }
+          echo 'prepared=true' >> "$GITHUB_OUTPUT"
+          trap - EXIT
       - uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
         with:
-          name: cxsweep-matrix-${{ github.run_id }}
-          path: experimental/CollectiveX
-      - name: Extract this shard's cases (stdlib only — no runner deps)
-        working-directory: experimental/CollectiveX
+          name: cxsweep-matrix-${{ github.run_id }}-${{ github.run_attempt }}
+          path: ${{ env.CX_JOB_ROOT }}/control
+      - name: Download pinned backend source archive
+        if: ${{ matrix.backend == 'deepep-v2' || matrix.backend == 'deepep-hybrid' }}
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
+        with:
+          name: cxbackend-sources-${{ github.run_id }}-${{ github.run_attempt }}
+          path: ${{ env.CX_JOB_ROOT }}/control
+      - name: Install pinned backend source seed
+        if: ${{ matrix.backend == 'deepep-v2' || matrix.backend == 'deepep-hybrid' }}
+        env:
+          EXPECTED_BACKEND: ${{ matrix.backend }}
         run: |
           set -euo pipefail
-          python3 -c "
-          import json
-          m=json.load(open('matrix_full.json'))
-          s=[x for x in m['include'] if x['id']=='${{ matrix.id }}']
-          assert s, 'shard ${{ matrix.id }} not in matrix'
-          s=s[0]
-          json.dump({'id':s['id'],'sku':s['sku'],'backend':s['backend'],'nodes':s['nodes'],'deepep_v2':s['deepep_v2'],'cases':s['cases']}, open('results/.shard_${{ matrix.id }}.json','w'))
-          print('shard ${{ matrix.id }}:', len(s['cases']), 'cases')
-          "
+          archive="$CX_JOB_ROOT/control/collectivex-backend-sources.tar"
+          destination="$CX_SOURCE_ROOT/experimental/CollectiveX"
+          seed_root="$destination/.cx_sources"
+          [ -f "$archive" ] && [ ! -e "$seed_root" ] && [ ! -L "$seed_root" ]
+          python3 - "$archive" <<'PY'
+          from pathlib import PurePosixPath
+          import sys
+          import tarfile
+
+          with tarfile.open(sys.argv[1]) as archive:
+              for member in archive.getmembers():
+                  path = PurePosixPath(member.name)
+                  if (
+                      not path.parts
+                      or path.parts[0] != ".cx_sources"
+                      or ".." in path.parts
+                      or member.issym()
+                      or member.islnk()
+                      or member.isdev()
+                  ):
+                      raise SystemExit("invalid backend source archive")
+          PY
+          umask 077
+          tar --extract --no-same-owner --no-same-permissions \
+            --file "$archive" --directory "$destination"
+          source "$destination/runtime/common.sh"
+          source_path="$(cx_backend_source_path "$seed_root" "$EXPECTED_BACKEND")"
+          cx_backend_source_is_valid "$EXPECTED_BACKEND" "$source_path"
+          printf 'CX_BACKEND_SOURCE_SEED_ROOT=%s\n' "$seed_root" >> "$GITHUB_ENV"
+      - name: Extract and validate this shard's cases
+        env: { CX_SHARD_ID: '${{ matrix.id }}' }  # matrix values reach the shell as data, never as script text
+        run: |
+          set -euo pipefail
+          cd "$CX_SOURCE_ROOT/experimental/CollectiveX" 2>/dev/null \
+            || { echo "CollectiveX source is unavailable" >&2; exit 1; }
+          python3 sweep_matrix.py \
+            --extract-from "$CX_JOB_ROOT/control/matrix_full.json" \
+            --shard-id "$CX_SHARD_ID" --expect-sku "$CX_SHARD_SKU" \
+            --expect-backend "$CX_BENCH" \
+            --expect-nodes "$CX_NODES" \
+            --out "$CX_SHARD_FILE" >/dev/null
       - name: Sweep shard ${{ matrix.id }} (${{ matrix.n }} cases, one allocation)
+        id: sweep_shard
         env:
-          RUNNER_NAME: ${{ runner.name }}
-        run: bash "experimental/CollectiveX/launchers/launch_${RUNNER_NAME%%_*}.sh"
+          COLLECTIVEX_OPERATOR_CONFIG_CONTENT: ${{ secrets.COLLECTIVEX_OPERATOR_CONFIG_V1 }}
+          COLLECTIVEX_OPERATOR_CONFIG_REQUIRED: '1'
+          CX_LAUNCHER_NAME: ${{ matrix.launcher }}  # passed as data so it is never expanded into the script text
+        run: |
+          set -euo pipefail
+          umask 077
+          : > "$CX_JOB_ROOT/cleanup-unsafe"
+          rm -f -- "$CX_JOB_ROOT/cleanup-safe"
+          cd "$CX_SOURCE_ROOT" 2>/dev/null || { echo "CollectiveX source is unavailable" >&2; exit 1; }
+          bash "experimental/CollectiveX/launchers/launch_${CX_LAUNCHER_NAME}.sh"
+      - name: Confirm allocation cleanup
+        id: allocation_cleanup
+        if: ${{ always() && steps.source.outputs.prepared == 'true' }}
+        run: |
+          set -euo pipefail
+          [ -f "$CX_JOB_ROOT/cleanup-safe" ] && [ ! -e "$CX_JOB_ROOT/cleanup-unsafe" ] \
+            || { echo "CollectiveX allocation cleanup was not confirmed" >&2; exit 1; }
+      - name: Validate shard artifact safety
+        id: artifact_safety
+        if: ${{ always() && steps.allocation_cleanup.outcome == 'success' }}
+        run: |
+          cd "$CX_SOURCE_ROOT" 2>/dev/null \
+            || { echo "CollectiveX source is unavailable" >&2; exit 1; }
+          python3 experimental/CollectiveX/artifact_safety.py experimental/CollectiveX/results/*.json
+      - name: Validate shard delivery completeness
+        id: delivery_contracts
+        if: ${{ always() && steps.artifact_safety.outcome == 'success' }}
+        run: |
+          cd "$CX_SOURCE_ROOT" 2>/dev/null \
+            || { echo "CollectiveX source is unavailable" >&2; exit 1; }
+          python3 experimental/CollectiveX/contracts.py validate-delivery \
+            --source "experimental/CollectiveX/${CX_SHARD_FILE}" \
+            experimental/CollectiveX/results/*.json
       - name: Shard summary
-        if: always()
-        run: python3 experimental/CollectiveX/summarize.py --results-dir experimental/CollectiveX/results --markdown >> "$GITHUB_STEP_SUMMARY" || true
+        if: ${{ always() && steps.artifact_safety.outcome == 'success' && steps.delivery_contracts.outcome == 'success' }}
+        run: |
+          cd "$CX_SOURCE_ROOT" 2>/dev/null \
+            || { echo "CollectiveX source is unavailable" >&2; exit 1; }
+          python3 experimental/CollectiveX/summarize.py \
+            --results-dir experimental/CollectiveX/results --markdown >> "$GITHUB_STEP_SUMMARY" || true
+      - name: Stage shard artifact
+        id: stage_artifact
+        if: ${{ always() && steps.delivery_contracts.outcome == 'success' && steps.artifact_safety.outcome == 'success' }}
+        run: |
+          set -euo pipefail
+          cd "$CX_SOURCE_ROOT" 2>/dev/null \
+            || { echo "CollectiveX source is unavailable" >&2; exit 1; }
+          cp -- experimental/CollectiveX/results/*.json "$CX_JOB_ROOT/artifact/"
       - name: Upload shard results
-        if: always()
+        id: upload_artifact
+        if: always() && steps.stage_artifact.outcome == 'success' && steps.delivery_contracts.outcome == 'success' && steps.artifact_safety.outcome == 'success'
         uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
         with:
-          name: cxshard-${{ matrix.id }}-${{ github.run_id }}
-          path: experimental/CollectiveX/results/*.json   # glob skips the hidden .shard_*.json
-          if-no-files-found: warn
-
-  # ---- aggregate: collect every shard into ONE ndjson (the "result aggregator at the end") ----
-  aggregate:
-    needs: sweep
-    if: always()
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v5.0.0
-        with: { clean: true }
-      - uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
-        with:
-          pattern: cxshard-*-${{ github.run_id }}
-          path: _shards
-          merge-multiple: true
-      - name: Aggregate shards -> one ndjson
-        working-directory: experimental/CollectiveX
+          name: cxshard-${{ matrix.id }}-${{ github.run_id }}-${{ github.run_attempt }}
+          path: |
+            ${{ env.CX_JOB_ROOT }}/artifact/*.json
+          if-no-files-found: error
+      - name: Cleanup isolated workspace
+        if: ${{ always() && steps.source.outputs.prepared == 'true' }}
         run: |
           set -euo pipefail
-          tag="${{ inputs.backend }}${{ inputs.deepep_v2 && '-v2' || '' }}"
-          python3 aggregate_results.py --in-dir ../../_shards --out "results/aggregate/collectivex_${tag}_${{ github.run_id }}.ndjson"
-          {
-            echo "## CollectiveX sweep aggregate (${tag})"
-            echo '```'
-            wc -l results/aggregate/*.ndjson 2>/dev/null || echo "no ndjson"
-            echo '```'
-          } >> "$GITHUB_STEP_SUMMARY"
-      - name: Upload aggregate
-        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
-        with:
-          name: cxsweep-aggregate-${{ inputs.backend }}${{ inputs.deepep_v2 && '-v2' || '' }}-${{ github.run_id }}
-          path: experimental/CollectiveX/results/aggregate/*.ndjson
-          if-no-files-found: warn
+          [[ "$CX_JOB_ROOT" =~ ^/tmp/inferencex-collectivex-[0-9]+-[0-9]+-[A-Za-z0-9._-]+$ ]] \
+            || { echo "CollectiveX cleanup root is invalid" >&2; exit 1; }
+          [ "$CX_SOURCE_ROOT" = "$CX_JOB_ROOT/source" ] \
+            || { echo "CollectiveX cleanup source is invalid" >&2; exit 1; }
+          [ -f "$CX_JOB_ROOT/cleanup-safe" ] && [ ! -e "$CX_JOB_ROOT/cleanup-unsafe" ] \
+            || { echo "CollectiveX allocation cleanup was not confirmed; retaining isolated files" >&2; exit 1; }
+          if [ '${{ steps.sweep_shard.outcome }}' = success ] \
+              && [ '${{ steps.allocation_cleanup.outcome }}' = success ] \
+              && [ '${{ steps.artifact_safety.outcome }}' = success ] \
+              && [ '${{ steps.delivery_contracts.outcome }}' = success ] \
+              && [ '${{ steps.stage_artifact.outcome }}' = success ] \
+              && [ '${{ steps.upload_artifact.outcome }}' = success ] \
+              && [ -f "$CX_SOURCE_ROOT/experimental/CollectiveX/runtime/common.sh" ]; then
+            # shellcheck source=/dev/null
+            if source "$CX_SOURCE_ROOT/experimental/CollectiveX/runtime/common.sh" \
+                >/dev/null 2>&1; then
+              cx_cleanup_private_logs 0
+            fi
+          fi
+          rm -rf -- "$CX_JOB_ROOT"
diff --git a/experimental/CollectiveX/.gitignore b/experimental/CollectiveX/.gitignore
new file mode 100644
index 0000000000..56b307215b
--- /dev/null
+++ b/experimental/CollectiveX/.gitignore
@@ -0,0 +1,15 @@
+__pycache__/
+*.pyc
+results/
+unsupported/
+.shards/
+.cx_workloads/
+.cx_backend/
+/matrix_full.json
+gpucore.*
+
+# Local plans and infrastructure inventory.
+goal.md
+notes.md
+configs/platforms.yaml
+private-infra.md
diff --git a/experimental/CollectiveX/README.md b/experimental/CollectiveX/README.md
new file mode 100644
index 0000000000..bd01428974
--- /dev/null
+++ b/experimental/CollectiveX/README.md
@@ -0,0 +1,115 @@
+# CollectiveX
+
+<div align="center">
+
+**English** | [中文](./README_zh.md)
+
+</div>
+
+CollectiveX is an experimental MoE expert-parallel communication benchmark. It measures dispatch,
+combine, and paired roundtrip latency across EP libraries and accelerator systems.
+
+> Publication hold: historical schema 3-5 data is diagnostic. No current dataset is approved for
+> rankings, recommendations, or regression baselines.
+
+## v1 Execution Profile
+
+Every scheduled case is BF16, normal mode, `layout-and-dispatch-v1`, backend-tuned resources, packed
+placement, and `fixed-512-v1` sampling: 64 trials x 8 timed iterations with 32 synchronized full
+roundtrip warmups before each measured component at every trial/point. Roundtrip is measured first,
+and every backend uses the same phase-specific conditioning ramp and ascending point order. Routing
+is limited to uniform and one Zipf sensitivity case; EPLB is measured only as the Zipf remedy.
+Combine returns only the activation payload on every backend; gate weights are verified at
+dispatch. A stdlib integer counter produces byte-identical routing and gate weights.
+
+The current matrix has 38 runnable allocation cells across H100, H200, B200, B300, GB200, GB300,
+MI325X, and MI355X. It requests 360 cases / 840 token points: 228 runnable cases / 532 points and
+132 explicit unsupported cases / 308 points. `sweep_matrix.py` materializes every token ladder and
+rejects missing, stale, malformed, or altered shard controls. Workflow shards are emitted round-robin
+by SKU so the bounded GHA matrix can use every available runner pool from its first scheduling cycle.
+
+| Backend | Current scope |
+|---|---|
+| DeepEP V1 | Image-pinned `deep_ep.Buffer`: upstream v1.2.1 on x86 and the image's GB fork on arm64 |
+| DeepEP V2 | PR #605 `ElasticBuffer` plus the upstream #630 scale-up fix; NCCL Device API LSA and source/SASS-bound reproducible JIT |
+| DeepEP Hybrid | Pinned `HybridEPBuffer`; realized auto-tuned config and JIT keys; NVLink/MNNVL domain |
+| UCCL | Pinned 0.1.1 wheel and wrapper on Hopper; Blackwell is explicitly unsupported |
+| NCCL/RCCL A2A | Portable rank-deduplicated payload plus expert/routing-metadata reference |
+| MoRI | MI325X AsyncLL transport and MI355X intranode transport |
+
+FlashInfer is outside v1 because its exercised EP path failed intermittently at runtime. It is not
+misreported as a platform capability limitation and can return after a stable pinned path is proven.
+
+DeepEP V2 means the `ElasticBuffer` implementation introduced by
+[DeepEP PR #605](https://github.com/deepseek-ai/DeepEP/pull/605), not a newer legacy `Buffer` build.
+The pinned source is the minimal upstream [PR #630](https://github.com/deepseek-ai/DeepEP/pull/630)
+follow-up: its parent is the #605 merge tree and its only source change fixes pure scale-up
+initialization when GIN is unavailable. Every v1 V2 case fits inside its declared NVLink/MNNVL
+scale-up domain, so the adapter requests NCCL Device API LSA and disables network GIN. It then
+requires NCCL's realized LSA team to cover the full EP world; a smaller realized domain fails rather
+than being mislabeled. A true scale-out case must use and identify GIN separately. The isolated
+build records the API, source, loaded libraries, generated JIT source, executable SASS, and raw
+CUBIN diagnostics. NVIDIA SKUs remain unvalidated until their GPU outcomes pass the native
+correctness and publication gates.
+
+Removed v1 axes include cached-layout `[cl]`, runtime-visible `[rv]`, LL, FP8, quantized combine,
+extra routing distributions, activation profiles, uneven allocation, placement permutations, model
+envelopes, and scaling studies.
+
+## Workflow And Artifacts
+
+`.github/workflows/collectivex-sweep.yml` generates a public-SKU matrix, extracts a strict ignored
+`.shards/<id>.json` control, executes one allocation per shard, privacy-checks result JSON, and uploads
+raw GitHub artifacts. Raw producers are diagnostic-only; they cannot self-promote evidence.
+
+Development publication uses one self-hosted persistent filesystem. GitHub artifacts are
+transient input; Vercel storage, GCP, Neon, managed databases, and managed object stores are out of
+scope. `publisher.py` ingests complete downloaded workflow artifacts, verifies or promotes explicit
+bundle IDs, and writes the atomic content-addressed layout consumed by the frontend. It never runs on
+GPU workers. The store contract and promotion gates are in [docs/methodology.md](docs/methodology.md).
+
+## Runner Configuration
+
+Runner-local Slurm and storage values use a strict per-SKU JSON document at
+`$XDG_CONFIG_HOME/inferencex/collectivex.json` or `COLLECTIVEX_OPERATOR_CONFIG`. The mode-0600,
+same-owner, non-symlink file is outside the checkout and never uploaded. Unknown runners, fields,
+duplicate keys, endpoint literals, unsafe paths, and non-JSON input fail closed; configuration is
+never evaluated as shell. GHA passes encrypted `COLLECTIVEX_OPERATOR_CONFIG_V1` content only to the
+launcher, which validates it, exports the selected SKU's allowlisted values, and deletes the
+temporary copy before allocation. Required JSON fields are:
+
+| SKU | Variables |
+|---|---|
+| `h100-dgxc`, `b200-dgxc` | `partition`, `account`, `squash_dir` |
+| `h200-dgxc` | `partition`, `squash_dir` |
+| `b300` | `partition`, `account`, `squash_dir`, `stage_dir` |
+| `gb200` | `partition`, `account`, ordered `storage_roots` |
+| `gb300` | `partition`, `account`, `squash_dir`, `stage_dir`, `enroot_cache_path` |
+| `mi325x`, `mi355x` | `partition`, `squash_dir` |
+
+Before import, each Docker Hub tag is resolved with bounded registry requests and must match its
+pinned digest; digest-qualified overrides are rejected. Enroot imports use a fixed filesystem epoch
+and a versioned, registry-digest-bound cache key. Every mounted squash is freshly hashed. The
+verified registry digest and local squash hash are both recorded. Image-provided DeepEP is checked
+against exact wheel and installed-file fingerprints; source-built backends use pinned commits and
+runtime-verified GPU targets. DeepEP V2's mode-0700 cluster-local build cache is keyed by a versioned
+build recipe, verified image, architecture, upstream trees, and dependency pins; only its fixed
+`/cx-cache` mount reaches the container, and it never enters result artifacts.
+Compute containers receive an explicit environment allowlist. Private host, address, device, NIC,
+credential, workspace, and path data stays in encrypted config, ignored operator notes, or bounded
+mode-0600 runner logs; it is never uploaded.
+
+## Local Checks
+
+```bash
+uv run --with-requirements experimental/CollectiveX/requirements.txt \
+  python -m unittest discover experimental/CollectiveX/tests -p 'test_*.py'
+uv run --with-requirements experimental/CollectiveX/requirements.txt \
+  python experimental/CollectiveX/sweep_matrix.py --backends all --out /tmp/cx-matrix.json >/dev/null
+uv run --with-requirements experimental/CollectiveX/requirements.txt \
+  python experimental/CollectiveX/publisher.py --store-root "$COLLECTIVEX_STORE_ROOT" verify
+bash -n experimental/CollectiveX/runtime/*.sh experimental/CollectiveX/launchers/*.sh
+```
+
+Core paths are `capability.py`, `configs/`, `contracts.py`, `schemas/`, `sweep_matrix.py`,
+`publisher.py`, `runtime/`, `launchers/`, and `tests/`.
diff --git a/experimental/CollectiveX/README_zh.md b/experimental/CollectiveX/README_zh.md
new file mode 100644
index 0000000000..bed2172d40
--- /dev/null
+++ b/experimental/CollectiveX/README_zh.md
@@ -0,0 +1,111 @@
+# CollectiveX
+
+<div align="center">
+
+[English](./README.md) | **中文**
+
+</div>
+
+CollectiveX 是实验性的 MoE 专家并行通信基准，用于测量不同 EP 库和加速器系统的
+dispatch、combine 及配对 roundtrip 延迟。
+
+> 发布暂停：历史 schema 3-5 数据仅供诊断。目前没有数据集获准用于排名、推荐或回归基线。
+
+## v1 执行配置
+
+每个调度用例均采用 BF16、normal mode、`layout-and-dispatch-v1`、后端调优资源、packed
+placement 以及 `fixed-512-v1` 采样：64 trials x 8 timed iterations；每个 trial/point 的每个
+被测组件前执行 32 次同步完整 roundtrip warmup。先测 roundtrip；所有后端使用相同的分阶段
+conditioning ramp 和升序点位。Routing 仅保留 uniform 和一个 Zipf 敏感性场景，EPLB 只作为
+Zipf 的修正方案测量。所有后端的 combine 仅返回 activation payload，gate weights 在 dispatch
+阶段接受校验。stdlib 整数计数器生成逐字节一致的 routing 和 gate weights。
+
+当前矩阵覆盖 H100、H200、B200、B300、GB200、GB300、MI325X 和 MI355X，共 38 个可运行
+allocation cells。矩阵请求 360 个 cases / 840 个 token points：228 个可运行 cases / 532 个
+points，以及 132 个显式 unsupported cases / 308 个 points。`sweep_matrix.py` 物化每个 token
+ladder，并拒绝缺失、过期、格式错误或被修改的 shard controls。Workflow shards 按 SKU
+round-robin 发出，使受限的 GHA matrix 从第一个调度周期起即可使用所有可用 runner pools。
+
+| 后端 | 当前范围 |
+|---|---|
+| DeepEP V1 | 镜像固定的 `deep_ep.Buffer`：x86 使用 upstream v1.2.1，arm64 使用镜像内 GB fork |
+| DeepEP V2 | PR #605 `ElasticBuffer` 加 upstream #630 scale-up 修复；NCCL Device API LSA 与 source/SASS 绑定的可复现 JIT |
+| DeepEP Hybrid | 固定的 `HybridEPBuffer`；记录实际自动调优配置与 JIT keys；NVLink/MNNVL domain |
+| UCCL | Hopper 上固定的 0.1.1 wheel 和 wrapper；Blackwell 显式标为 unsupported |
+| NCCL/RCCL A2A | 可移植的 rank-deduplicated payload 加 expert/routing-metadata reference |
+| MoRI | MI325X AsyncLL transport 和 MI355X intranode transport |
+
+FlashInfer 不在 v1 范围内，因为已测试的 EP path 在运行时存在间歇性失败。该问题不会被误报为
+平台能力限制；在证明有稳定的固定实现后可重新加入。
+
+DeepEP V2 指 [DeepEP PR #605](https://github.com/deepseek-ai/DeepEP/pull/605) 引入的
+`ElasticBuffer` 实现，而不是更新的 legacy `Buffer` build。固定 source 使用最小化的 upstream
+[PR #630](https://github.com/deepseek-ai/DeepEP/pull/630) 后续修复：其 parent 是 #605 merge
+tree，唯一 source 变更是修复 GIN 不可用时的纯 scale-up 初始化。v1 的所有 V2 cases 都位于各自
+声明的 NVLink/MNNVL scale-up domain 内，因此 adapter 请求 NCCL Device API LSA 并禁用网络
+GIN。随后必须确认 NCCL 实际建立的 LSA team 覆盖整个 EP world；若实际 domain 更小，case
+会直接失败而不会被错误标注。真正的 scale-out case 必须单独启用并标识 GIN。隔离构建会记录
+API、source、loaded libraries、generated JIT source、executable SASS 与 raw CUBIN
+diagnostics。在 GPU outcome 通过 native correctness 和 publication gates 前，各 NVIDIA SKU
+仍为 unvalidated。
+
+v1 已移除的轴包括 cached-layout `[cl]`、runtime-visible `[rv]`、LL、FP8、quantized combine、
+额外 routing distributions、activation profiles、uneven allocation、placement permutations、
+model envelopes 和 scaling studies。
+
+## Workflow 与产物
+
+`.github/workflows/collectivex-sweep.yml` 生成 public-SKU matrix，提取严格且被忽略的
+`.shards/<id>.json` control，每个 shard 执行一次 allocation，对结果 JSON 做隐私检查并上传
+raw GitHub artifacts。Raw producers 仅供诊断，不能自行提升 evidence。
+
+开发阶段发布使用一个 self-hosted persistent filesystem。GitHub artifacts 仅作为临时输入；
+Vercel storage、GCP、Neon、managed databases 和 managed object stores 均不在范围内。
+`publisher.py` 摄取完整下载的 workflow artifacts，验证或提升显式 bundle IDs，并写入供前端
+使用的原子 content-addressed layout。它不会在 GPU workers 上运行。Store contract 和 promotion
+gates 见 [docs/methodology_zh.md](docs/methodology_zh.md)。
+
+## Runner 配置
+
+Runner 本地 Slurm 和 storage 值使用严格的 per-SKU JSON 文档，路径为
+`$XDG_CONFIG_HOME/inferencex/collectivex.json` 或 `COLLECTIVEX_OPERATOR_CONFIG`。该 mode-0600、
+同 owner、非 symlink 文件位于 checkout 外且永不上传。未知 runners、fields、duplicate keys、
+endpoint literals、unsafe paths 和非 JSON 输入均 fail closed；配置绝不作为 shell 执行。GHA
+仅将加密的 `COLLECTIVEX_OPERATOR_CONFIG_V1` 内容传给 launcher；launcher 验证后只导出所选
+SKU 的 allowlisted values，并在 allocation 前删除临时副本。必需 JSON fields 如下：
+
+| SKU | 变量 |
+|---|---|
+| `h100-dgxc`, `b200-dgxc` | `partition`, `account`, `squash_dir` |
+| `h200-dgxc` | `partition`, `squash_dir` |
+| `b300` | `partition`, `account`, `squash_dir`, `stage_dir` |
+| `gb200` | `partition`, `account`, 有序 `storage_roots` |
+| `gb300` | `partition`, `account`, `squash_dir`, `stage_dir`, `enroot_cache_path` |
+| `mi325x`, `mi355x` | `partition`, `squash_dir` |
+
+导入前，每个 Docker Hub tag 都通过有界 registry requests 解析，并且必须匹配固定 digest；拒绝
+digest-qualified overrides。Enroot imports 使用固定 filesystem epoch 和带版本、绑定 registry
+digest 的 cache key。每个已挂载 squash 都重新计算 hash，同时记录 verified registry digest 和
+local squash hash。镜像提供的 DeepEP 会按精确 wheel 和 installed-file fingerprints 检查；
+source-built backends 使用固定 commits 和 runtime-verified GPU targets。DeepEP V2 的 mode-0700
+cluster-local build cache 由版本化 build recipe、verified image、architecture、upstream
+trees 和 dependency pins 共同寻址；container 只看到固定的 `/cx-cache` mount，且该 cache 永不
+进入 result artifacts。
+Compute containers 仅接收显式 environment allowlist。Private host、address、device、NIC、
+credential、workspace 和 path 数据只保留在加密配置、忽略的 operator notes 或有界 mode-0600
+runner logs 中，永不上传。
+
+## 本地检查
+
+```bash
+uv run --with-requirements experimental/CollectiveX/requirements.txt \
+  python -m unittest discover experimental/CollectiveX/tests -p 'test_*.py'
+uv run --with-requirements experimental/CollectiveX/requirements.txt \
+  python experimental/CollectiveX/sweep_matrix.py --backends all --out /tmp/cx-matrix.json >/dev/null
+uv run --with-requirements experimental/CollectiveX/requirements.txt \
+  python experimental/CollectiveX/publisher.py --store-root "$COLLECTIVEX_STORE_ROOT" verify
+bash -n experimental/CollectiveX/runtime/*.sh experimental/CollectiveX/launchers/*.sh
+```
+
+核心路径为 `capability.py`、`configs/`、`contracts.py`、`schemas/`、`sweep_matrix.py`、
+`publisher.py`、`runtime/`、`launchers/` 和 `tests/`。
diff --git a/experimental/CollectiveX/artifact_safety.py b/experimental/CollectiveX/artifact_safety.py
new file mode 100644
index 0000000000..83d522fba8
--- /dev/null
+++ b/experimental/CollectiveX/artifact_safety.py
@@ -0,0 +1,212 @@
+#!/usr/bin/env python3
+"""Fail-closed privacy check for CollectiveX public result documents."""
+from __future__ import annotations
+
+import argparse
+import ipaddress
+import json
+import os
+import re
+import stat
+
+
+SENSITIVE_FIELDS = frozenset({  # normalized key names that assert_publication_safe rejects outright
+    "environment", "env", "host", "hostname", "uuid", "gpu_uuid", "device_uuid",
+    "pci_bus_id", "ip_address", "ip_addresses", "master_addr", "ssh", "ssh_target",
+    "nodelist", "node_list", "nic_guid", "ib_guid", "topology_matrix", "rdma_devices",
+    "user", "username", "password", "passwd", "secret", "token", "access_token",
+    "api_token", "auth_token", "api_key", "private_key", "credential", "credentials",
+    "address", "addresses", "ip", "ips",
+})
+SENSITIVE_FIELDS_COMPACT = frozenset(item.replace("_", "") for item in SENSITIVE_FIELDS)  # same names, underscores removed
+SENSITIVE_FIELD_SUFFIXES = (  # a normalized key ending in any of these is rejected as well
+    "_host", "_hostname", "_address", "_addresses", "_path", "_paths", "_ip", "_ips",
+    "_password", "_passwd", "_secret", "_token", "_credential", "_credentials",
+    "_uuid", "_guid", "_bus_id",
+)
+SENSITIVE_VALUE_PATTERNS = (  # (rule name, regex) pairs; _sensitive_value_rule returns the first name that matches
+    ("private-path", re.compile(
+        r"(?<![A-Za-z0-9_.-])/(?:home|mnt|workspace|root|users|tmp|data|it-share|lustre|raid|nvme_home|scratch|gpfs|fsx)(?:/|$)",
+        re.I,
+    )),
+    ("ipv4-address", re.compile(r"(?<!\d)(?:\d{1,3}\.){3}\d{1,3}(?!\d)")),
+    ("pci-address", re.compile(r"\b[0-9a-f]{4}:[0-9a-f]{2}:[0-9a-f]{2}\.[0-7]\b", re.I)),
+    ("hardware-address", re.compile(
+        r"\b(?:[0-9a-f]{2}[:-]){5}(?:[0-9a-f]{2})\b|"
+        r"\b(?:[0-9a-f]{2}:){7}(?:[0-9a-f]{2})\b|\b0x[0-9a-f]{16}\b",
+        re.I,
+    )),
+    ("uuid", re.compile(
+        r"\b(?:GPU-|MIG-)?[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b",
+        re.I,
+    )),
+    ("ssh-target", re.compile(r"(?:ssh://|\bssh\s+[^\s/@]+@[^\s/]+)", re.I)),  # listed in CONTEXTUAL_VALUE_RULES
+    ("host-identifier", re.compile(  # listed in CONTEXTUAL_VALUE_RULES
+        r"\b(?:host(?:name)?|master[_-]?(?:addr|address)|node[_-]?list)\s*(?:=|:)\s*[^\s,;]+",
+        re.I,
+    )),
+    ("private-hostname", re.compile(  # listed in CONTEXTUAL_VALUE_RULES
+        r"\b(?:[a-z0-9-]+\.)+(?:cluster|corp|internal|lan|local)\b|"
+        r"\b(?:compute|gpu|head|login|node|worker)[-_]?[0-9][a-z0-9_.-]*\b|"
+        r"\bdgx-[a-z0-9-]+-[0-9]+\b|\bip-(?:[0-9]{1,3}-){3}[0-9]{1,3}\b",
+        re.I,
+    )),
+    ("secret-token", re.compile(
+        r"(?:gh[pousr]_[A-Za-z0-9]{20,}|github_pat_[A-Za-z0-9_]{20,}|"
+        r"glpat-[A-Za-z0-9_-]{20,}|xox[baprs]-[A-Za-z0-9-]{20,}|"
+        r"(?:AKIA|ASIA)[0-9A-Z]{16}|AIza[0-9A-Za-z_-]{35}|"
+        r"(?:sk-(?:proj|svcacct)-[A-Za-z0-9_-]{20,}|sk-[A-Za-z0-9]{32,}|"
+        r"sk_(?:live|test)_[A-Za-z0-9]{20,}|hf_[A-Za-z0-9]{20,})|"
+        r"npm_[A-Za-z0-9]{20,}|"
+        r"pypi-[A-Za-z0-9_-]{20,}|dckr_pat_[A-Za-z0-9_-]{20,}|"
+        r"Bearer\s+[A-Za-z0-9._~+/-]{16,}|Basic\s+[A-Za-z0-9+/=]{16,}|"
+        r"eyJ[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{8,}|"
+        r"-----BEGIN(?: [A-Z]+)? PRIVATE KEY-----)",
+        re.I,
+    )),
+    ("secret-assignment", re.compile(
+        r"\b(?:api[_-]?key|access[_-]?token|auth[_-]?token|client[_-]?secret|"
+        r"password|passwd|secret|accountkey)\s*(?:=|:)\s*[\"']?"
+        r"[A-Za-z0-9+/_=.~-]{8,}",
+        re.I,
+    )),
+)
+IPV6_CANDIDATE = re.compile(  # loose hex-and-colon token; ipaddress.ip_address() makes the final call
+    r"(?<![0-9A-Za-z])\[?([0-9A-Fa-f:]{2,}(?:%[0-9A-Za-z_.-]+)?)\]?"
+)
+CONTEXTUAL_VALUE_RULES = frozenset({"ssh-target", "host-identifier", "private-hostname"})  # skipped for strings under a "ref" key
+MAX_INPUT_BYTES = 64 * 1024 * 1024  # 64 MiB; load_documents compares the lstat size against this
+
+
+class ArtifactSafetyError(ValueError):
+    """A result file is unusable or a document holds data that cannot cross the public boundary."""
+
+
+def _normalized_field(value: object) -> str:
+    """Return ``value`` as lower snake_case so camelCase and kebab-case keys compare equal."""
+    acronym_split = re.sub(r"([A-Z]+)([A-Z][a-z])", r"\1_\2", str(value).strip())
+    return re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", acronym_split).lower().replace("-", "_")
+
+
+def _sensitive_value_rule(value: str, *, contextual: bool = True) -> str | None:
+    """Return the name of the first sensitive-value rule ``value`` trips, else ``None``.
+
+    With ``contextual=False`` the rules named in ``CONTEXTUAL_VALUE_RULES`` are skipped.
+    """
+    for rule_name, regex in SENSITIVE_VALUE_PATTERNS:
+        if not contextual and rule_name in CONTEXTUAL_VALUE_RULES:
+            continue
+        if regex.search(value):
+            return rule_name
+    for token in IPV6_CANDIDATE.findall(value):
+        try:
+            # Drop any %zone suffix before parsing; only a real IPv6 literal is reported.
+            if ipaddress.ip_address(token.split("%", 1)[0]).version == 6:
+                return "ipv6-address"
+        except ValueError:
+            continue
+    return None
+
+
+def assert_publication_safe(docs: list[dict]) -> None:
+    """Raise ArtifactSafetyError if any doc has a forbidden key name, key shape, or string value."""
+    def walk(value, doc_index: int, parent_field: str | None = None) -> None:
+        if isinstance(value, dict):
+            for key, child in value.items():
+                field = _normalized_field(key)
+                compact = field.replace("_", "")
+                if (
+                    field in SENSITIVE_FIELDS
+                    or compact in SENSITIVE_FIELDS_COMPACT
+                    or field.endswith(SENSITIVE_FIELD_SUFFIXES)
+                ):
+                    raise ArtifactSafetyError(  # message names only the document index, not the key
+                        f"artifact safety: doc[{doc_index}] contains forbidden private field"
+                    )
+                key_rule = _sensitive_value_rule(str(key))  # the raw key text is scanned like a value
+                if key_rule:
+                    raise ArtifactSafetyError(
+                        f"artifact safety: doc[{doc_index}] contains forbidden {key_rule} key"
+                    )
+                walk(child, doc_index, field)
+        elif isinstance(value, list):
+            for child in value:
+                walk(child, doc_index, parent_field)  # list items keep the enclosing key as context
+        elif isinstance(value, str):  # non-string scalars (numbers, booleans, null) are not inspected
+            rule = _sensitive_value_rule(value, contextual=parent_field != "ref")  # strings under "ref" skip contextual rules
+            if rule:
+                raise ArtifactSafetyError(
+                    f"artifact safety: doc[{doc_index}] contains forbidden {rule} value"
+                )
+
+    for index, doc in enumerate(docs):
+        if not isinstance(doc, dict):
+            raise ArtifactSafetyError(f"artifact safety: doc[{index}] is not a JSON object")
+        walk(doc, index)
+
+
+def load_documents(paths: list[str]) -> list[dict]:  # parse every path; any unusable file raises ArtifactSafetyError
+    docs: list[dict] = []
+    for path in paths:
+        try:
+            metadata = os.lstat(path)  # lstat does not follow symlinks, so a symlink fails S_ISREG below
+        except OSError as exc:
+            raise ArtifactSafetyError("artifact safety: result file is unavailable") from exc
+        if (
+            not stat.S_ISREG(metadata.st_mode)
+            or metadata.st_uid != os.getuid()  # file must be owned by the current user
+            or metadata.st_size <= 0
+            or metadata.st_size > MAX_INPUT_BYTES
+        ):
+            raise ArtifactSafetyError("artifact safety: result file is unavailable")
+        descriptor = -1  # -1 means "nothing for the finally clause to close"
+        try:
+            descriptor = os.open(path, os.O_RDONLY | getattr(os, "O_NOFOLLOW", 0))  # O_NOFOLLOW only where the OS defines it
+            opened = os.fstat(descriptor)
+            if (  # the opened file must be the same device, inode, and size that lstat saw
+                not stat.S_ISREG(opened.st_mode)
+                or (opened.st_dev, opened.st_ino, opened.st_size)
+                != (metadata.st_dev, metadata.st_ino, metadata.st_size)
+            ):
+                raise ArtifactSafetyError("artifact safety: result file changed during open")
+            with os.fdopen(descriptor, encoding="utf-8") as fh:
+                descriptor = -1  # fh owns the descriptor now; skip the os.close in finally
+                if path.endswith(".ndjson"):  # one document per non-blank line; any other name is one document per file
+                    for line_number, line in enumerate(fh, 1):
+                        if not line.strip():
+                            continue
+                        try:
+                            docs.append(json.loads(line))
+                        except json.JSONDecodeError as exc:
+                            raise ArtifactSafetyError(
+                                f"artifact safety: malformed NDJSON at input line {line_number}"
+                            ) from exc
+                else:
+                    docs.append(json.load(fh))  # reads to EOF; the size cap above applies to the lstat snapshot only
+        except json.JSONDecodeError as exc:
+            raise ArtifactSafetyError("artifact safety: malformed JSON input") from exc
+        except (OSError, UnicodeError) as exc:
+            raise ArtifactSafetyError("artifact safety: result file is unreadable") from exc
+        finally:
+            if descriptor >= 0:
+                os.close(descriptor)
+    if not docs:  # an input set that yields no documents is itself an error
+        raise ArtifactSafetyError("artifact safety: no public result documents found")
+    return docs
+
+
+def main() -> int:
+    """CLI entry point: check every given path, exiting via ``parser.error`` on a violation."""
+    cli = argparse.ArgumentParser(description="Check CollectiveX result artifacts for private data")
+    cli.add_argument("paths", nargs="+")
+    try:
+        checked = load_documents(cli.parse_args().paths)
+        assert_publication_safe(checked)
+    except ArtifactSafetyError as failure:
+        cli.error(str(failure))
+    print(f"artifact safety: {len(checked)} public document(s) passed")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/experimental/CollectiveX/capability.py b/experimental/CollectiveX/capability.py
new file mode 100644
index 0000000000..6a069b09b9
--- /dev/null
+++ b/experimental/CollectiveX/capability.py
@@ -0,0 +1,108 @@
+#!/usr/bin/env python3
+"""Public runner and backend capability registry for CollectiveX v1."""
+
+from __future__ import annotations
+
+import re
+
+
+DEEPEP_V2_COMMIT = "fa8a9b16898204afd347c663b89e65ef87dc6ce6"  # exposed as BACKENDS["deepep-v2"]["commit"]
+DEEPEP_V2_SKU_CAPABILITIES = {  # resolve() rejects a SKU whose "schedulable" is False and echoes its "basis"
+    "h100-dgxc": {"schedulable": True, "basis": "upstream-sm90-requirement"},
+    "h200-dgxc": {"schedulable": True, "basis": "upstream-sm90-requirement"},
+    "b200-dgxc": {"schedulable": True, "basis": "upstream-sm100-result"},
+    "gb200": {"schedulable": True, "basis": "upstream-sm100-result"},
+    "b300": {"schedulable": True, "basis": "pinned-pr605-pr630-sm103-maps-sm100f"},
+    "gb300": {"schedulable": True, "basis": "pinned-pr605-pr630-sm103-maps-sm100f"},
+    "mi325x": {"schedulable": False, "basis": "nvidia-only"},  # resolve() stops at its vendor check before reading this row
+    "mi355x": {"schedulable": False, "basis": "nvidia-only"},  # same: resolve() reports the vendor mismatch first
+}
+PLATFORMS = {  # keyed by runner SKU label; resolve() requires nodes * gpus_per_node to be one of ep_degrees
+    "h100-dgxc": dict(vendor="nvidia", arch="sm90", machine="amd64", product="h100", transport="nvlink", topology_class="h100-nvlink-island",
+                       gpus_per_node=8, scale_up_domain=8, ep_degrees=(8,), launcher="single-slurm"),
+    "h200-dgxc": dict(vendor="nvidia", arch="sm90", machine="amd64", product="h200", transport="nvlink", topology_class="h200-nvlink-island",
+                       gpus_per_node=8, scale_up_domain=8, ep_degrees=(8,), launcher="single-slurm"),
+    "b200-dgxc": dict(vendor="nvidia", arch="sm100", machine="amd64", product="b200", transport="nvlink", topology_class="b200-nvlink-island",
+                       gpus_per_node=8, scale_up_domain=8, ep_degrees=(8,), launcher="single-slurm"),
+    "b300": dict(vendor="nvidia", arch="sm103", machine="amd64", product="b300", transport="nvlink", topology_class="b300-nvlink-island",
+                 gpus_per_node=8, scale_up_domain=8, ep_degrees=(8,), launcher="single-slurm"),
+    "gb200": dict(vendor="nvidia", arch="sm100", machine="arm64", product="gb200", transport="mnnvl", topology_class="gb200-nvl72-mnnvl",
+                  gpus_per_node=4, scale_up_domain=72, ep_degrees=(4, 8), launcher="gb-nv"),
+    "gb300": dict(vendor="nvidia", arch="sm103", machine="arm64", product="gb300", transport="mnnvl", topology_class="gb300-nvl72-mnnvl",
+                  gpus_per_node=4, scale_up_domain=72, ep_degrees=(4, 8), launcher="gb-nv"),
+    "mi325x": dict(vendor="amd", arch="gfx942", machine="amd64", product="mi325x", transport="xgmi", topology_class="mi325x-xgmi",
+                   gpus_per_node=8, scale_up_domain=8, ep_degrees=(8,), launcher="mi-amds"),
+    "mi355x": dict(vendor="amd", arch="gfx950", machine="amd64", product="mi355x", transport="xgmi", topology_class="mi355x-xgmi",
+                   gpus_per_node=8, scale_up_domain=8, ep_degrees=(8,), launcher="mi-amds"),
+}
+
+BACKENDS = {  # resolve() requires "vendors"; "sku_capabilities", "machines", and "excluded_skus" are optional
+    "deepep": {"vendors": {"nvidia"}},
+    "deepep-v2": {
+        "vendors": {"nvidia"},
+        "implementation": "deep_ep.ElasticBuffer",
+        "source": "deepseek-ai/DeepEP#605+#630",
+        "commit": DEEPEP_V2_COMMIT,
+        "communication_backend": "nccl-device-lsa",
+        "torch": "2.10.0+cu130",
+        "nccl": "2.30.4",
+        "sku_capabilities": DEEPEP_V2_SKU_CAPABILITIES,
+    },
+    "uccl": {
+        "vendors": {"nvidia"},
+        "machines": {"amd64"},  # the arm64 platforms (gb200, gb300) fail resolve()'s machine check
+        "excluded_skus": {"b200-dgxc", "b300"},
+    },
+    "deepep-hybrid": {"vendors": {"nvidia"}},
+    "mori": {"vendors": {"amd"}},
+    "nccl-ep": {"vendors": {"nvidia", "amd"}},
+}
+SWEEP_BACKENDS = tuple(BACKENDS)  # backend names in BACKENDS insertion order
+
+
+def runtime_identity_issues(
+    sku: str, *, vendor: str, arch: str, machine: str, device_name: str,
+    device_count: int, world_size: int,
+) -> list[str]:
+    """List every mismatch between the observed runtime identity and the ``PLATFORMS`` entry for ``sku``."""
+    expected = PLATFORMS.get(sku)
+    if expected is None:
+        return [f"unknown runner identity {sku!r}"]
+    observed_identity = {"vendor": vendor, "arch": arch, "machine": machine}
+    problems = [
+        f"{name}={seen!r}, expected {expected[name]!r}"
+        for name, seen in observed_identity.items() if seen != expected[name]
+    ]
+    # Product tokens are letter(s) + digit(s) + optional letters, e.g. "h100" or "mi325x".
+    if expected["product"] not in re.findall(r"[a-z]+\d+[a-z]*", device_name.lower()):
+        problems.append(f"device product {device_name!r} does not identify {expected['product']}")
+    if device_count != expected["gpus_per_node"]:
+        visible = f"visible GPUs={device_count}, expected {expected['gpus_per_node']} per node"
+        problems.append(visible)
+    if world_size not in expected["ep_degrees"]:
+        problems.append(f"EP{world_size} is not registered for {sku}")
+    return problems
+
+
+def resolve(sku: str, backend: str, *, nodes: int = 1, routing: str = "uniform",
+            eplb: bool = False) -> tuple[bool, str]:
+    """Return ``(True, "ok")`` if the fixed-v1 case can run on runner label ``sku``, else ``(False, reason)``."""
+    hw, lib = PLATFORMS.get(sku), BACKENDS.get(backend)
+    if hw is None:
+        return False, f"unknown GHA runner label {sku!r}"
+    if lib is None:
+        return False, f"unknown backend {backend!r}"
+    if nodes < 1 or nodes * hw["gpus_per_node"] not in hw["ep_degrees"]:
+        return False, f"{sku} does not register a {nodes}-node EP degree"
+    if routing not in {"uniform", "zipf"} or (eplb and routing != "zipf"):
+        return False, "v1 routing is uniform or zipf, with EPLB only on zipf"
+    if hw["vendor"] not in lib["vendors"]:
+        return False, f"{backend} does not support {hw['vendor']}"
+    per_sku = lib.get("sku_capabilities", {}).get(sku)
+    if per_sku is not None and not per_sku["schedulable"]:
+        return False, f"{backend} is unsupported on {sku}: {per_sku['basis']}"
+    if hw["machine"] not in lib.get("machines", {hw["machine"]}):
+        return False, f"{backend} does not support {hw['machine']}"
+    if sku in lib.get("excluded_skus", set()):
+        return False, f"{backend} is unavailable on {sku}"
+    return True, "ok"
diff --git a/experimental/CollectiveX/configs/suites.yaml b/experimental/CollectiveX/configs/suites.yaml
new file mode 100644
index 0000000000..0d72ceaae4
--- /dev/null
+++ b/experimental/CollectiveX/configs/suites.yaml
@@ -0,0 +1,21 @@
+# CollectiveX v1 comparison suites.
+schema_version: 1
+
+suites:
+  ep-core-v1:
+    workloads: [deepseek-v3-v1]
+    platforms: [h100-dgxc, h200-dgxc, b300, b200-dgxc, gb300, gb200, mi355x, mi325x]
+    routings: [uniform]
+    phases: [decode, prefill]
+    token_points_prefill: [256, 512]  # NOTE(review): no token_points_decode or eplb key in this suite; presumably defaults apply, confirm in sweep_matrix.py
+    required_publication: official
+
+  ep-routing-v1:
+    workloads: [deepseek-v3-v1]
+    platforms: [h100-dgxc, h200-dgxc, b300, b200-dgxc, gb300, gb200, mi355x, mi325x]
+    routings: [zipf]
+    eplb: [false, true]
+    phases: [decode, prefill]
+    token_points_decode: [128]
+    token_points_prefill: [512]
+    required_publication: comparable-experimental
diff --git a/experimental/CollectiveX/configs/workloads.yaml b/experimental/CollectiveX/configs/workloads.yaml
new file mode 100644
index 0000000000..b5b68334c4
--- /dev/null
+++ b/experimental/CollectiveX/configs/workloads.yaml
@@ -0,0 +1,9 @@
+# CollectiveX v1 canonical workload and phase metadata.
+schema_version: 1
+
+model_derived:
+  deepseek-v3-v1:
+    hidden: 7168
+    topk: 8
+    routed_experts: 256
+    verified_against: "deepseek-ai/DeepSeek-V3@e815299b0bcbac849fa540c768ef21845365c9eb/config.json"
diff --git a/experimental/CollectiveX/contracts.py b/experimental/CollectiveX/contracts.py
new file mode 100644
index 0000000000..6089b8d119
--- /dev/null
+++ b/experimental/CollectiveX/contracts.py
@@ -0,0 +1,2641 @@
+#!/usr/bin/env python3
+"""Strict native attempt contracts and metric validation for CollectiveX v1."""
+from __future__ import annotations
+
+import argparse
+import datetime as dt
+from functools import lru_cache
+import hashlib
+import json
+import math
+import os
+from pathlib import Path, PurePosixPath
+import re
+import sys
+from typing import Any, Iterable
+
+import artifact_safety
+import capability
+import identity
+
+TESTS = Path(__file__).resolve().parent / "tests"
+sys.path.insert(0, str(TESTS))
+import eplb as eplb_contract  # noqa: E402
+import workload as workload_contract  # noqa: E402
+
+# Format identifier strings for the raw, samples and terminal document kinds.
+# NOTE(review): the code that checks these identifiers is outside this view.
+RAW_FORMAT = "collectivex.ep.v1"
+SAMPLES_FORMAT = "collectivex.samples.v1"
+TERMINAL_FORMAT = "collectivex.terminal.v1"
+# Field-name set for a terminal case.
+# NOTE(review): presumably the exact key set such an object must carry;
+# confirm against the validator that consumes it.
+TERMINAL_CASE_FIELDS = {
+    "backend", "canonical", "eplb", "ep", "experts", "gpus_per_node", "hidden",
+    "ladder", "nodes", "phase", "required_publication", "routing", "samples_per_point",
+    "scale_up_domain", "suite", "timing", "topk", "warmup_semantics", "workload",
+}
+# Field-name set for allocation factors (consumer not visible in this view).
+ALLOCATION_FACTOR_FIELDS = {
+    "artifact", "execution_id", "job", "repo", "run_attempt", "run_id", "runner",
+    "source_sha",
+}
+# Keys that provenance_complete() requires to be present and truthy in `git_run`.
+GIT_RUN_FIELDS = {"artifact", "job", "ref", "repo", "run_attempt", "run_id", "source_sha"}
+# Failure-mode name -> reason string for the modes listed here.
+# NOTE(review): judging by the name these occur before the benchmark command
+# runs; confirm against the launcher.
+PRE_EXECUTION_FAILURE_REASONS = {
+    "setup": "launcher-setup-failed",
+    "repository-stage": "repository-staging-failed",
+    "registry-verification": "container-registry-verification-failed",
+    "scheduler-allocation": "scheduler-allocation-failed",
+    "container-import": "container-image-preparation-failed",
+    "container-hash": "container-image-identity-failed",
+    "container-launch": "container-runtime-launch-failed",
+    "backend-setup": "backend-setup-failed",
+    "artifact-collection": "artifact-collection-failed",
+}
+# Every pre-execution mode plus four additional modes, each with its own reason.
+RUNTIME_FAILURE_REASONS = {
+    **PRE_EXECUTION_FAILURE_REASONS,
+    "runtime-identity": "runtime-identity-mismatch",
+    "timeout": "execution-timeout",
+    "deadlock": "execution-deadlock",
+    "execution": "distributed-command-failed",
+}
+# The same four additional modes, all mapped to one shared post-emit reason.
+POST_EMIT_FAILURE_REASONS = {
+    mode: "post-emit-distributed-command-failed"
+    for mode in ("runtime-identity", "timeout", "deadlock", "execution")
+}
+# Reason strings grouped as capability failures (consumer not visible here).
+CAPABILITY_FAILURE_REASONS = frozenset({
+    "backend-platform-unsupported",
+    "backend-token-capacity",
+})
+# Integer return code -> failure-mode key; both values are keys of
+# RUNTIME_FAILURE_REASONS.
+RETURN_CODE_FAILURE_MODES = {
+    5: "runtime-identity",
+    124: "timeout",
+}
+# Percentile labels (consumer not visible in this view).
+PERCENTILES = ("p50", "p90", "p95", "p99")
+# Phase -> exact conditioning ladder; validate_conditioning_contract() requires
+# a document's ladder to equal list(...) of the entry for its phase.
+V1_CONDITIONING_LADDERS = {
+    "decode": (1, 2, 4, 8, 16, 32, 64, 128),
+    "prefill": (1, 2, 4, 8, 16, 32, 64, 128, 256, 512),
+}
+# Required value of `roundtrips_per_shape` in the conditioning contract.
+V1_CONDITIONING_ROUNDS_PER_SHAPE = 8
+# Kernel names that _deepep_v2_jit_cubins_are_valid() requires the parsed
+# cache keys to cover exactly.
+DEEPEP_V2_JIT_KERNELS = frozenset({
+    "barrier", "combine", "combine_reduce_epilogue", "dispatch",
+    "dispatch_copy_epilogue",
+})
+# Pinned deepep-v2 provenance: backend_provenance_issues() reports any field
+# whose recorded value differs from the value given here.
+DEEPEP_V2_V1_PROVENANCE = {
+    "deepep_version": "2.0.0",
+    "deepep_distribution_version": "2.0.0+fa8a9b1",
+    "deepep_commit": "fa8a9b16898204afd347c663b89e65ef87dc6ce6",
+    "deepep_tree": "29809e75c5874e6609dac4804e7b651d5226959f",
+    "deepep_pr": 605,
+    "deepep_fix_pr": 630,
+    "fmt_commit": "a4c7e17133ee9cb6a2f45545f6e974dd3c393efa",
+    "torch_version": "2.10.0+cu130",
+    "nccl_package_version": "2.30.4",
+    "nccl_version": "2.30.4",
+    "nvshmem_package_version": "3.3.9",
+    "allow_hybrid_mode": False,
+    "gin_enabled": False,
+    "communication_backend": "nccl-device-lsa",
+}
+# Exact mapping that uccl provenance must report as `uccl_dependency_versions`.
+UCCL_DEPENDENCY_VERSIONS = {
+    "intervaltree": "3.1.0",
+    "nvidia-cuda-runtime-cu12": "12.9.79",
+    "sortedcontainers": "2.4.0",
+}
+# Directory named "schemas" next to this module.
+SCHEMA_DIR = Path(__file__).resolve().parent / "schemas"
+# Module-level dict of schema objects keyed by string.
+# NOTE(review): presumably a load-once cache filled by a loader outside this view.
+_SCHEMA_CACHE: dict[str, dict[str, Any]] = {}
+# Backend name -> provenance fields that backend_provenance_issues() requires
+# to hold a resolved value. The deepep-v2 entry starts with every key of
+# DEEPEP_V2_V1_PROVENANCE (unpacking a dict yields its keys).
+REQUIRED_BACKEND_PROVENANCE = {
+    "deepep": (
+        "deepep_version", "deepep_commit", "backend_lineage", "allow_mnnvl",
+        "mnnvl_comm",
+    ),
+    "deepep-v2": (
+        *DEEPEP_V2_V1_PROVENANCE, "api_signature_sha256", "loaded_libraries",
+        "jit_cubins", "jit_random_seed", "deterministic", "num_experts",
+        "tuning_num_experts",
+    ),
+    "deepep-hybrid": (
+        "deepep_commit", "deepep_tree", "branch", "backend_lineage",
+        "loaded_libraries", "realized_config", "jit_kernel_keys", "jit_shared_objects",
+    ),
+    "uccl": (
+        "uccl_version", "uccl_commit", "uccl_wrapper_commit", "backend_lineage",
+        "loaded_libraries", "uccl_dependency_versions",
+    ),
+    "mori": ("mori_commit",),
+    "nccl-ep": ("nccl_version", "collective_library", "backend_lineage"),
+}
+# Set of provenance key names.
+# NOTE(review): presumably the allow-list of keys a provenance object may
+# carry; the check that uses it is outside this view. Entries are only
+# roughly alphabetical; as a set, ordering has no effect.
+PROVENANCE_KEYS = {
+    "allocated_qps", "allow_hybrid_mode", "allow_mnnvl", "allow_multiple_reduction",
+    "api", "api_signature_sha256", "backend", "backend_lineage", "block_num",
+    "block_num_floored", "block_num_target", "branch", "collective_library",
+    "combine_dtype", "combine_warps", "communication_backend", "cuda_version",
+    "deepep_commit", "deepep_distribution_version", "deepep_fix_pr", "deepep_pr", "deepep_tree",
+    "deepep_version", "deterministic", "device_cus",
+    "device_sms", "dispatch_dtype", "dispatch_warps", "enable_sdma", "fmt_commit",
+    "gin_enabled",
+    "gpus_per_node", "heap_size",
+    "impl", "jit_cache_key", "jit_cubins", "jit_kernel_keys", "jit_random_seed",
+    "jit_shared_objects", "kernel_type",
+    "loaded_libraries", "local_experts",
+    "logical_scaleout_ranks",
+    "logical_scaleup_ranks", "mapping_variant", "max_num_inp_token_per_rank",
+    "max_num_tokens", "max_total_recv_tokens", "mnnvl_comm", "mode", "mori_commit",
+    "nccl_communicator", "nccl_package_version", "nccl_version", "num_experts",
+    "nvshmem_package_version",
+    "num_max_tokens_per_rank", "num_nvl_bytes", "num_qps", "num_qps_per_rank",
+    "num_rdma_bytes", "num_sms", "path",
+    "physical_nvlink_ranks", "physical_rdma_ranks", "prefer_overlap_with_compute",
+    "realized_config", "reference_semantics", "requested_num_sms", "resource_mode", "routing_factor",
+    "routing_metadata", "sm_fraction", "top_k",
+    "torch_git_version", "torch_version", "transport", "trtllm", "tuned_source",
+    "tuning_num_experts",
+    "uccl_commit", "uccl_dependency_versions", "uccl_version", "uccl_wrapper_commit",
+    "workspace",
+}
+
+
+class ContractError(ValueError):
+    """A document differs from the native v1 contract.
+
+    Subclasses ``ValueError``, so callers that catch ``ValueError`` also catch
+    contract violations raised by the validators in this module.
+    """
+
+
+def resolve_deepep_mnnvl(
+    *, requested: bool, signature_parameters: Iterable[str], deepep_commit: str | None
+) -> tuple[dict[str, bool], str]:
+    """Pick the DeepEP MNNVL keyword arguments and the mode label that goes with them.
+
+    Returns ``({}, "not-requested")`` when MNNVL was not requested, and
+    ``({"allow_mnnvl": True}, "explicit-allow-mnnvl")`` when it was requested
+    and ``allow_mnnvl`` is among ``signature_parameters``.
+
+    Raises:
+        ContractError: MNNVL was requested but ``allow_mnnvl`` is not among
+            ``signature_parameters``; no fallback is attempted.
+    """
+    if requested:
+        accepted = set(signature_parameters)
+        if "allow_mnnvl" not in accepted:
+            commit_label = deepep_commit or "unknown"
+            raise ContractError(
+                f"requested DeepEP MNNVL is unsupported by commit {commit_label}"
+            )
+        return {"allow_mnnvl": True}, "explicit-allow-mnnvl"
+    return {}, "not-requested"
+
+
+def collective_kernel_generation(collective_library: Any) -> str:
+    """Return the public NCCL/RCCL implementation lineage.
+
+    Args:
+        collective_library: Value to validate; only the strings ``"nccl"`` and
+            ``"rccl"`` are accepted.
+
+    Returns:
+        ``collective_library`` unchanged.
+
+    Raises:
+        ContractError: The value is anything else, including non-string values.
+    """
+    # Check the type first: an unhashable value (list, dict) would otherwise
+    # make the set membership test raise TypeError instead of ContractError.
+    if not isinstance(collective_library, str) or collective_library not in {
+        "nccl", "rccl",
+    }:
+        raise ContractError("reference collective library must be nccl or rccl")
+    return collective_library
+
+
+def project_resource_profile(provenance: dict[str, Any]) -> dict[str, Any]:
+    """Map backend provenance onto the shared cross-backend resource fields.
+
+    Configured units come from ``num_sms`` when set (kind ``"sm"``), else from
+    ``block_num`` unless ``kernel_type`` is ``"AsyncLL"`` (kind ``"cu_block"``),
+    else they are unset. ``achieved_fraction`` is configured units over device
+    units rounded to four places, or ``None`` when either side is missing or zero.
+    """
+    lookup = provenance.get
+    total_units = lookup("device_sms") or lookup("device_cus")
+
+    unit_kind = unit_count = None
+    sm_count = lookup("num_sms")
+    if sm_count is not None:
+        unit_kind, unit_count = "sm", sm_count
+    else:
+        block_count = lookup("block_num")
+        if block_count is not None and lookup("kernel_type") != "AsyncLL":
+            unit_kind, unit_count = "cu_block", block_count
+
+    fraction = None
+    if unit_count and total_units:
+        fraction = unit_count / total_units
+
+    tuned_label = str(lookup("tuned_source", ""))
+    is_fixed = "fixed-kernel" in tuned_label
+    if is_fixed:
+        conformance = "not-applicable"
+    elif "default" in tuned_label:
+        conformance = "backend-default"
+    else:
+        conformance = "best-known"
+
+    # First truthy byte count wins; a zero is treated the same as missing.
+    persistent = (
+        lookup("num_nvl_bytes") or lookup("num_rdma_bytes") or lookup("heap_size")
+    )
+    return {
+        "achieved_fraction": round(fraction, 4) if fraction else None,
+        "comm_units_kind": unit_kind,
+        "configured_units": unit_count,
+        "conformance_class": conformance,
+        "device_units": total_units,
+        "fixed_kernel": is_fixed,
+        "nonconforming": False,
+        "pareto_eligible": False,
+        "persistent_bytes": persistent,
+        "qps_per_rank": lookup("num_qps_per_rank"),
+        "requested_fraction": None,
+        "resource_class": "fixed-kernel" if is_fixed else "backend-tuned",
+        "target_achieved_within_tol": None,
+        "tolerance": 0.10,
+        "tuned_source": lookup("tuned_source"),
+        "warps_combine": lookup("combine_warps"),
+        "warps_dispatch": lookup("dispatch_warps"),
+    }
+
+
+def backend_version(provenance: dict[str, Any]) -> str | None:
+    """Pick the first usable version-like provenance field as the public backend version.
+
+    Fields are tried in a fixed priority order. A field is usable when it is
+    not ``None`` and its string form is not blank. The result is truncated to
+    160 characters; ``None`` is returned when no field qualifies.
+    """
+    priority = (
+        "deepep_version", "uccl_version", "nccl_version",
+        "mori_commit", "deepep_commit",
+    )
+    for candidate in map(provenance.get, priority):
+        if candidate is None:
+            continue
+        rendered = str(candidate)
+        if rendered.strip():
+            return rendered[:160]
+    return None
+
+
+def public_series_config(
+    *, kernel_generation: Any, provenance: dict[str, Any],
+    resource_profile: dict[str, Any], resource_mode: Any, device_product: Any,
+) -> dict[str, Any]:
+    """Build the public backend/resource/system configuration from raw facts.
+
+    A ``kernel_generation`` of ``"n-a"`` is published as ``None``. The resource
+    profile label is ``"profile-"`` plus the first 16 hex characters of the
+    profile's JSON digest. The system label is ``device_product`` as a string,
+    truncated to 160 characters.
+    """
+    profile_label = "profile-" + _sha256_json(resource_profile)[:16]
+    backend_fields = {
+        "generation": kernel_generation if kernel_generation != "n-a" else None,
+        "version": backend_version(provenance),
+    }
+    resource_fields = {
+        "mode": resource_mode,
+        "profile": profile_label,
+        "comm_units_kind": resource_profile.get("comm_units_kind"),
+        "configured_units": resource_profile.get("configured_units"),
+    }
+    system_fields = {"label": str(device_product)[:160]}
+    return {
+        "backend": backend_fields,
+        "resource": resource_fields,
+        "system": system_fields,
+    }
+
+
+def public_series_config_sha256(config: dict[str, Any]) -> str:
+    """Return the JSON digest of a public configuration, used in series identity."""
+    config_digest = _sha256_json(config)
+    return config_digest
+
+
+# Library roles whose evidence entries series_provenance() rewrites to carry
+# the DeepEP source tree in place of a binary digest.
+SOURCE_BUILT_LIBRARY_ROLES = frozenset(
+    ("deepep-extension", "deepep-hybrid-extension")
+)
+
+
+def series_provenance(provenance: dict[str, Any]) -> dict[str, Any]:
+    """Reduce provenance to the fields that identify a series semantically.
+
+    Drops ``jit_cache_key``, ``jit_shared_objects``, ``path`` and
+    ``sm_fraction``. In a ``loaded_libraries`` list, dict entries with a
+    source-built role are replaced by name/role/source_tree. In a
+    ``jit_cubins`` list, dict entries keep only cache_key, sass_sha256 and
+    source_sha256. Entries of any other shape pass through unchanged.
+    """
+    dropped = {"jit_cache_key", "jit_shared_objects", "path", "sm_fraction"}
+    projected = {}
+    for key, value in provenance.items():
+        if key not in dropped:
+            projected[key] = value
+
+    libraries = provenance.get("loaded_libraries")
+    if isinstance(libraries, list):
+        stable_libraries = []
+        for entry in libraries:
+            if isinstance(entry, dict) and entry.get("role") in SOURCE_BUILT_LIBRARY_ROLES:
+                stable_libraries.append({
+                    "name": entry.get("name"),
+                    "role": entry.get("role"),
+                    "source_tree": provenance.get("deepep_tree"),
+                })
+            else:
+                stable_libraries.append(entry)
+        projected["loaded_libraries"] = stable_libraries
+
+    cubins = provenance.get("jit_cubins")
+    if isinstance(cubins, list):
+        kept_fields = ("cache_key", "sass_sha256", "source_sha256")
+        stable_cubins = []
+        for entry in cubins:
+            if isinstance(entry, dict):
+                stable_cubins.append({name: entry.get(name) for name in kept_fields})
+            else:
+                stable_cubins.append(entry)
+        projected["jit_cubins"] = stable_cubins
+    return projected
+
+
+def routing_implementation_control_sha256(implementation: dict[str, Any]) -> str:
+    """Digest the implementation facts that must match across routing cohorts.
+
+    Hashes the kernel generation, name, resource profile and the
+    series_provenance() projection of the provenance with the treatment
+    fields listed below removed.
+
+    Raises:
+        ContractError: ``implementation["provenance"]`` is not a dict.
+    """
+    raw_provenance = implementation.get("provenance")
+    if isinstance(raw_provenance, dict):
+        excluded = {
+            "jit_cache_key", "jit_cubins", "jit_kernel_keys", "jit_shared_objects",
+            "local_experts", "num_experts", "path", "realized_config", "sm_fraction",
+        }
+        stable = series_provenance(raw_provenance)
+        control = {
+            "kernel_generation": implementation.get("kernel_generation"),
+            "name": implementation.get("name"),
+            "provenance": {
+                field: stable[field] for field in stable if field not in excluded
+            },
+            "resource_profile": implementation.get("resource_profile"),
+        }
+        return _sha256_json(control)
+    raise ContractError("implementation provenance is unavailable")
+
+
+def _resolved_provenance_value(field: str, value: Any) -> bool:
+    """Tell whether a provenance value counts as actually captured.
+
+    Rejected: ``None``, empty containers, blank strings, placeholder words,
+    anything containing ``capture-failed`` and, for ``*_commit`` fields,
+    branch-like or placeholder-suffixed names. Matching is done on the
+    stripped, lower-cased string form of the value.
+    """
+    if value is None:
+        return False
+    if isinstance(value, (dict, list, tuple, set)) and not value:
+        return False
+    rendered = str(value).strip().lower()
+    placeholders = {"unknown", "none", "null", "n/a", "?", "capture-failed"}
+    if rendered == "" or rendered in placeholders or "capture-failed" in rendered:
+        return False
+    if not field.endswith("_commit"):
+        return True
+    symbolic_names = {"main", "hybrid-ep", "uccl", "pkg-uccl"}
+    symbolic_suffixes = ("-unknown", "-none", "-main", "-hybrid-ep")
+    return not (rendered in symbolic_names or rendered.endswith(symbolic_suffixes))
+
+
+def _content_evidence_is_valid(value: Any, required_roles: set[str]) -> bool:
+    """Check a list of ``{name, role, sha256}`` evidence records.
+
+    Valid means: a non-empty list of dicts with exactly those three keys, a
+    well-formed name and role, a 64-character lower-case hex digest, no
+    repeated (role, name) pair, and every required role present.
+    """
+    if not isinstance(value, list) or len(value) == 0:
+        return False
+    name_pattern = r"[A-Za-z0-9][A-Za-z0-9_.+-]{0,159}"
+    role_pattern = r"[A-Za-z0-9][A-Za-z0-9_.+-]{0,127}"
+    digest_pattern = r"[0-9a-f]{64}"
+    seen_pairs: set[tuple[str, str]] = set()
+    seen_roles: set[str] = set()
+    for record in value:
+        if not isinstance(record, dict) or set(record) != {"name", "role", "sha256"}:
+            return False
+        name = record["name"]
+        role = record["role"]
+        digest = record["sha256"]
+        if not (isinstance(name, str) and re.fullmatch(name_pattern, name)):
+            return False
+        if not (isinstance(role, str) and re.fullmatch(role_pattern, role)):
+            return False
+        if not (isinstance(digest, str) and re.fullmatch(digest_pattern, digest)):
+            return False
+        pair = (role, name)
+        if pair in seen_pairs:
+            return False
+        seen_pairs.add(pair)
+        seen_roles.add(role)
+    return required_roles <= seen_roles
+
+
+def _deepep_v2_jit_cubins_are_valid(value: Any) -> bool:
+    """Check the DeepEP v2 JIT cubin evidence list.
+
+    Valid means: one dict per expected kernel, each with exactly cache_key,
+    cubin_sha256, sass_sha256 and source_sha256; cache keys shaped
+    ``kernel.<name>.<32 hex>``; 64-hex digests; cache keys unique and in
+    sorted order; and kernel names equal to DEEPEP_V2_JIT_KERNELS.
+    """
+    if not isinstance(value, list):
+        return False
+    if len(value) != len(DEEPEP_V2_JIT_KERNELS):
+        return False
+    digest_fields = ("cubin_sha256", "sass_sha256", "source_sha256")
+    expected_fields = {"cache_key", *digest_fields}
+    ordered_keys = []
+    seen_kernels = set()
+    for entry in value:
+        if not isinstance(entry, dict) or set(entry) != expected_fields:
+            return False
+        key = entry["cache_key"]
+        if not isinstance(key, str):
+            return False
+        parsed = re.fullmatch(r"kernel\.([A-Za-z0-9_+-]+)\.[0-9a-f]{32}", key)
+        if parsed is None:
+            return False
+        for name in digest_fields:
+            digest = entry[name]
+            if not isinstance(digest, str) or not re.fullmatch(r"[0-9a-f]{64}", digest):
+                return False
+        ordered_keys.append(key)
+        seen_kernels.add(parsed.group(1))
+    if ordered_keys != sorted(set(ordered_keys)):
+        return False
+    return seen_kernels == DEEPEP_V2_JIT_KERNELS
+
+
+# Exact key set a hybrid `realized_config` object must carry.
+HYBRID_REALIZED_CONFIG_FIELDS = {
+    "hidden_dim", "max_num_of_tokens_per_rank", "num_of_experts_per_rank",
+    "num_of_ranks_per_node", "num_of_nodes", "pad_multiple",
+    "num_of_tokens_per_chunk_preprocessing_api",
+    "num_of_threads_per_block_preprocessing_api", "num_of_blocks_preprocessing_api",
+    "num_of_blocks_permute", "num_of_blocks_unpermute", "token_data_type",
+    "num_of_stages_dispatch_api", "num_of_stages_permute_block_dispatch_api",
+    "num_of_in_flight_s2g_dispatch_api",
+    "num_of_in_flight_s2g_permute_block_dispatch_api",
+    "num_of_additional_in_flight_s2g_dispatch_api",
+    "num_of_tokens_per_chunk_dispatch_api", "num_of_blocks_dispatch_api",
+    "forward_dispatch_api", "device_side_sync_dispatch_api",
+    "num_of_stages_g2s_combine_api", "num_of_stages_s2g_combine_api",
+    "num_of_tokens_per_chunk_combine_api", "num_of_tokens_per_group_combine_api",
+    "num_of_blocks_combine_api", "num_of_additional_in_flight_s2g_combine_api",
+    "backward_combine_api", "device_side_sync_combine_api",
+}
+# Subset of the fields above that must be real booleans.
+HYBRID_REALIZED_BOOL_FIELDS = {
+    "forward_dispatch_api", "device_side_sync_dispatch_api", "backward_combine_api",
+    "device_side_sync_combine_api",
+}
+
+
+def _hybrid_realized_config_is_valid(value: Any) -> bool:
+    """Check a hybrid realized-config object field by field.
+
+    Valid means: a dict with exactly HYBRID_REALIZED_CONFIG_FIELDS as keys;
+    boolean fields are ``bool``; ``token_data_type`` is ``"UINT8"`` or
+    ``"UINT16"``; every other field is a non-negative ``int`` (``bool`` is not
+    accepted); and the five shape fields are strictly positive.
+
+    Returns False for any malformed input and never raises on bad field values.
+    """
+    if not isinstance(value, dict) or set(value) != HYBRID_REALIZED_CONFIG_FIELDS:
+        return False
+    for field, field_value in value.items():
+        if field in HYBRID_REALIZED_BOOL_FIELDS:
+            if type(field_value) is not bool:
+                return False
+        elif field == "token_data_type":
+            # Type-check before the set lookup: an unhashable value (list or
+            # dict from a malformed document) would raise TypeError there.
+            if not isinstance(field_value, str) or field_value not in {
+                "UINT8", "UINT16",
+            }:
+                return False
+        elif type(field_value) is not int or field_value < 0:
+            return False
+    return all(value[field] > 0 for field in (
+        "hidden_dim", "max_num_of_tokens_per_rank", "num_of_experts_per_rank",
+        "num_of_ranks_per_node", "num_of_nodes",
+    ))
+
+
+def _hybrid_kernel_keys_are_valid(value: Any) -> bool:
+    """Check the hybrid JIT kernel key list.
+
+    Valid means: a list of exactly three distinct, well-formed key strings in
+    sorted order. Returns False for any other input and never raises.
+    """
+    if not isinstance(value, list) or len(value) != 3:
+        return False
+    # Validate element types before hashing or sorting: set() raises TypeError
+    # on unhashable elements and sorted() raises it on mixed types.
+    if not all(
+        isinstance(key, str)
+        and re.fullmatch(r"[A-Za-z0-9][A-Za-z0-9_.+-]{0,511}", key)
+        for key in value
+    ):
+        return False
+    return len(set(value)) == 3 and value == sorted(value)
+
+
+def _hybrid_jit_evidence_is_valid(value: Any, kernel_keys: Any) -> bool:
+    """Check per-kernel, per-rank JIT shared-object evidence against the kernel keys.
+
+    Valid means: ``kernel_keys`` is itself valid; ``value`` is a list with one
+    ``{kernel_key, rank_artifacts}`` dict per key, in the same order; each
+    artifact is ``{bytes, rank, sha256}`` with a non-negative int rank, a
+    64-hex digest and a positive int size; ranks within a kernel run
+    0..n-1 in order; and every kernel reports the same rank sequence.
+    """
+    if not (_hybrid_kernel_keys_are_valid(kernel_keys) and isinstance(value, list)):
+        return False
+    if len(kernel_keys) != len(value):
+        return False
+    per_kernel_ranks = []
+    for position, entry in enumerate(value):
+        if not isinstance(entry, dict) or set(entry) != {"kernel_key", "rank_artifacts"}:
+            return False
+        artifacts = entry["rank_artifacts"]
+        if entry["kernel_key"] != kernel_keys[position]:
+            return False
+        if not isinstance(artifacts, list):
+            return False
+        observed = []
+        for record in artifacts:
+            if not isinstance(record, dict) or set(record) != {"bytes", "rank", "sha256"}:
+                return False
+            rank = record["rank"]
+            digest = record["sha256"]
+            size = record["bytes"]
+            well_formed = (
+                type(rank) is int
+                and rank >= 0
+                and isinstance(digest, str)
+                and re.fullmatch(r"[0-9a-f]{64}", digest)
+                and type(size) is int
+                and size > 0
+            )
+            if not well_formed:
+                return False
+            observed.append(rank)
+        # Ranks must be exactly 0, 1, ..., n-1 in that order, and non-empty.
+        if len(observed) == 0 or observed != list(range(len(observed))):
+            return False
+        per_kernel_ranks.append(observed)
+    reference = per_kernel_ranks[0]
+    return all(candidate == reference for candidate in per_kernel_ranks)
+
+
+def backend_provenance_issues(backend: str, provenance: dict[str, Any]) -> list[str]:
+    """List the provenance fields that are unknown or fail the backend's rules.
+
+    Collects every field whose string value is ``"unknown"`` (case- and
+    whitespace-insensitive), every required field for ``backend`` that is not
+    resolved, and every field that fails a backend-specific check below.
+
+    Args:
+        backend: Backend name; backends without an entry in
+            REQUIRED_BACKEND_PROVENANCE have no required fields.
+        provenance: Recorded provenance mapping to inspect.
+
+    Returns:
+        Sorted, de-duplicated field names; empty when nothing is wrong.
+    """
+    unknown = [
+        field for field, value in provenance.items()
+        if isinstance(value, str) and value.strip().lower() == "unknown"
+    ]
+    unresolved = [
+        field for field in REQUIRED_BACKEND_PROVENANCE.get(backend, ())
+        if not _resolved_provenance_value(field, provenance.get(field))
+    ]
+    if backend == "deepep":
+        mode = provenance.get("mnnvl_comm")
+        allow = provenance.get("allow_mnnvl")
+        valid_modes = {
+            "not-requested": False,
+            "explicit-allow-mnnvl": True,
+        }
+        # Only look up string modes: an unhashable mode (list/dict from a
+        # malformed document) would make dict.get raise TypeError.
+        expected_allow = valid_modes.get(mode) if isinstance(mode, str) else None
+        if type(allow) is not bool or expected_allow is not allow:
+            unresolved.append("mnnvl_comm")
+        if provenance.get("backend_lineage") != "deepep-v1":
+            unresolved.append("backend_lineage")
+    if backend == "deepep-v2":
+        for field in ("num_experts", "tuning_num_experts"):
+            if type(provenance.get(field)) is not int or provenance[field] <= 0:
+                unresolved.append(field)
+        if not _deepep_v2_jit_cubins_are_valid(provenance.get("jit_cubins")):
+            unresolved.append("jit_cubins")
+        if provenance.get("jit_random_seed") != "collectivex-deepep-v2-fa8a9b1":
+            unresolved.append("jit_random_seed")
+        # Every pinned v2 field must equal its pinned value exactly.
+        unresolved.extend(
+            field for field, expected in DEEPEP_V2_V1_PROVENANCE.items()
+            if provenance.get(field) != expected
+        )
+    # Library roles that must appear in `loaded_libraries`, per backend.
+    content_roles = {
+        "deepep-v2": {"deepep-extension", "nccl", "nvshmem"},
+        "deepep-hybrid": {"deepep-extension", "deepep-hybrid-extension"},
+        "uccl": {
+            "uccl-distribution", "uccl-wrapper", "intervaltree-distribution",
+            "sortedcontainers-distribution", "cuda-runtime",
+        },
+    }.get(backend)
+    if content_roles is not None and not _content_evidence_is_valid(
+        provenance.get("loaded_libraries"), content_roles
+    ):
+        unresolved.append("loaded_libraries")
+    if backend in {"deepep-v2", "deepep-hybrid"} and not re.fullmatch(
+        r"[0-9a-f]{40}", str(provenance.get("deepep_tree", ""))
+    ):
+        unresolved.append("deepep_tree")
+    if backend == "deepep-hybrid" and provenance.get("backend_lineage") != "deepep-hybrid":
+        unresolved.append("backend_lineage")
+    if backend == "deepep-hybrid":
+        if not _hybrid_realized_config_is_valid(provenance.get("realized_config")):
+            unresolved.append("realized_config")
+        if not _hybrid_kernel_keys_are_valid(provenance.get("jit_kernel_keys")):
+            unresolved.append("jit_kernel_keys")
+        if not _hybrid_jit_evidence_is_valid(
+            provenance.get("jit_shared_objects"), provenance.get("jit_kernel_keys")
+        ):
+            unresolved.append("jit_shared_objects")
+    if backend == "uccl" and provenance.get("backend_lineage") != "uccl":
+        unresolved.append("backend_lineage")
+    if backend == "uccl" and provenance.get("uccl_dependency_versions") != (
+        UCCL_DEPENDENCY_VERSIONS
+    ):
+        unresolved.append("uccl_dependency_versions")
+    if backend == "nccl-ep":
+        collective = provenance.get("collective_library")
+        # Type-check first so an unhashable value is reported as an issue
+        # instead of raising TypeError in the set membership test.
+        if not isinstance(collective, str) or collective not in {"nccl", "rccl"}:
+            unresolved.append("collective_library")
+        if provenance.get("backend_lineage") != collective:
+            unresolved.append("backend_lineage")
+    return sorted(set(unknown + unresolved))
+
+
+def provenance_complete(
+    provenance: dict[str, Any], backend: str, git_run: dict[str, Any] | None,
+    *, image_digest: Any, image_verified: Any, squash_sha256: Any,
+) -> bool:
+    """Tell whether backend, image and git-run provenance are all fully recorded.
+
+    True only when the backend has no provenance issues, ``image_verified`` is
+    exactly ``True``, the image digest is ``sha256:<64 hex>``, the squash
+    digest is 64 hex characters, and ``git_run`` is a dict with a truthy value
+    for every field in GIT_RUN_FIELDS.
+    """
+    image = str(image_digest or "")
+    squash = str(squash_sha256 or "")
+    if backend_provenance_issues(backend, provenance):
+        return False
+    if image_verified is not True:
+        return False
+    if re.fullmatch(r"sha256:[0-9a-f]{64}", image) is None:
+        return False
+    if re.fullmatch(r"[0-9a-f]{64}", squash) is None:
+        return False
+    if not isinstance(git_run, dict):
+        return False
+    return all(git_run.get(field) for field in GIT_RUN_FIELDS)
+
+
+def strict_load(path: str | os.PathLike[str]) -> Any:
+    """Load a UTF-8 JSON file, rejecting duplicate keys and non-finite numbers.
+
+    Args:
+        path: File to read; always decoded as UTF-8, independent of locale.
+
+    Returns:
+        The decoded JSON value.
+
+    Raises:
+        ContractError: The file cannot be read or decoded, is not valid JSON,
+            repeats a key within one object, or contains NaN/Infinity or a
+            numeric literal that overflows to infinity (e.g. ``1e999``).
+    """
+    def pairs(items):
+        result = {}
+        for key, value in items:
+            if key in result:
+                raise ContractError(f"duplicate JSON key {key!r}")
+            result[key] = value
+        return result
+
+    def constant(value):
+        raise ContractError(f"non-finite JSON number {value}")
+
+    def finite_float(text):
+        # parse_constant only sees the NaN/Infinity tokens; a literal such as
+        # 1e999 reaches float() and would silently become inf.
+        number = float(text)
+        if not math.isfinite(number):
+            raise ContractError(f"non-finite JSON number {text}")
+        return number
+
+    try:
+        # JSON interchange is UTF-8; do not depend on the locale's encoding.
+        with open(path, encoding="utf-8") as handle:
+            return json.load(
+                handle, object_pairs_hook=pairs, parse_constant=constant,
+                parse_float=finite_float,
+            )
+    except (OSError, json.JSONDecodeError, UnicodeDecodeError) as exc:
+        raise ContractError(f"invalid JSON {path}: {exc}") from exc
+
+
+def canonical_json_bytes(value: Any) -> bytes:
+    """Serialize a value to canonical UTF-8 JSON for checksums and immutable artifacts.
+
+    Keys are sorted, separators are compact, non-ASCII text is kept as-is and
+    non-finite numbers are rejected.
+
+    Raises:
+        ContractError: The value contains a non-finite float or cannot be
+            serialized.
+    """
+    _finite_tree(value)
+    try:
+        text = json.dumps(
+            value,
+            sort_keys=True,
+            separators=(",", ":"),
+            ensure_ascii=False,
+            allow_nan=False,
+        )
+        return text.encode("utf-8")
+    except (TypeError, ValueError) as exc:
+        raise ContractError(f"value is not canonical JSON: {exc}") from exc
+
+
+def content_manifest_evidence(
+    *, role: str, name: str, files: Iterable[tuple[str, str | os.PathLike[str]]]
+) -> dict[str, str]:
+    """Digest a labeled set of files into one ``{name, role, sha256}`` record.
+
+    Each file contributes its size, label and SHA-256. The record's digest is
+    taken over the canonical JSON of those entries sorted by label, so only
+    labels, never host paths, enter the result.
+
+    Raises:
+        ContractError: The role or name is malformed; a label is empty,
+            absolute, contains ``..``, is repeated or has characters outside
+            printable ASCII; a source is not a regular file; or no files
+            were supplied.
+    """
+    if re.fullmatch(r"[A-Za-z0-9][A-Za-z0-9_.+-]{0,127}", role) is None:
+        raise ContractError("content evidence role is invalid")
+    if re.fullmatch(r"[A-Za-z0-9][A-Za-z0-9_.+-]{0,159}", name) is None:
+        raise ContractError("content evidence name is invalid")
+    entries: list[dict[str, Any]] = []
+    seen_labels: set[str] = set()
+    for label, raw_path in files:
+        logical = PurePosixPath(label)
+        acceptable = (
+            bool(label)
+            and not logical.is_absolute()
+            and ".." not in logical.parts
+            and label not in seen_labels
+            and all(0x20 <= ord(character) <= 0x7E for character in label)
+        )
+        if not acceptable:
+            raise ContractError("content evidence label is invalid or duplicated")
+        source = Path(raw_path)
+        if not source.is_file():
+            raise ContractError("content evidence source is not a file")
+        hasher = hashlib.sha256()
+        total = 0
+        with source.open("rb") as handle:
+            # Stream in 1 MiB chunks so large binaries are never held whole.
+            while True:
+                chunk = handle.read(1024 * 1024)
+                if chunk == b"":
+                    break
+                hasher.update(chunk)
+                total += len(chunk)
+        seen_labels.add(label)
+        entries.append({"bytes": total, "label": label, "sha256": hasher.hexdigest()})
+    if len(entries) == 0:
+        raise ContractError("content evidence cannot be empty")
+    entries.sort(key=lambda entry: entry["label"])
+    combined = hashlib.sha256(canonical_json_bytes(entries)).hexdigest()
+    return {"name": name, "role": role, "sha256": combined}
+
+
+def _obj(value: Any, path: str) -> dict[str, Any]:
+    """Return ``value`` if it is a dict; otherwise raise ContractError naming ``path``."""
+    if isinstance(value, dict):
+        return value
+    raise ContractError(f"{path} must be an object")
+
+
+def _keys(value: Any, expected: set[str], path: str) -> dict[str, Any]:
+    """Return ``value`` if it is a dict whose key set equals ``expected``.
+
+    Raises:
+        ContractError: ``value`` is not a dict, or its keys differ; the
+            message lists the missing and extra keys in sorted order.
+    """
+    if not isinstance(value, dict):
+        raise ContractError(f"{path} must be an object")
+    present = set(value)
+    if present == expected:
+        return value
+    missing = sorted(expected - present)
+    surplus = sorted(present - expected)
+    raise ContractError(
+        f"{path} fields differ: missing={missing}, extra={surplus}"
+    )
+
+
+def _text(value: Any, path: str, *, nullable: bool = False) -> str | None:
+    """Return ``value`` if it is a non-empty string, or ``None`` when allowed.
+
+    Raises:
+        ContractError: ``value`` is not a non-empty string, and is not a
+            ``None`` that ``nullable`` permits.
+    """
+    if value is None and nullable:
+        return None
+    if isinstance(value, str) and value:
+        return value
+    raise ContractError(f"{path} must be a non-empty string")
+
+
+def _integer(value: Any, path: str, *, minimum: int = 0) -> int:
+    """Return ``value`` if it is exactly an ``int`` no smaller than ``minimum``.
+
+    ``bool`` is rejected because the check is on the exact type.
+
+    Raises:
+        ContractError: The type or the lower bound is violated.
+    """
+    if type(value) is int and value >= minimum:
+        return value
+    raise ContractError(f"{path} must be an integer >= {minimum}")
+
+
+def validate_conditioning_contract(value: Any, phase: str) -> dict[str, Any]:
+    """Check a conditioning block against the fixed v1 schedule for ``phase``.
+
+    The block must have exactly ``contract``, ``ladder`` and
+    ``roundtrips_per_shape``; the contract must match the v1 case profile, the
+    ladder must be a list of ints equal to the phase's ladder, and the
+    round-trip count must equal V1_CONDITIONING_ROUNDS_PER_SHAPE.
+
+    Returns:
+        The validated conditioning dict.
+
+    Raises:
+        ContractError: The phase is unknown, the keys differ, or any value
+            deviates from the schedule.
+    """
+    if phase not in V1_CONDITIONING_LADDERS:
+        raise ContractError("raw conditioning phase is invalid")
+    conditioning = _keys(
+        value, {"contract", "ladder", "roundtrips_per_shape"},
+        "raw.measurement.conditioning",
+    )
+    ladder = conditioning["ladder"]
+    # Evaluated left to right: the round-trip count is only type-checked once
+    # the contract name and ladder have already matched.
+    conforms = (
+        conditioning["contract"] == identity.V1_CASE_PROFILE["conditioning_contract"]
+        and type(ladder) is list
+        and all(type(point) is int for point in ladder)
+        and ladder == list(V1_CONDITIONING_LADDERS[phase])
+        and _integer(
+            conditioning["roundtrips_per_shape"],
+            "raw.measurement.conditioning.roundtrips_per_shape",
+            minimum=1,
+        ) == V1_CONDITIONING_ROUNDS_PER_SHAPE
+    )
+    if not conforms:
+        raise ContractError(f"raw {phase} conditioning contract differs")
+    return conditioning
+
+
+def _number(value: Any, path: str, *, minimum: float | None = None) -> float:
+    """Return ``value`` as a finite float, optionally enforcing a lower bound.
+
+    Args:
+        value: An ``int`` or ``float``; ``bool`` is rejected.
+        path: Location used in error messages.
+        minimum: Inclusive lower bound, or ``None`` for no bound.
+
+    Raises:
+        ContractError: ``value`` is not a number, is NaN/infinite, is an
+            integer too large to represent as a float, or is below ``minimum``.
+    """
+    if isinstance(value, bool) or not isinstance(value, (int, float)):
+        raise ContractError(f"{path} must be finite")
+    try:
+        # Very large ints (JSON allows them) raise OverflowError on conversion.
+        result = float(value)
+    except OverflowError as exc:
+        raise ContractError(f"{path} must be finite") from exc
+    if not math.isfinite(result):
+        raise ContractError(f"{path} must be finite")
+    if minimum is not None and result < minimum:
+        raise ContractError(f"{path} must be >= {minimum}")
+    return result
+
+
+def _finite_tree(value: Any, path: str = "$") -> None:
+    """Walk nested lists and dicts and raise ContractError at the first non-finite float.
+
+    ``path`` tracks the location as ``$``, ``$[i]`` and ``$.key`` for the
+    error message. Containers other than ``list`` and ``dict`` are not descended into.
+    """
+    if isinstance(value, float) and not math.isfinite(value):
+        raise ContractError(f"{path} contains a non-finite number")
+    if isinstance(value, list):
+        children = ((f"{path}[{index}]", item) for index, item in enumerate(value))
+    elif isinstance(value, dict):
+        children = ((f"{path}.{key}", item) for key, item in value.items())
+    else:
+        return
+    for child_path, child in children:
+        _finite_tree(child, child_path)
+
+
+def _typed(value: Any, kind: str, path: str) -> str:
+    """Return ``value`` if ``identity.is_typed_id`` accepts it for ``kind``.
+
+    Raises:
+        ContractError: The identity check fails.
+    """
+    if identity.is_typed_id(value, kind):
+        return value
+    raise ContractError(f"{path} is not a {kind} ID")
+
+
+def _sha256_json(value: Any) -> str:
+    """Return the SHA-256 hex digest of the compact, key-sorted JSON form of ``value``.
+
+    Non-finite floats make ``json.dumps`` raise ``ValueError``, which is not wrapped here.
+    """
+    encoded = json.dumps(
+        value,
+        sort_keys=True,
+        separators=(",", ":"),
+        ensure_ascii=False,
+        allow_nan=False,
+    ).encode()
+    hasher = hashlib.sha256()
+    hasher.update(encoded)
+    return hasher.hexdigest()
+
+
+@lru_cache(maxsize=None)
+def _expected_eplb_plan(
+    routing: str,
+    topk: int,
+    logical_experts: int,
+    physical_experts: int,
+    ep_size: int,
+    seed: int,
+    reference_tokens_per_rank: int,
+) -> dict[str, Any]:
+    """Rebuild the EPLB plan from the canonical routing of the reference batch.
+
+    Counts how often each logical expert appears in the canonical routing rows
+    for ``reference_tokens_per_rank * ep_size`` tokens and passes that load to
+    ``eplb_contract.build_plan``.
+
+    Results are memoized per argument tuple, so the returned object is shared
+    between callers.
+    NOTE(review): callers presumably must not mutate it; confirm.
+    """
+    total_tokens = reference_tokens_per_rank * ep_size
+    rows, _ = workload_contract.canonical_routing_rows(
+        total_tokens,
+        logical_experts,
+        topk,
+        routing,
+        seed,
+    )
+    per_expert_load = [0] * logical_experts
+    for expert in (choice for row in rows for choice in row):
+        per_expert_load[expert] += 1
+    return eplb_contract.build_plan(per_expert_load, physical_experts, ep_size)
+
+
+@lru_cache(maxsize=None)
+def _expected_canonical_trace(
+    routing: str,
+    hidden: int,
+    topk: int,
+    logical_experts: int,
+    physical_experts: int,
+    ep_size: int,
+    tokens_per_rank: int,
+    seed: int,
+    eplb_enabled: bool,
+    reference_tokens_per_rank: int,
+) -> tuple[str, dict[str, str], str, list[list[int]], list[list[float]]]:
+    """Recompute the canonical member, checksums, routing hash, indices and weights.
+
+    When ``eplb_enabled`` is true the indices are remapped through the
+    expected EPLB plan before the routing hash is taken; otherwise the hash
+    covers the canonical indices as generated.
+
+    Results are memoized per argument tuple, so the returned objects are
+    shared between callers.
+    NOTE(review): callers presumably must not mutate them; confirm.
+    """
+    canonical = workload_contract.canonical_member(
+        routing,
+        hidden,
+        topk,
+        logical_experts,
+        ep_size,
+        tokens_per_rank,
+        seed,
+    )
+    member, checksums, indices, weights = canonical
+    if eplb_enabled:
+        indices = eplb_contract.remap_rows(
+            indices,
+            _expected_eplb_plan(
+                routing,
+                topk,
+                logical_experts,
+                physical_experts,
+                ep_size,
+                seed,
+                reference_tokens_per_rank,
+            ),
+        )
+    trace_hash = workload_contract.trace_checksums(indices, weights)["trace"]
+    return member, checksums, trace_hash, indices, weights
+
+
+def _coefficient_of_variation(values: list[int]) -> float:
+    """Return the population standard deviation divided by the mean.
+
+    Returns 0.0 when the mean is zero. ``values`` must be non-empty; an empty
+    list raises ``ZeroDivisionError``.
+    """
+    count = len(values)
+    mean = sum(values) / count
+    if mean == 0:
+        return 0.0
+    squared_deviation = sum((value - mean) ** 2 for value in values)
+    return (squared_deviation / count) ** 0.5 / mean
+
+
+def _expected_routing_summary(
+    indices: list[list[int]],
+    weights: list[list[float]],
+    *,
+    physical_experts: int,
+    ep_size: int,
+    tokens_per_rank: int,
+    gpus_per_node: int,
+    scale_up_domain: int,
+) -> dict[str, Any]:
+    """Recompute every published routing/load statistic without torch.
+
+    Args:
+        indices: One row of expert ids per token. Token ``t`` is attributed to
+            source rank ``t // tokens_per_rank``; expert ``e`` to destination
+            rank ``e // (physical_experts // ep_size)``.
+        weights: Per-token weights; only used for the trace hash.
+        physical_experts: Number of expert slots counted in ``expert_load``.
+        ep_size: Number of ranks.
+        tokens_per_rank: Tokens contributed by each source rank.
+        gpus_per_node: Ranks per node, used for the same-node fraction.
+        scale_up_domain: Ranks per scale-up domain, used for the same-domain
+            fraction.
+
+    Returns:
+        The summary mapping (expert/rank load statistics, fan-out statistics,
+        locality fractions, trace hash and source-token statistics).
+
+    Raises:
+        ValueError: if ``indices`` is empty (``max``/``min`` of no fan-outs).
+        ZeroDivisionError: if no row routes to any rank, or if
+            ``physical_experts < ep_size``.
+    """
+    experts_per_rank = physical_experts // ep_size
+    expert_load = [0] * physical_experts
+    assignment_load = [0] * ep_size
+    payload_load = [0] * ep_size
+    fanouts: list[int] = []
+    # fanout_histogram[k - 1] counts tokens reaching exactly k distinct ranks.
+    # Tallying it here avoids rescanning `fanouts` once per rank afterwards.
+    fanout_histogram = [0] * ep_size
+    local = same_node = same_domain = copies = 0
+    for token, row in enumerate(indices):
+        destinations = {expert // experts_per_rank for expert in row}
+        source = token // tokens_per_rank
+        fanout = len(destinations)
+        fanouts.append(fanout)
+        # Fan-outs outside 1..ep_size are not part of the published histogram.
+        if 1 <= fanout <= ep_size:
+            fanout_histogram[fanout - 1] += 1
+        for expert in row:
+            expert_load[expert] += 1
+            assignment_load[expert // experts_per_rank] += 1
+        # One payload copy per distinct destination rank, however many of the
+        # token's experts live on that rank.
+        for destination in destinations:
+            payload_load[destination] += 1
+            copies += 1
+            local += destination == source
+            same_node += destination // gpus_per_node == source // gpus_per_node
+            same_domain += destination // scale_up_domain == source // scale_up_domain
+    expert_mean = sum(expert_load) / len(expert_load)
+    return {
+        "empty_expert_count": expert_load.count(0),
+        "empty_rank_count": payload_load.count(0),
+        "expert_assignment_rank_cv": _coefficient_of_variation(assignment_load),
+        "expert_assignments_per_rank": assignment_load,
+        "expert_load_cv": _coefficient_of_variation(expert_load),
+        "expert_load_max": max(expert_load),
+        "expert_load_mean": expert_mean,
+        "expert_load_min": min(expert_load),
+        "fanout_histogram": fanout_histogram,
+        "fanout_max": max(fanouts),
+        "fanout_mean": sum(fanouts) / len(fanouts),
+        "fanout_min": min(fanouts),
+        "hash": workload_contract.trace_checksums(indices, weights)["trace"],
+        "hotspot_ratio": max(expert_load) / expert_mean if expert_mean else 0.0,
+        "locality": {
+            "placement": "packed",
+            "local_rank_fraction": local / copies,
+            "same_node_fraction": same_node / copies,
+            "same_scaleup_domain_fraction": same_domain / copies,
+            "cross_node_fraction": 1 - same_node / copies,
+            "cross_domain_fraction": 1 - same_domain / copies,
+            "gpus_per_node": gpus_per_node,
+            "scale_up_domain": scale_up_domain,
+            "copies": copies,
+        },
+        "payload_copies_per_rank": payload_load,
+        "payload_rank_cv": _coefficient_of_variation(payload_load),
+        "routed_copies": copies,
+        # Every source rank contributes exactly tokens_per_rank tokens, so the
+        # source statistics are constants of the inputs.
+        "source_token_stats": {
+            "min": tokens_per_rank,
+            "mean": float(tokens_per_rank),
+            "max": tokens_per_rank,
+            "cv": 0.0,
+            "empty_ranks": 0,
+            "total": tokens_per_rank * ep_size,
+            "ranks": ep_size,
+        },
+    }
+
+
+def _expected_histogram(samples: list[float], bins: int = 40) -> dict[str, Any]:
+    """Bucket ``samples`` into ``bins`` equal-width bins spanning ``[min, max]``.
+
+    When every sample is equal the result has a single bucket holding all
+    samples and reports the unrounded bounds; otherwise the bounds are rounded
+    to three decimals. Raises ``ValueError`` for an empty ``samples``.
+    """
+    total = len(samples)
+    lowest = min(samples)
+    highest = max(samples)
+    if highest <= lowest:
+        return {"n": total, "min": lowest, "max": highest, "bins": bins, "counts": [total]}
+    width = highest - lowest
+    tally = [0] * bins
+    for value in samples:
+        slot = int((value - lowest) / width * bins)
+        # The maximum maps to `bins` itself; fold it into the last bucket.
+        tally[min(bins - 1, slot)] += 1
+    return {
+        "n": total,
+        "min": round(lowest, 3),
+        "max": round(highest, 3),
+        "bins": bins,
+        "counts": tally,
+    }
+
+
+def _expected_anomalies(
+    tokens: int, components: dict[str, Any]
+) -> list[dict[str, Any]]:
+    """Recompute the anomaly records for one token point.
+
+    Two conditions are checked, in this order:
+
+    * ``roundtrip_gt_isolated_sum``: the roundtrip p99 exceeds three times the
+      isolated-sum p99 (skipped when the isolated sum has no percentiles).
+    * ``roundtrip_lt_component_floor``: the roundtrip p50 is below 95% of the
+      larger of the dispatch and combine p50 (skipped when either component
+      has no percentiles or the floor is zero).
+    """
+    percentiles = {
+        name: components[name]["percentiles_us"]
+        for name in ("dispatch", "combine", "roundtrip", "isolated_sum")
+    }
+    roundtrip = percentiles["roundtrip"]
+    isolated = percentiles["isolated_sum"]
+    found: list[dict[str, Any]] = []
+
+    if isolated is not None:
+        roundtrip_p99 = roundtrip["p99"]
+        isolated_p99 = isolated["p99"]
+        if roundtrip_p99 > 3.0 * isolated_p99:
+            found.append({
+                "type": "roundtrip_gt_isolated_sum",
+                "T": tokens,
+                "roundtrip_p99": round(roundtrip_p99, 2),
+                "isolated_sum_p99": round(isolated_p99, 2),
+                "ratio": round(roundtrip_p99 / isolated_p99, 2),
+                "threshold": 3.0,
+            })
+
+    dispatch = percentiles["dispatch"]
+    combine = percentiles["combine"]
+    if dispatch and combine:
+        floor = max(dispatch["p50"], combine["p50"])
+        # A zero floor is falsy and therefore never reported.
+        if floor and roundtrip["p50"] < 0.95 * floor:
+            found.append({
+                "type": "roundtrip_lt_component_floor",
+                "T": tokens,
+                "roundtrip_p50": round(roundtrip["p50"], 2),
+                "component_floor_p50": round(floor, 2),
+            })
+    return found
+
+
+def _validate_canonical_workload(
+    workload: dict[str, Any],
+    scheduled_case: dict[str, Any],
+    rows: list[dict[str, Any]],
+    eplb: dict[str, Any],
+) -> None:
+    """Bind every canonical member and measured routing hash to its scheduled token row."""
+    # workload:       reads "members", "manifest_checksums" and "workload_id".
+    # scheduled_case: reads "routing", "hidden", "topk", "experts" and "ep".
+    # rows:           each row supplies "tokens_per_rank" and routing["hash"].
+    # eplb:           reads "enabled", "num_physical_experts" and "mapping_hash".
+    # Raises ContractError on the first mismatch; returns None otherwise.
+    profile = identity.V1_CASE_PROFILE
+    if eplb["enabled"]:
+        # The reported mapping hash must equal the hash of the plan recomputed
+        # from the scheduled case and the frozen profile constants.
+        plan = _expected_eplb_plan(
+            scheduled_case["routing"],
+            scheduled_case["topk"],
+            scheduled_case["experts"],
+            eplb["num_physical_experts"],
+            scheduled_case["ep"],
+            profile["seed"],
+            profile["eplb_reference_tokens_per_rank"],
+        )
+        if eplb["mapping_hash"] != eplb_contract.mapping_hash(plan):
+            raise ContractError("raw EPLB mapping differs from the frozen canonical plan")
+
+    # Recomputed checksums keyed by member id; two rows resolving to the same
+    # member collapse into one entry, which the length check below detects.
+    expected: dict[str, dict[str, str]] = {}
+    for index, row in enumerate(rows):
+        member, checksums, routing_hash, _, _ = _expected_canonical_trace(
+            scheduled_case["routing"],
+            scheduled_case["hidden"],
+            scheduled_case["topk"],
+            scheduled_case["experts"],
+            eplb["num_physical_experts"],
+            scheduled_case["ep"],
+            row["tokens_per_rank"],
+            profile["seed"],
+            eplb["enabled"],
+            profile["eplb_reference_tokens_per_rank"],
+        )
+        if row["routing"]["hash"] != routing_hash:
+            raise ContractError(
+                f"raw.measurement.rows[{index}].routing.hash differs from its canonical member"
+            )
+        expected[member] = checksums
+    # The document must list exactly the recomputed members, in sorted order,
+    # with exactly the recomputed checksums.
+    if (
+        len(expected) != len(rows)
+        or workload["members"] != sorted(expected)
+        or workload["manifest_checksums"] != expected
+    ):
+        raise ContractError("raw canonical member set/checksums differ from scheduled rows")
+    # The composite id is derived from the sorted members and their checksums.
+    expected_workload_id = identity.workload_id({
+        "members": [
+            {"checksums": expected[member], "workload_id": member}
+            for member in sorted(expected)
+        ]
+    })
+    if workload["workload_id"] != expected_workload_id:
+        raise ContractError("raw composite workload identity differs from scheduled rows")
+
+
+def _nearest_rank(samples: list[float], q: int) -> float:
+    """Return the nearest-rank ``q``-th percentile of ``samples``.
+
+    The 1-based rank is ``ceil(q * n / 100)`` clamped to ``[1, n]``. It is
+    computed with integer floor division rather than
+    ``math.ceil(q / 100 * n)`` because the float product can overshoot an
+    exact integer rank (``7 / 100 * 100 == 7.000000000000001``) and select the
+    next sample instead.
+
+    Raises:
+        IndexError: if ``samples`` is empty.
+    """
+    ordered = sorted(samples)
+    rank = -(-q * len(ordered) // 100)  # exact ceiling for integer q
+    return ordered[max(0, min(len(ordered) - 1, rank - 1))]
+
+
+def _close(observed: Any, expected: float, path: str, tolerance: float = 1e-6) -> None:
+    """Raise ``ContractError`` unless ``observed`` is numerically close to ``expected``.
+
+    ``observed`` is first passed through ``_number``; ``tolerance`` is used as
+    both the relative and the absolute bound of ``math.isclose``.
+    """
+    actual = _number(observed, path)
+    if math.isclose(actual, expected, rel_tol=tolerance, abs_tol=tolerance):
+        return
+    raise ContractError(f"{path}={actual} differs from recomputed {expected}")
+
+
+def _equivalent(
+    observed: Any, expected: Any, path: str, *, tolerance: float = 1e-6
+) -> None:
+    """Compare a recomputed JSON subtree while allowing only float roundoff.
+
+    Dicts are checked through ``_keys`` and then compared value by value, lists
+    must have the same length and are compared element by element, floats go
+    through ``_close`` with ``tolerance``, and every other value must have the
+    exact same type and compare equal. Raises ``ContractError`` on a mismatch.
+    """
+    if isinstance(expected, dict):
+        mapping = _keys(observed, set(expected), path)
+        for name in expected:
+            _equivalent(mapping[name], expected[name], f"{path}.{name}", tolerance=tolerance)
+    elif isinstance(expected, list):
+        if not (isinstance(observed, list) and len(observed) == len(expected)):
+            raise ContractError(f"{path} differs from recomputed evidence")
+        for position, (seen, wanted) in enumerate(zip(observed, expected)):
+            _equivalent(seen, wanted, f"{path}[{position}]", tolerance=tolerance)
+    elif isinstance(expected, float):
+        _close(observed, expected, path, tolerance)
+    elif type(observed) is not type(expected) or observed != expected:
+        # Exact type match keeps True apart from 1 and 1 apart from 1.0.
+        raise ContractError(f"{path} differs from recomputed evidence")
+
+
+def _schema_equal(left: Any, right: Any) -> bool:
+    """JSON Schema equality: booleans are distinct from numbers.
+
+    Dicts and lists are compared recursively; everything else falls back to
+    ``==``, so ``1`` and ``1.0`` are equal while ``True`` and ``1`` are not.
+    """
+    if type(left) is bool or type(right) is bool:
+        return type(left) is type(right) and left == right
+    if isinstance(left, dict) and isinstance(right, dict):
+        if left.keys() != right.keys():
+            return False
+        return all(_schema_equal(item, right[key]) for key, item in left.items())
+    if isinstance(left, list) and isinstance(right, list):
+        if len(left) != len(right):
+            return False
+        for first, second in zip(left, right):
+            if not _schema_equal(first, second):
+                return False
+        return True
+    return left == right
+
+
+def _schema_ref(root: dict[str, Any], reference: str) -> dict[str, Any]:
+    """Resolve a local ``#/...`` JSON Pointer against ``root``.
+
+    Raises ``ContractError`` when the reference does not start with ``#/``,
+    when a path segment is missing, or when the target is not an object.
+    """
+    if not reference.startswith("#/"):
+        raise ContractError("native artifact schema contains a non-local reference")
+    node: Any = root
+    for token in reference[2:].split("/"):
+        # Decode "~1" before "~0" so the escape "~01" yields "~1", not "/".
+        key = token.replace("~1", "/").replace("~0", "~")
+        if not (isinstance(node, dict) and key in node):
+            raise ContractError("native artifact schema contains a broken reference")
+        node = node[key]
+    if isinstance(node, dict):
+        return node
+    raise ContractError("native artifact schema reference is not an object")
+
+
+def _schema_type_matches(value: Any, expected: str) -> bool:
+    """Report whether ``value`` satisfies the JSON Schema type name ``expected``.
+
+    Booleans never count as ``number`` or ``integer``. A ``number`` is any int
+    or finite float; an ``integer`` is any int or a finite float with no
+    fractional part.
+
+    Raises:
+        ContractError: if ``expected`` is not one of the supported type names.
+    """
+    if expected == "null":
+        return value is None
+    if expected == "boolean":
+        return type(value) is bool
+    if expected == "object":
+        return isinstance(value, dict)
+    if expected == "array":
+        return isinstance(value, list)
+    if expected == "string":
+        return isinstance(value, str)
+    if expected in ("number", "integer"):
+        if isinstance(value, bool) or not isinstance(value, (int, float)):
+            return False
+        if isinstance(value, int):
+            # Ints are always finite and integral. Converting one beyond float
+            # range (math.isfinite / float()) would raise OverflowError.
+            return True
+        if not math.isfinite(value):
+            return False
+        return expected == "number" or value.is_integer()
+    raise ContractError(f"native artifact schema uses unsupported type {expected!r}")
+
+
+def _validate_schema_value(
+    value: Any, schema: dict[str, Any], root: dict[str, Any], path: str
+) -> None:
+    """Validate the bounded JSON Schema subset used by native artifact contracts.
+
+    Supported keywords: ``$ref`` (local pointers), ``oneOf``, ``type``,
+    ``const``, ``enum``, ``required``, ``properties``,
+    ``additionalProperties``, ``propertyNames``, ``minItems``, ``maxItems``,
+    ``uniqueItems``, ``items``, ``minLength``, ``maxLength``, ``pattern``,
+    ``format: date-time``, ``minimum`` and ``maximum``. Keywords outside this
+    list are not evaluated. A schema containing ``$ref`` or ``oneOf`` is
+    resolved through that keyword alone; its sibling keywords are skipped.
+
+    Args:
+        value: The JSON value under validation.
+        schema: The schema node that applies to ``value``.
+        root: The schema document used to resolve ``$ref`` pointers.
+        path: Location of ``value``, used in error messages.
+
+    Raises:
+        ContractError: on the first constraint that ``value`` violates.
+    """
+    if "$ref" in schema:
+        _validate_schema_value(value, _schema_ref(root, schema["$ref"]), root, path)
+        return
+    if "oneOf" in schema:
+        # Exactly one alternative may accept the value.
+        matches = 0
+        for candidate in schema["oneOf"]:
+            try:
+                _validate_schema_value(value, candidate, root, path)
+            except ContractError:
+                continue
+            matches += 1
+        if matches != 1:
+            raise ContractError(f"{path} must match exactly one native schema alternative")
+        return
+    expected_type = schema.get("type")
+    if expected_type is not None and not _schema_type_matches(value, expected_type):
+        raise ContractError(f"{path} is not a schema {expected_type}")
+    if "const" in schema and not _schema_equal(value, schema["const"]):
+        raise ContractError(f"{path} differs from its schema constant")
+    if "enum" in schema and not any(_schema_equal(value, item) for item in schema["enum"]):
+        raise ContractError(f"{path} is outside its schema enum")
+
+    if isinstance(value, dict):
+        required = set(schema.get("required", ()))
+        properties = schema.get("properties", {})
+        missing = required - set(value)
+        if missing:
+            raise ContractError(f"{path} lacks schema fields {sorted(missing)}")
+        additional = schema.get("additionalProperties", True)
+        extra = set(value) - set(properties)
+        if additional is False and extra:
+            raise ContractError(f"{path} has extra schema fields {sorted(extra)}")
+        for key, item in value.items():
+            if key in properties:
+                _validate_schema_value(item, properties[key], root, f"{path}.{key}")
+            elif isinstance(additional, dict):
+                # additionalProperties given as a schema applies to undeclared keys.
+                _validate_schema_value(item, additional, root, f"{path}.{key}")
+        property_names = schema.get("propertyNames")
+        if property_names is not None:
+            for key in value:
+                _validate_schema_value(key, property_names, root, f"{path}.<key>")
+
+    if isinstance(value, list):
+        if len(value) < schema.get("minItems", 0):
+            raise ContractError(f"{path} has too few schema items")
+        maximum = schema.get("maxItems")
+        if maximum is not None and len(value) > maximum:
+            raise ContractError(f"{path} has too many schema items")
+        # Pairwise comparison: items may be unhashable dicts or lists.
+        if schema.get("uniqueItems") and any(
+            _schema_equal(item, prior)
+            for index, item in enumerate(value)
+            for prior in value[:index]
+        ):
+            raise ContractError(f"{path} schema items are not unique")
+        if "items" in schema:
+            for index, item in enumerate(value):
+                _validate_schema_value(item, schema["items"], root, f"{path}[{index}]")
+
+    if isinstance(value, str):
+        if len(value) < schema.get("minLength", 0):
+            raise ContractError(f"{path} is shorter than its schema minimum")
+        maximum = schema.get("maxLength")
+        if maximum is not None and len(value) > maximum:
+            raise ContractError(f"{path} is longer than its schema maximum")
+        # re.search: the pattern may match anywhere unless it anchors itself.
+        if "pattern" in schema and re.search(schema["pattern"], value) is None:
+            raise ContractError(f"{path} does not match its schema pattern")
+        if schema.get("format") == "date-time":
+            try:
+                parsed = dt.datetime.fromisoformat(value.replace("Z", "+00:00"))
+            except ValueError as exc:
+                raise ContractError(f"{path} is not a schema date-time") from exc
+            if parsed.tzinfo is None:
+                raise ContractError(f"{path} schema date-time lacks a timezone")
+
+    # Range checks apply to ints and finite floats, never to booleans. An int
+    # is finite by construction; math.isfinite would raise OverflowError for
+    # one beyond float range, so it is only consulted for floats.
+    if (
+        not isinstance(value, bool)
+        and isinstance(value, (int, float))
+        and (isinstance(value, int) or math.isfinite(value))
+    ):
+        if "minimum" in schema and value < schema["minimum"]:
+            raise ContractError(f"{path} is below its schema minimum")
+        if "maximum" in schema and value > schema["maximum"]:
+            raise ContractError(f"{path} is above its schema maximum")
+
+
+def _validate_native_schema(name: str, value: Any) -> None:
+    """Validate ``value`` against the schema file ``name`` under ``SCHEMA_DIR``.
+
+    The schema is loaded with ``strict_load`` on first use and kept in
+    ``_SCHEMA_CACHE``. Raises ``ContractError`` when the loaded schema is not
+    an object or when ``value`` violates it.
+    """
+    cached = _SCHEMA_CACHE.get(name)
+    if cached is None:
+        cached = strict_load(SCHEMA_DIR / name)
+        if not isinstance(cached, dict):
+            raise ContractError(f"native artifact schema {name} is not an object")
+        _SCHEMA_CACHE[name] = cached
+    # The schema is its own root for resolving local references.
+    _validate_schema_value(value, cached, cached, "$")
+
+
+def validate_samples_document(document: Any) -> dict[str, Any]:
+    # Validate a samples document against the native schema and the fixed v1
+    # sampling contract, then return the top-level mapping produced by _keys.
+    # Raises ContractError on the first violation found.
+    # NOTE(review): _keys presumably enforces the exact key set it is given;
+    # confirm against its definition.
+    _validate_native_schema("samples-v1.schema.json", document)
+    doc = _keys(
+        document,
+        {"allocation_id", "attempt_id", "case_id", "format", "points", "sampling",
+         "schema_version", "series_id"},
+        "samples",
+    )
+    if doc["format"] != SAMPLES_FORMAT or doc["schema_version"] != 1:
+        raise ContractError("samples format/schema differs from v1")
+    # Each identifier is checked against its expected id kind.
+    for field, kind in (
+        ("allocation_id", "allocation"), ("attempt_id", "attempt"),
+        ("case_id", "case"), ("series_id", "series"),
+    ):
+        _typed(doc[field], kind, f"samples.{field}")
+    sampling = _keys(
+        doc["sampling"], {"iterations_per_trial", "reduction", "trials"}, "samples.sampling"
+    )
+    # The contract is pinned: 8 iterations per trial, 64 trials, and the
+    # profile's rank reduction.
+    if (
+        _integer(sampling["iterations_per_trial"], "samples.sampling.iterations_per_trial", minimum=1) != 8
+        or _integer(sampling["trials"], "samples.sampling.trials", minimum=1) != 64
+        or sampling["reduction"] != identity.V1_CASE_PROFILE["rank_reduction"]
+    ):
+        raise ContractError("samples must use the fixed 8x64 cross-rank-max contract")
+    points = doc["points"]
+    if not isinstance(points, list) or not points:
+        raise ContractError("samples.points must be non-empty")
+    # tokens_per_rank values already encountered; each may appear only once.
+    seen = set()
+    for index, point_value in enumerate(points):
+        path = f"samples.points[{index}]"
+        point = _keys(
+            point_value,
+            {"components", "evidence_id", "point_id", "sample_sha256", "tokens_per_rank"},
+            path,
+        )
+        tokens = _integer(point["tokens_per_rank"], f"{path}.tokens_per_rank", minimum=1)
+        if tokens in seen:
+            raise ContractError(f"duplicate sample token point {tokens}")
+        seen.add(tokens)
+        _typed(point["point_id"], "point", f"{path}.point_id")
+        _typed(point["evidence_id"], "evidence", f"{path}.evidence_id")
+        components = _keys(point["components"], {"combine", "dispatch", "roundtrip"}, f"{path}.components")
+        for name, component_value in components.items():
+            component = _keys(
+                component_value, {"availability", "sample_count", "trials"},
+                f"{path}.components.{name}",
+            )
+            availability = component["availability"]
+            count = _integer(component["sample_count"], f"{path}.components.{name}.sample_count")
+            trials = component["trials"]
+            if availability == "unavailable":
+                # An unavailable component carries no samples, and roundtrip
+                # may never be unavailable.
+                if count != 0 or trials is not None or name == "roundtrip":
+                    raise ContractError(f"{path}.components.{name} has invalid unavailability")
+                continue
+            if availability != "measured" or not isinstance(trials, list) or len(trials) != 64:
+                raise ContractError(f"{path}.components.{name} must contain 64 measured trials")
+            if any(not isinstance(trial, list) or len(trial) != 8 for trial in trials):
+                raise ContractError(f"{path}.components.{name} trials must each contain 8 samples")
+            # Every sample goes through _number with minimum=0.0. The error
+            # path passed here names the trials list, not the individual sample.
+            flattened = [
+                _number(sample, f"{path}.components.{name}.trials", minimum=0.0)
+                for trial in trials for sample in trial
+            ]
+            # 64 trials x 8 samples = 512; the declared count must agree.
+            if count != 512 or len(flattened) != 512:
+                raise ContractError(f"{path}.components.{name} must contain 512 samples")
+        # The digest covers only the components and the token count.
+        sample_base = {"components": components, "tokens_per_rank": tokens}
+        if point["sample_sha256"] != _sha256_json(sample_base):
+            raise ContractError(f"{path}.sample_sha256 differs")
+    return doc
+
+
+def _validate_component(
+    component_value: Any,
+    sample_component: dict[str, Any] | None,
+    path: str,
+    *,
+    derived: bool = False,
+) -> None:
+    # Check one published component against its exact sample evidence.
+    #   component_value:  the published component mapping.
+    #   sample_component: the matching entry from the samples document, or
+    #                     None when there is none.
+    #   path:             location used in error messages.
+    #   derived:          True for a value computed from other percentiles
+    #                     rather than measured.
+    # Raises ContractError on the first disagreement; returns None otherwise.
+    component = _keys(
+        component_value, {"availability", "origin", "percentiles_us", "sample_count"}, path
+    )
+    availability = component["availability"]
+    if availability == "unavailable":
+        # Only one exact representation of "unavailable" is accepted.
+        if component != {
+            "availability": "unavailable", "origin": None,
+            "percentiles_us": None, "sample_count": 0,
+        }:
+            raise ContractError(f"{path} has invalid unavailable representation")
+        if sample_component and sample_component["availability"] != "unavailable":
+            raise ContractError(f"{path} disagrees with samples")
+        return
+    expected_availability = "derived" if derived else "measured"
+    expected_origin = "derived-percentile-sum" if derived else "measured"
+    if availability != expected_availability or component["origin"] != expected_origin:
+        raise ContractError(f"{path} has invalid availability/origin")
+    percentiles = _keys(component["percentiles_us"], set(PERCENTILES), f"{path}.percentiles_us")
+    if derived:
+        # A derived value has no samples of its own; nothing beyond the
+        # percentile mapping's keys is checked for it here.
+        if component["sample_count"] != 0:
+            raise ContractError(f"{path}.sample_count must be zero for a derived value")
+        return
+    if sample_component is None or sample_component["availability"] != "measured":
+        raise ContractError(f"{path} lacks measured sample evidence")
+    flattened = [sample for trial in sample_component["trials"] for sample in trial]
+    if component["sample_count"] != len(flattened):
+        raise ContractError(f"{path}.sample_count differs from exact samples")
+    # Each published percentile must match the nearest-rank value recomputed
+    # from the flattened samples. PERCENTILES is paired positionally with
+    # (50, 90, 95, 99); strict=True rejects a length mismatch.
+    # NOTE(review): the error path below omits the "percentiles_us" segment.
+    for name, percentile in zip(PERCENTILES, (50, 90, 95, 99), strict=True):
+        _close(percentiles[name], _nearest_rank(flattened, percentile), f"{path}.{name}")
+
+
+def _validate_oracle(value: Any, path: str) -> dict[str, Any]:
+    # Validate an oracle record and return the mapping produced by _keys.
+    #   value: the oracle mapping from the document.
+    #   path:  location used in error messages.
+    # Raises ContractError on the first violation found.
+    oracle = _keys(
+        value,
+        {"atol", "checks", "combine_weight_semantics", "contract", "dispatch_sha256",
+         "max_absolute_error", "max_elementwise_relative_error", "max_relative_error",
+         "max_weight_error", "order_sha256", "ordering_contract", "passed", "receive_count",
+         "rtol"},
+        path,
+    )
+    if oracle["contract"] != identity.V1_CASE_PROFILE["oracle_contract"]:
+        raise ContractError(f"{path}.contract differs")
+    checks = _keys(
+        oracle["checks"],
+        {"combine_values", "counts", "metadata", "multiplicity", "payload", "source_set",
+         "weights"},
+        f"{path}.checks",
+    )
+    # type(...) is bool rejects 0/1 and other truthy stand-ins.
+    if any(type(value) is not bool for value in checks.values()):
+        raise ContractError(f"{path}.checks must be boolean")
+    if type(oracle["passed"]) is not bool:
+        raise ContractError(f"{path}.passed must be boolean")
+    _integer(oracle["receive_count"], f"{path}.receive_count")
+    _text(oracle["ordering_contract"], f"{path}.ordering_contract")
+    if oracle["combine_weight_semantics"] != "unweighted-rank-sum":
+        raise ContractError(f"{path}.combine_weight_semantics differs from v1")
+    # The tolerances are fixed: rtol 5e-2 and atol 2e-2.
+    _close(oracle["rtol"], 5e-2, f"{path}.rtol")
+    _close(oracle["atol"], 2e-2, f"{path}.atol")
+    # Digests may be null; otherwise they must be 64 lowercase hex characters.
+    for field in ("dispatch_sha256", "order_sha256"):
+        digest = oracle[field]
+        if digest is not None and (
+            not isinstance(digest, str) or len(digest) != 64
+            or any(character not in "0123456789abcdef" for character in digest)
+        ):
+            raise ContractError(f"{path}.{field} is not a SHA-256 digest")
+    # Error magnitudes may be null; when present they go through _number with
+    # minimum=0.0.
+    for field in (
+        "max_absolute_error", "max_elementwise_relative_error", "max_relative_error",
+        "max_weight_error",
+    ):
+        if oracle[field] is not None:
+            _number(oracle[field], f"{path}.{field}", minimum=0.0)
+    # `passed` must equal: every check true, and max_relative_error present
+    # and strictly below 5e-2. The other error fields do not enter this test.
+    expected_pass = (
+        all(checks.values())
+        and oracle["max_relative_error"] is not None
+        and oracle["max_relative_error"] < 5e-2
+    )
+    if oracle["passed"] != expected_pass:
+        raise ContractError(f"{path}.passed differs from its evidence")
+    return oracle
+
+
+def validate_raw_document(document: Any, samples_document: Any) -> dict[str, Any]:
+    """Validate identities, exact samples, formulas, privacy, and the native raw shape."""
+    _validate_native_schema("raw-case-v1.schema.json", document)
+    doc = _keys(
+        document,
+        {"case", "format", "generated_at", "identity", "implementation", "measurement",
+         "outcome", "provenance", "record_type", "runtime_fingerprint", "sample_artifact",
+         "schema_version", "topology", "workload"},
+        "raw",
+    )
+    _finite_tree(doc)
+    if doc["format"] != RAW_FORMAT or doc["schema_version"] != 1 or doc["record_type"] != "case-attempt":
+        raise ContractError("raw format/schema/record type differs from v1")
+    _text(doc["generated_at"], "raw.generated_at")
+    identifiers = _keys(
+        doc["identity"],
+        {"allocation_factors", "allocation_id", "attempt_id", "attempt_ordinal", "case_factors",
+         "case_id", "series_factors", "series_id"},
+        "raw.identity",
+    )
+    for field, kind in (
+        ("allocation_id", "allocation"), ("attempt_id", "attempt"),
+        ("case_id", "case"), ("series_id", "series"),
+    ):
+        _typed(identifiers[field], kind, f"raw.identity.{field}")
+    ordinal = _integer(identifiers["attempt_ordinal"], "raw.identity.attempt_ordinal", minimum=1)
+    allocation_factors = _keys(
+        identifiers["allocation_factors"], ALLOCATION_FACTOR_FIELDS,
+        "raw.identity.allocation_factors",
+    )
+    case_factors = _keys(
+        identifiers["case_factors"], {"case", "profile", "sku"},
+        "raw.identity.case_factors",
+    )
+    scheduled_case = _keys(
+        case_factors["case"], TERMINAL_CASE_FIELDS, "raw.identity.case_factors.case"
+    )
+    if case_factors["profile"] != identity.V1_CASE_PROFILE:
+        raise ContractError("raw case profile differs from CollectiveX v1")
+    _text(case_factors["sku"], "raw.identity.case_factors.sku")
+    series_factors = _keys(
+        identifiers["series_factors"],
+        {"backend", "case_id", "image_digest", "implementation_contract_sha256",
+         "public_config_sha256", "routing_control_sha256",
+         "runtime_fingerprint_sha256", "source_sha", "squash_sha256", "workload_id"},
+        "raw.identity.series_factors",
+    )
+    if identity.allocation_id(identifiers["allocation_factors"]) != identifiers["allocation_id"]:
+        raise ContractError("allocation identity differs")
+    if identity.digest("case", identifiers["case_factors"]) != identifiers["case_id"]:
+        raise ContractError("case identity differs")
+    if identity.series_id(identifiers["series_factors"]) != identifiers["series_id"]:
+        raise ContractError("series identity differs")
+    if identity.attempt_id(
+        allocation=identifiers["allocation_id"], case=identifiers["case_id"], ordinal=ordinal
+    ) != identifiers["attempt_id"]:
+        raise ContractError("attempt identity differs")
+
+    samples = validate_samples_document(samples_document)
+    for field in ("allocation_id", "attempt_id", "case_id", "series_id"):
+        if samples[field] != identifiers[field]:
+            raise ContractError(f"samples.{field} differs from raw identity")
+    sample_by_token = {point["tokens_per_rank"]: point for point in samples["points"]}
+
+    case = _keys(
+        doc["case"],
+        {"attempt_ordinal", "backend", "eplb", "ep_size", "mode", "phase",
+         "required_publication", "resource_mode", "runner", "shape", "suite", "workload_name"},
+        "raw.case",
+    )
+    ep_size = _integer(case["ep_size"], "raw.case.ep_size", minimum=1)
+    if case["attempt_ordinal"] != ordinal:
+        raise ContractError("case attempt ordinal differs")
+    for field in ("backend", "mode", "phase", "required_publication", "resource_mode", "runner",
+                  "suite", "workload_name"):
+        _text(case[field], f"raw.case.{field}")
+    shape = _keys(
+        case["shape"],
+        {"activation_profile", "dispatch_dtype", "eplb", "experts", "experts_per_rank",
+         "hidden", "kernel_gen", "num_logical_experts", "quant", "routing", "topk"},
+        "raw.case.shape",
+    )
+    hidden = _integer(shape["hidden"], "raw.case.shape.hidden", minimum=1)
+    topk = _integer(shape["topk"], "raw.case.shape.topk", minimum=1)
+    physical_experts = _integer(
+        shape["experts"], "raw.case.shape.experts", minimum=1
+    )
+    logical_experts = _integer(
+        shape["num_logical_experts"],
+        "raw.case.shape.num_logical_experts",
+        minimum=1,
+    )
+    experts_per_rank = _integer(
+        shape["experts_per_rank"], "raw.case.shape.experts_per_rank", minimum=1
+    )
+    quant = _keys(
+        shape["quant"],
+        {"combine_accum_dtype", "combine_input_dtype", "combine_output_dtype",
+         "combine_quant_mode", "scale_layout"},
+        "raw.case.shape.quant",
+    )
+    eplb = _keys(
+        case["eplb"],
+        {"enabled", "imbalance_after", "imbalance_before", "mapping_hash", "max_replicas",
+         "num_logical_experts", "num_physical_experts", "num_redundant", "planner",
+         "reference_tokens_per_rank", "replicated_experts"},
+        "raw.case.eplb",
+    )
+    if not isinstance(eplb["enabled"], bool):
+        raise ContractError("raw.case.eplb.enabled must be boolean")
+    expected_redundant = (
+        identity.V1_CASE_PROFILE["eplb_redundant_experts"] if eplb["enabled"] else 0
+    )
+    expected_physical = eplb_contract.physical_count(
+        scheduled_case["experts"], expected_redundant, ep_size
+    )
+    if (
+        shape["eplb"] != eplb["enabled"]
+        or logical_experts != scheduled_case["experts"]
+        or physical_experts != expected_physical
+        or experts_per_rank * ep_size != physical_experts
+        or eplb["num_logical_experts"] != logical_experts
+        or eplb["num_physical_experts"] != physical_experts
+        or eplb["num_redundant"] != expected_redundant
+    ):
+        raise ContractError("raw EPLB/shape dimensions differ from the frozen profile")
+    if eplb["enabled"]:
+        expected_plan = _expected_eplb_plan(
+            scheduled_case["routing"],
+            topk,
+            logical_experts,
+            physical_experts,
+            ep_size,
+            identity.V1_CASE_PROFILE["seed"],
+            identity.V1_CASE_PROFILE["eplb_reference_tokens_per_rank"],
+        )
+        expected_eplb = {
+            "enabled": True,
+            "imbalance_after": expected_plan["imbalance_after"],
+            "imbalance_before": expected_plan["imbalance_before"],
+            "mapping_hash": eplb_contract.mapping_hash(expected_plan),
+            "max_replicas": expected_plan["max_replicas"],
+            "num_logical_experts": logical_experts,
+            "num_physical_experts": physical_experts,
+            "num_redundant": expected_redundant,
+            "planner": identity.V1_CASE_PROFILE["eplb_planner"],
+            "reference_tokens_per_rank": identity.V1_CASE_PROFILE[
+                "eplb_reference_tokens_per_rank"
+            ],
+            "replicated_experts": expected_plan["replicated_experts"],
+        }
+    else:
+        expected_eplb = {
+            "enabled": False,
+            "imbalance_after": None,
+            "imbalance_before": None,
+            "mapping_hash": None,
+            "max_replicas": None,
+            "num_logical_experts": logical_experts,
+            "num_physical_experts": physical_experts,
+            "num_redundant": 0,
+            "planner": None,
+            "reference_tokens_per_rank": None,
+            "replicated_experts": 0,
+        }
+    _equivalent(eplb, expected_eplb, "raw.case.eplb", tolerance=1e-9)
+    if case_factors["sku"] != case["runner"]:
+        raise ContractError("raw case runner differs from case identity")
+
+    workload = _keys(
+        doc["workload"],
+        {"activation_generator", "activation_identity", "activation_profile",
+         "cross_rank_consistent", "manifest_checksums", "members", "routing_generator", "source",
+         "trace_hashes", "trace_signature", "workload_id"},
+        "raw.workload",
+    )
+    if workload["source"] not in {"canonical-serialized", "seeded-runtime"}:
+        raise ContractError("raw workload source is invalid")
+    if workload["source"] == "canonical-serialized":
+        _typed(workload["workload_id"], "workload", "raw.workload.workload_id")
+        members = workload["members"]
+        checksums = workload["manifest_checksums"]
+        if (
+            not isinstance(members, list)
+            or not members
+            or members != sorted(set(members))
+            or not all(identity.is_typed_id(member, "workload") for member in members)
+            or not isinstance(checksums, dict)
+            or set(checksums) != set(members)
+        ):
+            raise ContractError("raw canonical workload members/checksums are invalid")
+        for member, values in checksums.items():
+            if (
+                not isinstance(values, dict)
+                or set(values) != {"topk_idx", "topk_weights", "trace"}
+                or any(not re.fullmatch(r"[0-9a-f]{64}", str(value)) for value in values.values())
+            ):
+                raise ContractError(f"raw canonical workload checksums differ for {member}")
+        expected_workload_id = identity.workload_id({
+            "members": [
+                {"checksums": checksums[member], "workload_id": member}
+                for member in members
+            ]
+        })
+        if workload["workload_id"] != expected_workload_id:
+            raise ContractError("raw composite workload identity differs from its members")
+    elif any(workload[field] is not None for field in ("members", "manifest_checksums", "workload_id")):
+        raise ContractError("raw seeded workload cannot claim serialized members")
+    if workload["cross_rank_consistent"] is not True:
+        raise ContractError("raw workload is not consistent across ranks")
+
+    measurement = _keys(
+        doc["measurement"],
+        {"component_order_contract", "conditioning", "contract", "rows",
+         "sampling", "source_allocation"},
+        "raw.measurement",
+    )
+    validate_conditioning_contract(measurement["conditioning"], case["phase"])
+    sampling = _keys(
+        measurement["sampling"],
+        {"contract", "iterations_per_trial", "percentile_method", "reduction",
+         "samples_per_component", "trials", "warmup_iterations", "warmup_semantics"},
+        "raw.measurement.sampling",
+    )
+    expected_sampling = {
+        "contract": identity.V1_CASE_PROFILE["sampling_contract"], "iterations_per_trial": 8,
+        "percentile_method": identity.V1_CASE_PROFILE["percentile_method"],
+        "reduction": identity.V1_CASE_PROFILE["rank_reduction"],
+        "samples_per_component": 512, "trials": 64, "warmup_iterations": 32,
+        "warmup_semantics": "full-roundtrip-before-each-component-trial-point-v1",
+    }
+    if sampling != expected_sampling:
+        raise ContractError("raw sampling contract differs from fixed-512-v1")
+    profile = identity.V1_CASE_PROFILE
+    if (
+        case["mode"] != profile["mode"]
+        or case["resource_mode"] != profile["resource_mode"]
+        or measurement["contract"] != profile["contract"]
+        or measurement["component_order_contract"] != profile["component_order_contract"]
+        or measurement["source_allocation"] != "even"
+        or shape["activation_profile"] != profile["activation_profile"]
+        or shape["dispatch_dtype"] != profile["dtype"]
+        or quant["combine_input_dtype"] != profile["combine_dtype"]
+        or quant["combine_output_dtype"] != profile["combine_dtype"]
+        or quant["combine_quant_mode"] != profile["combine_quant_mode"]
+        or quant["scale_layout"] is not None
+        or workload["activation_generator"] != profile["activation_generator"]
+        or workload["activation_profile"] != profile["activation_profile"]
+        or workload["routing_generator"] != profile["routing_generator"]
+    ):
+        raise ContractError("raw case differs from the frozen v1 profile")
+    expected_activation = hashlib.sha256(
+        (
+            f"counter|seed={profile['seed']}|hidden={hidden}|"
+            f"gen={profile['activation_generator']}"
+        ).encode()
+    ).hexdigest()
+    if workload["activation_identity"] != expected_activation:
+        raise ContractError("raw activation identity differs from the frozen seed/profile")
+    rows = measurement["rows"]
+    if not isinstance(rows, list) or not rows:
+        raise ContractError("raw.measurement.rows must be non-empty")
+    seen_points = set()
+    row_tokens = []
+    recomputed_anomalies = 0
+    for index, row_value in enumerate(rows):
+        path = f"raw.measurement.rows[{index}]"
+        row = _keys(
+            row_value,
+            {"anomalies", "components", "correctness", "evidence_id", "global_tokens",
+             "logical_bytes", "point_id", "receive", "routing",
+             "sample_histograms", "sample_sha256", "token_rate_at_latency_percentile",
+             "tokens_per_rank"},
+            path,
+        )
+        tokens = _integer(row["tokens_per_rank"], f"{path}.tokens_per_rank", minimum=1)
+        row_tokens.append(tokens)
+        if tokens in seen_points or tokens not in sample_by_token:
+            raise ContractError(f"{path} token point is duplicate or missing samples")
+        seen_points.add(tokens)
+        if row["global_tokens"] != tokens * ep_size:
+            raise ContractError(f"{path}.global_tokens formula differs")
+        sample_point = sample_by_token[tokens]
+        expected_point = identity.point_id(series=identifiers["series_id"], tokens_per_rank=tokens)
+        if row["point_id"] != expected_point or sample_point["point_id"] != expected_point:
+            raise ContractError(f"{path}.point_id differs")
+        expected_evidence = identity.evidence_id(
+            point=expected_point, allocation=identifiers["allocation_id"],
+            attempt=identifiers["attempt_id"], sample_sha256=sample_point["sample_sha256"],
+        )
+        if row["evidence_id"] != expected_evidence or sample_point["evidence_id"] != expected_evidence:
+            raise ContractError(f"{path}.evidence_id differs")
+        if row["sample_sha256"] != sample_point["sample_sha256"]:
+            raise ContractError(f"{path}.sample_sha256 differs")
+        components = _keys(
+            row["components"], {"combine", "dispatch", "isolated_sum", "roundtrip"},
+            f"{path}.components",
+        )
+        for name in ("combine", "dispatch", "roundtrip"):
+            _validate_component(
+                components[name], sample_point["components"][name], f"{path}.components.{name}"
+            )
+        _validate_component(
+            components["isolated_sum"], None, f"{path}.components.isolated_sum", derived=True
+        )
+        _, _, _, expected_indices, expected_weights = _expected_canonical_trace(
+            scheduled_case["routing"],
+            hidden,
+            topk,
+            logical_experts,
+            physical_experts,
+            ep_size,
+            tokens,
+            profile["seed"],
+            eplb["enabled"],
+            profile["eplb_reference_tokens_per_rank"],
+        )
+        expected_routing = _expected_routing_summary(
+            expected_indices,
+            expected_weights,
+            physical_experts=physical_experts,
+            ep_size=ep_size,
+            tokens_per_rank=tokens,
+            gpus_per_node=scheduled_case["gpus_per_node"],
+            scale_up_domain=scheduled_case["scale_up_domain"],
+        )
+        _equivalent(
+            row["routing"], expected_routing, f"{path}.routing", tolerance=1e-5
+        )
+        expected_payload_counts = expected_routing["payload_copies_per_rank"]
+        throughput = _keys(
+            row["token_rate_at_latency_percentile"], set(PERCENTILES),
+            f"{path}.token_rate_at_latency_percentile",
+        )
+        for percentile in PERCENTILES:
+            latency = components["roundtrip"]["percentiles_us"][percentile]
+            if latency <= 0:
+                raise ContractError(f"{path} roundtrip latency must be positive")
+            _close(
+                throughput[percentile], row["global_tokens"] / (latency * 1e-6),
+                f"{path}.token_rate_at_latency_percentile.{percentile}", 1e-9,
+            )
+        correctness = _keys(
+            row["correctness"],
+            {"contract", "max_relative_error", "passed", "rank_evidence", "scope"},
+            f"{path}.correctness",
+        )
+        if (
+            correctness["contract"] != identity.V1_CASE_PROFILE["oracle_contract"]
+            or correctness["scope"] != "dispatch-metadata-and-transformed-combine"
+            or type(correctness["passed"]) is not bool
+        ):
+            raise ContractError(f"{path}.correctness contract differs")
+        _number(
+            correctness["max_relative_error"],
+            f"{path}.correctness.max_relative_error",
+            minimum=0.0,
+        )
+        rank_evidence = correctness["rank_evidence"]
+        if not isinstance(rank_evidence, list) or len(rank_evidence) != ep_size:
+            raise ContractError(f"{path}.correctness.rank_evidence must cover every rank")
+        ranks = set()
+        observed_max_error = 0.0
+        evidence_passed = True
+        for evidence_index, evidence_value in enumerate(rank_evidence):
+            evidence_path = f"{path}.correctness.rank_evidence[{evidence_index}]"
+            evidence = _keys(
+                evidence_value,
+                {"input_unchanged", "order_stable", "post_timing", "pre_timing", "rank"},
+                evidence_path,
+            )
+            evidence_rank = _integer(evidence["rank"], f"{evidence_path}.rank")
+            if evidence_rank >= ep_size:
+                raise ContractError(f"{evidence_path}.rank is outside the EP group")
+            ranks.add(evidence_rank)
+            if type(evidence["input_unchanged"]) is not bool or type(evidence["order_stable"]) is not bool:
+                raise ContractError(f"{evidence_path} stability fields must be boolean")
+            pre = _validate_oracle(evidence["pre_timing"], f"{evidence_path}.pre_timing")
+            post = _validate_oracle(evidence["post_timing"], f"{evidence_path}.post_timing")
+            if (
+                pre["receive_count"] != expected_payload_counts[evidence_rank]
+                or post["receive_count"] != expected_payload_counts[evidence_rank]
+            ):
+                raise ContractError(
+                    f"{evidence_path}.receive_count differs from canonical routing"
+                )
+            expected_stability = all(
+                pre[field] == post[field]
+                for field in ("ordering_contract", "order_sha256", "dispatch_sha256")
+            )
+            if evidence["order_stable"] != expected_stability:
+                raise ContractError(f"{evidence_path}.order_stable differs from the evidence")
+            errors = [
+                oracle["max_relative_error"]
+                for oracle in (pre, post)
+                if oracle["max_relative_error"] is not None
+            ]
+            observed_max_error = max([observed_max_error, *errors])
+            evidence_passed = evidence_passed and all(
+                (evidence["input_unchanged"], evidence["order_stable"], pre["passed"], post["passed"])
+            )
+        if ranks != set(range(ep_size)) or correctness["passed"] != evidence_passed:
+            raise ContractError(f"{path}.correctness rank coverage or outcome differs")
+        _close(
+            correctness["max_relative_error"], observed_max_error,
+            f"{path}.correctness.max_relative_error",
+        )
+        if components["dispatch"]["availability"] == "measured":
+            for percentile in PERCENTILES:
+                expected = (
+                    components["dispatch"]["percentiles_us"][percentile]
+                    + components["combine"]["percentiles_us"][percentile]
+                )
+                _close(
+                    components["isolated_sum"]["percentiles_us"][percentile], expected,
+                    f"{path}.components.isolated_sum.{percentile}",
+                )
+        routed_copies = expected_routing["routed_copies"]
+        expected_bytes = routed_copies * hidden * 2
+        expected_logical = {
+            "combine": expected_bytes,
+            "dispatch": expected_bytes,
+            "roundtrip": expected_bytes * 2,
+        }
+        _equivalent(row["logical_bytes"], expected_logical, f"{path}.logical_bytes")
+
+        max_receive = max(expected_payload_counts)
+        expected_receive = {
+            "max": max_receive,
+            "mean": sum(expected_payload_counts) / ep_size,
+            "min": min(expected_payload_counts),
+            "total": sum(expected_payload_counts),
+        }
+        _equivalent(row["receive"], expected_receive, f"{path}.receive")
+        expected_histograms = {
+            name: (
+                _expected_histogram([
+                    sample
+                    for trial in sample_point["components"][name]["trials"]
+                    for sample in trial
+                ])
+                if sample_point["components"][name]["availability"] == "measured"
+                else None
+            )
+            for name in ("dispatch", "combine", "roundtrip")
+        }
+        _equivalent(
+            row["sample_histograms"], expected_histograms, f"{path}.sample_histograms"
+        )
+        expected_anomalies = _expected_anomalies(tokens, components)
+        _equivalent(row["anomalies"], expected_anomalies, f"{path}.anomalies")
+        recomputed_anomalies += len(expected_anomalies)
+    if seen_points != set(sample_by_token):
+        raise ContractError("raw rows and sample points differ")
+    if row_tokens != sorted(row_tokens):
+        raise ContractError("raw rows must follow the scheduled token ladder")
+    expected_trace_hashes = sorted(row["routing"]["hash"] for row in rows)
+    if workload["trace_hashes"] != expected_trace_hashes:
+        raise ContractError("raw workload trace hashes differ from measured rows")
+    expected_trace_signature = hashlib.sha256(
+        "|".join(expected_trace_hashes).encode()
+    ).hexdigest()
+    if workload["trace_signature"] != expected_trace_signature:
+        raise ContractError("raw workload trace signature differs from measured rows")
+
+    implementation = _keys(
+        doc["implementation"], {"kernel_generation", "name", "provenance", "resource_profile"},
+        "raw.implementation",
+    )
+    if (
+        implementation["name"] != case["backend"]
+        or implementation["kernel_generation"] != shape["kernel_gen"]
+    ):
+        raise ContractError("raw implementation identity differs from the case")
+    provenance_fields = _obj(implementation["provenance"], "raw.implementation.provenance")
+    unknown = set(provenance_fields) - PROVENANCE_KEYS
+    if unknown:
+        raise ContractError(f"raw implementation provenance has unknown fields {sorted(unknown)}")
+    if (
+        implementation["name"] == "deepep-v2"
+        and provenance_fields.get("deterministic") is not False
+    ):
+        raise ContractError("DeepEP V2 deterministic mode differs from the v1 kernel contract")
+    if implementation["name"] == "deepep-v2" and (
+        _integer(
+            provenance_fields.get("tuning_num_experts"),
+            "raw.implementation.provenance.tuning_num_experts",
+            minimum=1,
+        ) != logical_experts
+        or _integer(
+            provenance_fields.get("num_experts"),
+            "raw.implementation.provenance.num_experts",
+            minimum=1,
+        ) != physical_experts
+    ):
+        raise ContractError("DeepEP V2 expert-count provenance differs from the case")
+    if implementation["name"] == "deepep-hybrid":
+        realized_config = provenance_fields.get("realized_config")
+        jit_kernel_keys = provenance_fields.get("jit_kernel_keys")
+        jit_shared_objects = provenance_fields.get("jit_shared_objects")
+        if (
+            not _hybrid_realized_config_is_valid(realized_config)
+            or not _hybrid_jit_evidence_is_valid(jit_shared_objects, jit_kernel_keys)
+            or realized_config["hidden_dim"] != shape["hidden"]
+            or realized_config["num_of_experts_per_rank"] * ep_size != physical_experts
+            or realized_config["num_of_ranks_per_node"] != ep_size
+            or realized_config["num_of_nodes"] != 1
+            or realized_config["token_data_type"] != "UINT16"
+            or any(
+                len(artifact["rank_artifacts"]) != ep_size
+                for artifact in jit_shared_objects
+            )
+        ):
+            raise ContractError("DeepEP Hybrid realized config/JIT evidence differs from the case")
+    if implementation["name"] == "nccl-ep" and implementation["kernel_generation"] != (
+        collective_kernel_generation(provenance_fields.get("collective_library"))
+    ):
+        raise ContractError("NCCL/RCCL kernel generation differs from collective lineage")
+    resource_profile = _obj(
+        implementation["resource_profile"], "raw.implementation.resource_profile"
+    )
+    expected_resource_profile = project_resource_profile(provenance_fields)
+    if resource_profile != expected_resource_profile:
+        raise ContractError("raw resource profile differs from implementation provenance")
+    topology = _keys(
+        doc["topology"],
+        {"device_count", "device_product", "gpus_per_node", "nodes", "placement",
+         "realized_placement", "scale_up_domain", "topology_class", "transport", "world_size"},
+        "raw.topology",
+    )
+    for field in ("device_count", "gpus_per_node", "nodes", "scale_up_domain", "world_size"):
+        _integer(topology[field], f"raw.topology.{field}", minimum=1)
+    realized = _keys(
+        topology["realized_placement"],
+        {"gpus_per_node", "nodes", "ranks_per_node", "unique_local_ranks", "valid"},
+        "raw.topology.realized_placement",
+    )
+    if realized != {
+        "gpus_per_node": topology["gpus_per_node"],
+        "nodes": topology["nodes"],
+        "ranks_per_node": topology["gpus_per_node"],
+        "unique_local_ranks": True,
+        "valid": True,
+    }:
+        raise ContractError("raw realized placement differs from requested topology")
+    if (
+        topology["world_size"] != ep_size
+        or topology["nodes"] * topology["gpus_per_node"] != ep_size
+        or topology["device_count"] != topology["gpus_per_node"]
+        or topology["placement"] != profile["placement"]
+        or topology["scale_up_domain"] < ep_size
+    ):
+        raise ContractError("raw topology dimensions differ from the case")
+    if implementation["name"] == "deepep-v2":
+        if (
+            provenance_fields.get("allow_hybrid_mode"),
+            provenance_fields.get("gin_enabled"),
+            provenance_fields.get("communication_backend"),
+        ) != (False, False, "nccl-device-lsa"):
+            raise ContractError("DeepEP V2 communication policy differs from the v1 contract")
+        lsa_topology = tuple(
+            _integer(
+                provenance_fields.get(field),
+                f"raw.implementation.provenance.{field}",
+                minimum=1,
+            )
+            for field in (
+                "physical_rdma_ranks", "physical_nvlink_ranks",
+                "logical_scaleout_ranks", "logical_scaleup_ranks",
+            )
+        )
+        if lsa_topology != (1, ep_size, 1, ep_size):
+            raise ContractError("DeepEP V2 no-GIN provenance is outside one realized LSA domain")
+    runtime = _keys(
+        doc["runtime_fingerprint"],
+        {"accelerator_runtime", "collective_library", "device", "driver_version", "framework",
+         "machine", "python_version", "vendor"},
+        "raw.runtime_fingerprint",
+    )
+    for field in ("machine", "python_version", "vendor"):
+        _text(runtime[field], f"raw.runtime_fingerprint.{field}")
+    runtime_device = _keys(
+        runtime["device"], {"arch", "compute_units", "memory_bytes", "product", "warp_size"},
+        "raw.runtime_fingerprint.device",
+    )
+    if topology["device_product"] != runtime_device["product"]:
+        raise ContractError("raw topology and runtime device products differ")
+    platform = capability.PLATFORMS.get(case["runner"])
+    if platform is not None:
+        identity_issues = capability.runtime_identity_issues(
+            case["runner"], vendor=runtime["vendor"], arch=runtime_device["arch"],
+            machine=runtime["machine"], device_name=runtime_device["product"],
+            device_count=topology["device_count"], world_size=topology["world_size"],
+        )
+        expected_topology_class = (
+            f"{case['runner']}-nvl72-mnnvl"
+            if case["runner"] in {"gb200", "gb300"}
+            else f"{case['runner']}-xgmi"
+            if platform["vendor"] == "amd"
+            else f"{platform['product']}-nvlink-island"
+        )
+        if identity_issues or (
+            topology["transport"] != platform["transport"]
+            or topology["gpus_per_node"] != platform["gpus_per_node"]
+            or topology["scale_up_domain"] != platform["scale_up_domain"]
+            or topology["topology_class"] != expected_topology_class
+        ):
+            raise ContractError(
+                "raw runtime/topology differs from the scheduled SKU: "
+                + "; ".join(identity_issues)
+            )
+    raw_provenance = _keys(
+        doc["provenance"], {"command", "distributed_launcher", "git_run", "image", "redaction"},
+        "raw.provenance",
+    )
+    image = _keys(
+        raw_provenance["image"],
+        {"arch", "digest", "digest_verified", "reference", "squash_sha256"},
+        "raw.provenance.image",
+    )
+    if (
+        image["digest_verified"] is not True
+        or not isinstance(image["digest"], str)
+        or not re.fullmatch(r"sha256:[0-9a-f]{64}", image["digest"])
+    ):
+        raise ContractError("raw image digest was not registry-verified")
+    if raw_provenance["redaction"] != "sanitized-v1":
+        raise ContractError("raw provenance redaction contract differs")
+    git_run = raw_provenance["git_run"]
+    if git_run is not None:
+        git_run = _keys(git_run, GIT_RUN_FIELDS, "raw.provenance.git_run")
+    expected_provenance_complete = provenance_complete(
+        provenance_fields,
+        case["backend"],
+        git_run,
+        image_digest=image["digest"],
+        image_verified=image["digest_verified"],
+        squash_sha256=image["squash_sha256"],
+    )
+
+    actual_scheduled_case = {
+        "backend": case["backend"],
+        "canonical": workload["source"] == "canonical-serialized",
+        "eplb": eplb["enabled"],
+        "ep": ep_size,
+        "experts": shape["num_logical_experts"],
+        "gpus_per_node": topology["gpus_per_node"],
+        "hidden": hidden,
+        "ladder": " ".join(map(str, row_tokens)),
+        "nodes": topology["nodes"],
+        "phase": case["phase"],
+        "required_publication": case["required_publication"],
+        "routing": shape["routing"],
+        "samples_per_point": sampling["samples_per_component"],
+        "scale_up_domain": topology["scale_up_domain"],
+        "suite": case["suite"],
+        "timing": (
+            f"{sampling['iterations_per_trial']}:{sampling['trials']}:"
+            f"{sampling['warmup_iterations']}"
+        ),
+        "topk": shape["topk"],
+        "warmup_semantics": sampling["warmup_semantics"],
+        "workload": case["workload_name"],
+    }
+    if scheduled_case != actual_scheduled_case:
+        mismatches = sorted(
+            field for field in scheduled_case
+            if scheduled_case[field] != actual_scheduled_case[field]
+        )
+        raise ContractError(f"raw data differs from scheduled case fields {mismatches}")
+
+    if workload["source"] == "canonical-serialized":
+        _validate_canonical_workload(workload, scheduled_case, rows, eplb)
+
+    expected_series = {
+        "backend": case["backend"],
+        "case_id": identifiers["case_id"],
+        "image_digest": image["digest"],
+        "implementation_contract_sha256": _sha256_json({
+            "kernel_generation": implementation["kernel_generation"],
+            "name": implementation["name"],
+            "provenance": series_provenance(provenance_fields),
+            "resource_profile": resource_profile,
+        }),
+        "public_config_sha256": public_series_config_sha256(public_series_config(
+            kernel_generation=implementation["kernel_generation"],
+            provenance=provenance_fields,
+            resource_profile=resource_profile,
+            resource_mode=case["resource_mode"],
+            device_product=topology["device_product"],
+        )),
+        "routing_control_sha256": routing_implementation_control_sha256(implementation),
+        "runtime_fingerprint_sha256": _sha256_json(runtime),
+        "source_sha": git_run["source_sha"] if git_run is not None else None,
+        "squash_sha256": image["squash_sha256"],
+        "workload_id": workload["workload_id"] or workload["trace_signature"],
+    }
+    if series_factors != expected_series:
+        raise ContractError("raw series factors differ from measured implementation/runtime")
+    expected_allocation = {
+        "artifact": git_run["artifact"] if git_run is not None else None,
+        "execution_id": allocation_factors["execution_id"],
+        "job": git_run["job"] if git_run is not None else None,
+        "repo": git_run["repo"] if git_run is not None else None,
+        "run_attempt": git_run["run_attempt"] if git_run is not None else None,
+        "run_id": git_run["run_id"] if git_run is not None else None,
+        "runner": case["runner"],
+        "source_sha": git_run["source_sha"] if git_run is not None else None,
+    }
+    if allocation_factors != expected_allocation:
+        raise ContractError("raw allocation factors differ from provenance")
+    artifact = _keys(doc["sample_artifact"], {"bytes", "format", "path", "sha256"}, "raw.sample_artifact")
+    if artifact["format"] != SAMPLES_FORMAT or Path(artifact["path"]).name != artifact["path"]:
+        raise ContractError("raw.sample_artifact format/path is invalid")
+    if not isinstance(artifact["sha256"], str) or len(artifact["sha256"]) != 64:
+        raise ContractError("raw.sample_artifact.sha256 is invalid")
+    _integer(artifact["bytes"], "raw.sample_artifact.bytes", minimum=1)
+    outcome = _keys(doc["outcome"], {"publication_status", "reasons", "status", "validity"}, "raw.outcome")
+    if outcome["status"] not in {"success", "invalid"} or outcome["publication_status"] not in {"diagnostic", "invalid"}:
+        raise ContractError("raw outcome status is invalid")
+    if not isinstance(outcome["reasons"], list) or not all(isinstance(x, str) for x in outcome["reasons"]):
+        raise ContractError("raw outcome reasons must be strings")
+    validity = _keys(
+        outcome["validity"],
+        {"anomaly_free", "execution_status", "measurement_conformance", "provenance_complete",
+         "resource_conformance", "sampling_conformance", "semantic_correctness",
+         "workload_identity", "workload_source"},
+        "raw.outcome.validity",
+    )
+    correctness_passed = all(row["correctness"]["passed"] for row in rows)
+    workload_consistent = workload["cross_rank_consistent"] is True
+    expected_status = "success" if correctness_passed and workload_consistent else "invalid"
+    expected_publication = "diagnostic" if expected_status == "success" else "invalid"
+    if (
+        outcome["status"] != expected_status
+        or outcome["publication_status"] != expected_publication
+        or bool(outcome["reasons"]) == (expected_status == "success")
+        or validity["execution_status"] != "complete"
+        or validity["semantic_correctness"] != ("pass" if correctness_passed else "fail")
+        or validity["workload_identity"] != (
+            "consistent-across-ranks" if workload_consistent else "inconsistent"
+        )
+        or validity["workload_source"] != workload["source"]
+        or validity["measurement_conformance"] != "conformant"
+        or validity["sampling_conformance"] != "conformant"
+        or validity["resource_conformance"] != resource_profile["conformance_class"]
+        or validity["anomaly_free"] != (recomputed_anomalies == 0)
+        or validity["provenance_complete"] is not expected_provenance_complete
+    ):
+        raise ContractError("raw outcome differs from its measurement evidence")
+    artifact_safety.assert_publication_safe([doc])
+    return doc
+
+
+def make_terminal_document(
+    *,
+    allocation_factors: dict[str, Any],
+    attempt_ordinal: int,
+    case: dict[str, Any],
+    case_factors: dict[str, Any],
+    control_sha256: str | None,
+    failure_mode: str,
+    generated_at: str,
+    git_run: dict[str, Any],
+    reason: str,
+    return_code: int,
+    source: str,
+    status: str,
+    expected_case_id: str | None = None,
+) -> dict[str, Any]:
+    """Build and self-validate one attributable non-success attempt."""
+    case_id = identity.digest("case", case_factors)
+    if expected_case_id is not None and expected_case_id != case_id:
+        raise ContractError(
+            f"scheduled case ID differs from terminal factors: {expected_case_id} != {case_id}"
+        )
+    allocation_id = identity.allocation_id(allocation_factors)
+    attempt_id = identity.attempt_id(
+        allocation=allocation_id, case=case_id, ordinal=attempt_ordinal
+    )
+    document = {
+        "format": TERMINAL_FORMAT,
+        "schema_version": 1,
+        "record_type": "terminal-outcome",
+        "generated_at": generated_at,
+        "identity": {
+            "allocation_factors": allocation_factors,
+            "allocation_id": allocation_id,
+            "attempt_id": attempt_id,
+            "attempt_ordinal": attempt_ordinal,
+            "case_factors": case_factors,
+            "case_id": case_id,
+        },
+        "case": case,
+        "provenance": {
+            "git_run": git_run,
+            "control_sha256": control_sha256,
+            "redaction": "sanitized-v1",
+            "source": source,
+        },
+        "outcome": {
+            "status": status,
+            "failure_mode": failure_mode,
+            "reason": reason,
+            "return_code": return_code,
+        },
+    }
+    return validate_terminal_document(document)
+
+
+def validate_terminal_document(document: Any) -> dict[str, Any]:
+    _validate_native_schema("terminal-outcome-v1.schema.json", document)
+    doc = _keys(
+        document,
+        {"case", "format", "generated_at", "identity", "outcome", "provenance", "record_type",
+         "schema_version"},
+        "terminal",
+    )
+    if doc["format"] != TERMINAL_FORMAT or doc["schema_version"] != 1 or doc["record_type"] != "terminal-outcome":
+        raise ContractError("terminal format/schema/record type differs from v1")
+    ids = _keys(doc["identity"], {
+        "allocation_factors", "allocation_id", "attempt_id", "attempt_ordinal",
+        "case_factors", "case_id",
+    }, "terminal.identity")
+    for field, kind in (("allocation_id", "allocation"), ("attempt_id", "attempt"), ("case_id", "case")):
+        _typed(ids[field], kind, f"terminal.identity.{field}")
+    ordinal = _integer(ids["attempt_ordinal"], "terminal.identity.attempt_ordinal", minimum=1)
+    case = _keys(doc["case"], TERMINAL_CASE_FIELDS, "terminal.case")
+    factors = _keys(ids["case_factors"], {"case", "profile", "sku"}, "terminal.identity.case_factors")
+    if factors["case"] != case or factors["profile"] != identity.V1_CASE_PROFILE:
+        raise ContractError("terminal case factors differ from the scheduled case/profile")
+    _text(factors["sku"], "terminal.identity.case_factors.sku")
+    allocation = _keys(
+        ids["allocation_factors"], ALLOCATION_FACTOR_FIELDS,
+        "terminal.identity.allocation_factors",
+    )
+    expected_case = identity.digest("case", factors)
+    expected_allocation = identity.allocation_id(allocation)
+    expected_attempt = identity.attempt_id(
+        allocation=expected_allocation, case=expected_case, ordinal=ordinal
+    )
+    if (ids["case_id"], ids["allocation_id"], ids["attempt_id"]) != (
+        expected_case, expected_allocation, expected_attempt
+    ):
+        raise ContractError("terminal typed identities do not match their factors")
+    provenance = _keys(
+        doc["provenance"], {"git_run", "control_sha256", "redaction", "source"},
+        "terminal.provenance",
+    )
+    git_run = _keys(provenance["git_run"], GIT_RUN_FIELDS, "terminal.provenance.git_run")
+    control = provenance["control_sha256"]
+    if control is not None and (
+        not isinstance(control, str) or len(control) != 64
+        or any(char not in "0123456789abcdef" for char in control)
+    ):
+        raise ContractError("terminal control_sha256 is invalid")
+    if provenance["redaction"] != "sanitized-v1":
+        raise ContractError("terminal redaction contract differs")
+    source = _text(provenance["source"], "terminal.provenance.source")
+    outcome = _keys(
+        doc["outcome"], {"failure_mode", "reason", "return_code", "status"}, "terminal.outcome"
+    )
+    if outcome["status"] not in {"failed", "invalid", "unsupported"}:
+        raise ContractError("terminal outcome status is invalid")
+    failure_mode = _text(outcome["failure_mode"], "terminal.outcome.failure_mode")
+    reason = _text(outcome["reason"], "terminal.outcome.reason")
+    _integer(outcome["return_code"], "terminal.outcome.return_code")
+    if source == "runtime-emitter":
+        expected_runner = factors["sku"]
+        expected_reason = RUNTIME_FAILURE_REASONS.get(failure_mode)
+        valid_outcome = outcome["status"] == "failed" and reason == expected_reason
+    elif source == "post-emit-command":
+        expected_runner = factors["sku"]
+        expected_reason = POST_EMIT_FAILURE_REASONS.get(failure_mode)
+        valid_outcome = outcome["status"] == "failed" and reason == expected_reason
+    elif source == "matrix-capability-resolver":
+        expected_runner = "capability-resolver"
+        valid_outcome = (
+            outcome["status"] == "unsupported"
+            and failure_mode == "capability"
+            and reason in CAPABILITY_FAILURE_REASONS
+        )
+    else:
+        raise ContractError("terminal provenance source is not registered")
+    if not valid_outcome:
+        raise ContractError("terminal source and outcome are not registered")
+    expected_allocation = {
+        "artifact": git_run["artifact"],
+        "execution_id": allocation["execution_id"],
+        "job": git_run["job"],
+        "repo": git_run["repo"],
+        "run_attempt": git_run["run_attempt"],
+        "run_id": git_run["run_id"],
+        "runner": expected_runner,
+        "source_sha": git_run["source_sha"],
+    }
+    if allocation != expected_allocation:
+        raise ContractError("terminal allocation factors differ from provenance or source")
+    artifact_safety.assert_publication_safe([doc])
+    return doc
+
+
+def load_raw_attempt(path: str | os.PathLike[str]) -> dict[str, Any]:
+    """Load a raw attempt together with its sibling sample artifact and validate both.
+
+    The sample file is looked up next to ``path`` under the name recorded in
+    ``sample_artifact.path``. Its byte length and SHA-256 digest must equal the recorded
+    ``bytes`` and ``sha256`` values before it is parsed and handed to
+    ``validate_raw_document`` along with the raw document.
+
+    Raises:
+        ContractError: if the sample file's size or digest differs from the recorded values.
+    """
+    document = strict_load(path)
+    descriptor = _obj(_obj(document, "raw").get("sample_artifact"), "raw.sample_artifact")
+    sample_name = _text(descriptor.get("path"), "raw.sample_artifact.path")
+    sample_path = Path(path).with_name(sample_name)
+    payload = sample_path.read_bytes()
+    size_matches = len(payload) == descriptor.get("bytes")
+    if not size_matches or hashlib.sha256(payload).hexdigest() != descriptor.get("sha256"):
+        raise ContractError("sample artifact bytes or digest differ")
+    return validate_raw_document(document, strict_load(sample_path))
+
+
+def load_attempt(path: str | os.PathLike[str]) -> dict[str, Any]:
+    """Fully validate and return one native raw or terminal attempt.
+
+    The document's ``format`` field selects the validator: raw attempts are reloaded through
+    ``load_raw_attempt`` (which also checks the sample artifact), terminal attempts go through
+    ``validate_terminal_document``.
+
+    Raises:
+        ContractError: if the document is not a mapping or declares any other format.
+    """
+    document = strict_load(path)
+    if not isinstance(document, dict):
+        raise ContractError("unknown native attempt format")
+    declared = document.get("format")
+    if declared == RAW_FORMAT:
+        return load_raw_attempt(path)
+    if declared == TERMINAL_FORMAT:
+        return validate_terminal_document(document)
+    raise ContractError("unknown native attempt format")
+
+
+def quarantine_invalid_attempt(path: str | os.PathLike[str]) -> bool:
+    """Move an invalid attempt and its basename-safe sample outside JSON upload globs.
+
+    An attempt that passes ``load_attempt`` is left untouched. Otherwise the attempt, and the
+    sample file it names (only when that name is a non-empty bare basename and a regular file in
+    the same directory), are renamed with a ``.quarantine`` suffix.
+
+    Returns:
+        True when the attempt was renamed; False when ``path`` is not a regular file or the
+        attempt validated.
+    """
+    destination = Path(path)
+    if not destination.is_file():
+        return False
+    try:
+        load_attempt(destination)
+    except (ContractError, identity.IdentityError, OSError, ValueError):
+        # Same failure set that main() reports as a contract error.
+        pass
+    else:
+        return False
+    try:
+        document = json.loads(destination.read_bytes())
+    except (OSError, ValueError):
+        # ValueError covers JSONDecodeError and the UnicodeDecodeError raised for bytes that
+        # cannot be decoded; an unreadable attempt must still be quarantined.
+        document = {}
+    artifact = document.get("sample_artifact") if isinstance(document, dict) else None
+    sample_name = artifact.get("path") if isinstance(artifact, dict) else None
+    # An empty name would make with_name() raise before the attempt itself is moved.
+    if isinstance(sample_name, str) and sample_name and Path(sample_name).name == sample_name:
+        sample_path = destination.with_name(sample_name)
+        # A document naming itself as its sample is moved once, by the final rename below.
+        if sample_path != destination and sample_path.is_file():
+            os.replace(sample_path, sample_path.with_name(sample_path.name + ".quarantine"))
+    os.replace(destination, destination.with_name(destination.name + ".quarantine"))
+    return True
+
+
+def normalize_attempt(document: dict[str, Any]) -> dict[str, Any]:
+    """Return the publisher-facing projection after native validation.
+
+    Raw and terminal attempts share the identity, case, timestamp and outcome fields. Only a
+    raw attempt contributes measurement rows, a runtime fingerprint and a series ID; a terminal
+    attempt projects an empty ``points`` list and ``None`` for the other two.
+
+    Raises:
+        ContractError: if ``format`` is neither the raw nor the terminal format.
+    """
+    declared = document.get("format")
+    is_raw = declared == RAW_FORMAT
+    if not is_raw and declared != TERMINAL_FORMAT:
+        raise ContractError("unknown attempt format")
+    ids = document["identity"]
+    return {
+        "allocation_id": ids["allocation_id"],
+        "attempt_id": ids["attempt_id"],
+        "case": document["case"],
+        "case_id": ids["case_id"],
+        "generated_at": document["generated_at"],
+        "outcome": document["outcome"],
+        "points": document["measurement"]["rows"] if is_raw else [],
+        "runtime_fingerprint": document["runtime_fingerprint"] if is_raw else None,
+        "series_id": ids["series_id"] if is_raw else None,
+    }
+
+
+def _env_integer(name: str, default: int) -> int:
+    """Read environment variable ``name`` as an integer.
+
+    ``default`` is returned when the variable is unset or its text is not an integer literal.
+    """
+    try:
+        parsed = int(os.environ.get(name, str(default)))
+    except ValueError:
+        parsed = default
+    return parsed
+
+
+def _env_enabled(name: str) -> bool:
+    """Report whether ``name`` holds ``1``, ``true`` or ``yes``, compared case-insensitively."""
+    flag = os.environ.get(name, "")
+    return flag.lower() in {"1", "true", "yes"}
+
+
+def _terminal_case_from_environment(backend: str, phase: str) -> dict[str, Any]:
+    """Assemble the case coordinates of a terminal document from ``CX_*`` variables.
+
+    Integer variables that are unset or unparsable fall back to the defaults written below.
+    The token-ladder default depends on whether ``phase`` is ``"decode"``; ``backend`` and
+    ``phase`` are copied into the result unchanged.
+    """
+    environment = os.environ
+    ep = _env_integer("CX_EP", _env_integer("CX_NGPUS", 1))
+    gpus_per_node = _env_integer("CX_GPUS_PER_NODE", ep)
+    if phase == "decode":
+        default_ladder = "1 2 4 8 16 32 64 128"
+    else:
+        default_ladder = "128 256 512 1024 2048 4096"
+    iters = _env_integer("CX_ITERS", 8)
+    trials = _env_integer("CX_TRIALS", 64)
+    warmup = _env_integer("CX_WARMUP", 32)
+    return {
+        # Empty strings count as unset for the three labels below.
+        "suite": environment.get("CX_SUITE") or "manual",
+        "workload": environment.get("CX_WORKLOAD_NAME") or "manual",
+        "required_publication": environment.get("CX_REQUIRED_PUBLICATION") or "diagnostic",
+        "backend": backend,
+        "routing": environment.get("CX_ROUTING", "uniform"),
+        "phase": phase,
+        "ep": ep,
+        "eplb": _env_enabled("CX_EPLB"),
+        "hidden": _env_integer("CX_HIDDEN", 7168),
+        "topk": _env_integer("CX_TOPK", 8),
+        "experts": _env_integer("CX_EXPERTS", 256),
+        "samples_per_point": _env_integer("CX_SAMPLES_PER_POINT", 512),
+        "warmup_semantics": environment.get(
+            "CX_WARMUP_SEMANTICS",
+            "full-roundtrip-before-each-component-trial-point-v1",
+        ),
+        "ladder": environment.get("CX_TOKENS_LADDER", "") or default_ladder,
+        "timing": f"{iters}:{trials}:{warmup}",
+        "canonical": _env_enabled("CX_CANONICAL"),
+        "nodes": _env_integer("CX_NODES", _env_integer("SLURM_NNODES", 1)),
+        "gpus_per_node": gpus_per_node,
+        "scale_up_domain": _env_integer("CX_SCALE_UP_DOMAIN", gpus_per_node),
+    }
+
+
+def _git_run_from_environment() -> dict[str, Any]:
+    """Collect workflow run provenance from the process environment.
+
+    Each field takes the first non-empty variable among its candidates; a field whose
+    candidates are all unset or empty is recorded as ``None``.
+    """
+    candidates = (
+        ("run_id", ("GITHUB_RUN_ID",)),
+        ("run_attempt", ("GITHUB_RUN_ATTEMPT",)),
+        ("ref", ("GITHUB_REF_NAME", "GITHUB_REF")),
+        ("source_sha", ("COLLECTIVEX_SOURCE_SHA", "GITHUB_SHA")),
+        ("repo", ("GITHUB_REPOSITORY",)),
+        ("job", ("GITHUB_JOB",)),
+        ("artifact", ("COLLECTIVEX_ARTIFACT_NAME",)),
+    )
+    environment = os.environ
+    return {
+        field: next((environment[name] for name in names if environment.get(name)), None)
+        for field, names in candidates
+    }
+
+
+def _allocation_factors_from_environment(
+    runner: str, git_run: dict[str, Any]
+) -> dict[str, Any]:
+    """Build allocation factors from run provenance, the runner and the execution ID.
+
+    All fields except ``runner`` and ``execution_id`` are copied from ``git_run``;
+    ``execution_id`` comes from ``COLLECTIVEX_EXECUTION_ID`` and is ``None`` when that variable
+    is unset or empty.
+    """
+    factors: dict[str, Any] = {
+        "artifact": git_run["artifact"],
+        "execution_id": os.environ.get("COLLECTIVEX_EXECUTION_ID") or None,
+    }
+    for field in ("job", "repo", "run_attempt", "run_id"):
+        factors[field] = git_run[field]
+    factors["runner"] = runner
+    factors["source_sha"] = git_run["source_sha"]
+    return factors
+
+
+def make_terminal_from_environment(
+    *, backend: str, phase: str, return_code: int, failure_mode: str | None = None
+) -> dict[str, Any]:
+    """Build a terminal document from the same exported case coordinates as run_ep.
+
+    The failure mode is ``failure_mode`` when given, otherwise it is looked up from
+    ``return_code`` and falls back to ``"execution"``. The document is always emitted with
+    source ``runtime-emitter`` and status ``failed``.
+
+    Raises:
+        ContractError: if the failure mode has no entry in ``RUNTIME_FAILURE_REASONS``.
+    """
+    if failure_mode:
+        mode = failure_mode
+    else:
+        mode = RETURN_CODE_FAILURE_MODES.get(return_code, "execution")
+    reason = RUNTIME_FAILURE_REASONS.get(mode)
+    if reason is None:
+        raise ContractError("runtime failure mode is not registered")
+    environment = os.environ
+    runner = environment.get("CX_RUNNER", "")
+    case = _terminal_case_from_environment(backend, phase)
+    git_run = _git_run_from_environment()
+    arguments: dict[str, Any] = {
+        "allocation_factors": _allocation_factors_from_environment(runner, git_run),
+        "attempt_ordinal": _env_integer("CX_ATTEMPT_ID", 1),
+        "case": case,
+        # The runner name doubles as the SKU in the case factors.
+        "case_factors": {"case": case, "profile": identity.V1_CASE_PROFILE, "sku": runner},
+        "control_sha256": environment.get("COLLECTIVEX_CONTROL_SHA256") or None,
+        "failure_mode": mode,
+        "generated_at": dt.datetime.now(dt.timezone.utc).isoformat(),
+        "git_run": git_run,
+        "reason": reason,
+        "return_code": return_code,
+        "source": "runtime-emitter",
+        "status": "failed",
+        "expected_case_id": environment.get("CX_CASE_ID") or None,
+    }
+    return make_terminal_document(**arguments)
+
+
+def _write_document(path: str | os.PathLike[str], document: dict[str, Any]) -> None:
+    """Serialize ``document`` as indented, key-sorted JSON and move it into place at ``path``.
+
+    Missing parent directories are created. The text is first written to a sibling file with a
+    ``.tmp`` suffix, which then replaces ``path`` through ``os.replace``.
+    """
+    target = Path(path)
+    target.parent.mkdir(parents=True, exist_ok=True)
+    staging = target.with_name(target.name + ".tmp")
+    serialized = json.dumps(document, indent=2, sort_keys=True) + "\n"
+    with staging.open("w") as handle:
+        handle.write(serialized)
+    os.replace(staging, target)
+
+
+def demote_raw_attempt(path: str | os.PathLike[str], return_code: int) -> dict[str, Any]:
+    """Replace a rank-zero raw result when the distributed command later fails.
+
+    The raw document at ``path`` is reloaded, a ``post-emit-command`` terminal document is
+    built from its identity and provenance, the sample file it names (only when that name is a
+    non-empty bare basename) is removed, and the terminal document overwrites ``path``.
+
+    Returns:
+        The terminal document that was written.
+
+    Raises:
+        ContractError: if ``path`` is not a native raw attempt, its identity lacks the factors
+            a terminal document needs, or the failure mode derived from ``return_code`` has no
+            registered post-emit reason.
+    """
+    destination = Path(path)
+    raw = strict_load(destination)
+    if not isinstance(raw, dict) or raw.get("format") != RAW_FORMAT:
+        raise ContractError("only a native raw attempt can be demoted")
+    ids = _obj(raw.get("identity"), "raw.identity")
+    required = {
+        "allocation_factors", "allocation_id", "attempt_id", "attempt_ordinal",
+        "case_factors", "case_id",
+    }
+    if not required.issubset(ids):
+        raise ContractError("raw identity lacks terminal factors")
+    # The raw document is not fully validated here, so check the nested shape before indexing.
+    case_factors = _obj(ids["case_factors"], "raw.identity.case_factors")
+    if "case" not in case_factors:
+        raise ContractError("raw identity lacks terminal factors")
+    mode = RETURN_CODE_FAILURE_MODES.get(return_code, "execution")
+    # Mirror make_terminal_from_environment: an unregistered mode is a contract error that
+    # main() reports, not an uncaught KeyError.
+    reason = POST_EMIT_FAILURE_REASONS.get(mode)
+    if reason is None:
+        raise ContractError("post-emit failure mode is not registered")
+    git_run = _obj(_obj(raw.get("provenance"), "raw.provenance").get("git_run"), "raw.provenance.git_run")
+    terminal = make_terminal_document(
+        allocation_factors=ids["allocation_factors"],
+        attempt_ordinal=ids["attempt_ordinal"],
+        case=case_factors["case"],
+        case_factors=case_factors,
+        control_sha256=os.environ.get("COLLECTIVEX_CONTROL_SHA256") or None,
+        failure_mode=mode,
+        generated_at=dt.datetime.now(dt.timezone.utc).isoformat(),
+        git_run=git_run,
+        reason=reason,
+        return_code=return_code,
+        source="post-emit-command",
+        status="failed",
+        expected_case_id=ids["case_id"],
+    )
+    artifact = raw.get("sample_artifact")
+    sample_name = artifact.get("path") if isinstance(artifact, dict) else None
+    # An empty name would make with_name() raise and leave the raw result in place.
+    if isinstance(sample_name, str) and sample_name and Path(sample_name).name == sample_name:
+        destination.with_name(sample_name).unlink(missing_ok=True)
+    _write_document(destination, terminal)
+    return terminal
+
+
+def validate_attempt_paths(paths: list[str]) -> int:
+    """Fully validate a result directory's attempts and paired sample artifacts.
+
+    Every path must hold a raw attempt, a terminal attempt or a samples document. The set of
+    samples documents passed in must equal the set of sample files referenced by the raw
+    attempts.
+
+    Returns:
+        The number of raw and terminal attempts validated.
+
+    Raises:
+        ContractError: if ``paths`` is empty or names the same file twice, an artifact has an
+            unknown format, sample files and references differ, or no attempt is present.
+    """
+    # Compare resolved paths so two spellings of one file ("a.json", "./a.json") cannot slip
+    # past the uniqueness check and be counted as two attempts.
+    resolved_paths = [Path(raw_path).resolve() for raw_path in paths]
+    if not resolved_paths or len(resolved_paths) != len(set(resolved_paths)):
+        raise ContractError("validate-many requires unique result paths")
+    sample_paths: set[Path] = set()
+    referenced_samples: set[Path] = set()
+    attempt_count = 0
+    for path in resolved_paths:
+        document = strict_load(path)
+        declared = document.get("format") if isinstance(document, dict) else None
+        if isinstance(document, dict) and declared == RAW_FORMAT:
+            document = load_raw_attempt(path)
+            referenced_samples.add(path.with_name(document["sample_artifact"]["path"]))
+            attempt_count += 1
+        elif isinstance(document, dict) and declared == TERMINAL_FORMAT:
+            validate_terminal_document(document)
+            attempt_count += 1
+        elif isinstance(document, dict) and declared == SAMPLES_FORMAT:
+            validate_samples_document(document)
+            sample_paths.add(path)
+        else:
+            raise ContractError(f"unknown result artifact {path.name}")
+    if sample_paths != referenced_samples:
+        raise ContractError("sample artifacts are missing, orphaned, or outside the validated set")
+    if attempt_count == 0:
+        raise ContractError("result set contains no native attempts")
+    return attempt_count
+
+
+def validate_delivery(
+    paths: list[str], source_path: str, *, disposition: str | None = None
+) -> int:
+    """Reconcile a shard or matrix disposition with its complete native attempt set.
+
+    ``source_path`` names either a ``collectivex.matrix.v1`` document, whose
+    ``requested_cases`` are filtered by ``disposition``, or a shard control carrying a
+    ``cases`` list. Every attempt in ``paths`` must map to an expected case, carry that case's
+    scheduled factors, agree with each workflow environment variable that is set and
+    non-empty, and use contiguous attempt ordinals starting at 1. Terminal attempts must also
+    reference the SHA-256 of the source file.
+
+    Returns:
+        The number of raw and terminal attempts found in ``paths``.
+
+    Raises:
+        ContractError: on a malformed source document or any mismatch between the source and
+            the delivered attempts.
+    """
+    source_file = Path(source_path).resolve()
+    source = strict_load(source_file)
+    is_matrix = isinstance(source, dict) and source.get("format") == "collectivex.matrix.v1"
+    is_shard = (
+        not is_matrix and isinstance(source, dict) and isinstance(source.get("cases"), list)
+    )
+    if is_matrix and disposition is None:
+        raise ContractError("matrix delivery validation requires a disposition")
+    if not (is_matrix or is_shard):
+        raise ContractError("delivery source is not a matrix or shard control")
+    try:
+        if is_matrix:
+            wrappers = [
+                item for item in source.get("requested_cases", [])
+                if isinstance(item, dict) and item.get("disposition") == disposition
+            ]
+            expected = {
+                item["case"]["case_id"]: (item["sku"], item["case"])
+                for item in wrappers
+            }
+            expected_count = len(wrappers)
+        else:
+            expected = {
+                case["case_id"]: (source.get("sku"), case)
+                for case in source["cases"]
+            }
+            expected_count = len(source["cases"])
+    except (KeyError, TypeError) as exc:
+        # Surface malformed entries as a contract error that main() reports cleanly.
+        raise ContractError("delivery source has malformed case entries") from exc
+    require_one_allocation = is_shard or disposition == "unsupported"
+    if not expected or len(expected) != expected_count:
+        raise ContractError("delivery source has empty or duplicate case coverage")
+
+    def workflow_value(*names: str) -> str | None:
+        # Empty strings count as unset, matching how the emitters record these factors.
+        for name in names:
+            value = os.environ.get(name)
+            if value:
+                return value
+        return None
+
+    # The workflow environment is the same for every attempt, so it is read once.
+    expected_environment = {
+        "artifact": workflow_value("COLLECTIVEX_ARTIFACT_NAME"),
+        "execution_id": workflow_value("COLLECTIVEX_EXECUTION_ID"),
+        "job": workflow_value("GITHUB_JOB"),
+        "repo": workflow_value("GITHUB_REPOSITORY"),
+        "run_attempt": workflow_value("GITHUB_RUN_ATTEMPT"),
+        "run_id": workflow_value("GITHUB_RUN_ID"),
+        "source_sha": workflow_value("COLLECTIVEX_SOURCE_SHA", "GITHUB_SHA"),
+    }
+
+    validate_attempt_paths(paths)
+    attempts = []
+    for raw_path in paths:
+        document = strict_load(raw_path)
+        if isinstance(document, dict) and document.get("format") in {RAW_FORMAT, TERMINAL_FORMAT}:
+            attempts.append(load_attempt(raw_path))
+    by_case: dict[str, list[dict[str, Any]]] = {}
+    attempt_ids = set()
+    allocation_ids = set()
+    source_sha256 = hashlib.sha256(source_file.read_bytes()).hexdigest()
+    for document in attempts:
+        ids = document["identity"]
+        case_id = ids["case_id"]
+        if case_id not in expected or ids["attempt_id"] in attempt_ids:
+            raise ContractError("delivery contains an extra case or duplicate attempt")
+        attempt_ids.add(ids["attempt_id"])
+        allocation_ids.add(ids["allocation_id"])
+        sku, scheduled = expected[case_id]
+        # The scheduled entry carries its own case_id; the attempt's case factors do not.
+        scheduled_case = {key: value for key, value in scheduled.items() if key != "case_id"}
+        if ids["case_factors"] != {
+            "case": scheduled_case, "profile": identity.V1_CASE_PROFILE, "sku": sku
+        }:
+            raise ContractError("delivery attempt differs from its scheduled case")
+        factors = ids["allocation_factors"]
+        expected_runner = (
+            "capability-resolver"
+            if document["format"] == TERMINAL_FORMAT
+            and document["provenance"]["source"] == "matrix-capability-resolver"
+            else sku
+        )
+        if any(
+            value is not None and factors[field] != value
+            for field, value in expected_environment.items()
+        ) or factors["runner"] != expected_runner:
+            raise ContractError("delivery allocation factors differ from the workflow")
+        if document["format"] == TERMINAL_FORMAT:
+            control = document["provenance"]["control_sha256"]
+            if control != source_sha256:
+                raise ContractError("terminal outcome does not reference its exact control document")
+        by_case.setdefault(case_id, []).append(document)
+    if set(by_case) != set(expected):
+        raise ContractError("delivery case coverage is incomplete")
+    for case_id, documents in by_case.items():
+        ordinals = sorted(document["identity"]["attempt_ordinal"] for document in documents)
+        if ordinals != list(range(1, len(ordinals) + 1)):
+            raise ContractError(f"delivery attempt ordinals are not contiguous for {case_id}")
+    if require_one_allocation and len(allocation_ids) != 1:
+        raise ContractError("one shard must use exactly one allocation identity")
+    return len(attempts)
+
+
+def main() -> int:
+    """Run the native attempt command-line interface and return the process exit status.
+
+    Subcommands: ``probe``, ``emit-terminal``, ``demote``, ``validate-many``,
+    ``quarantine-invalid`` and ``validate-delivery``. A contract, identity, OS or value error
+    is printed to stderr and reported as exit status 1.
+    """
+    parser = argparse.ArgumentParser(description="CollectiveX native attempt contracts")
+    commands = parser.add_subparsers(dest="command", required=True)
+
+    probe_parser = commands.add_parser("probe")
+    probe_parser.add_argument("path")
+    probe_parser.add_argument("--status", choices=("success", "invalid"))
+
+    emit_parser = commands.add_parser("emit-terminal")
+    emit_parser.add_argument("--out", required=True)
+    emit_parser.add_argument("--backend", required=True)
+    emit_parser.add_argument("--phase", required=True, choices=("decode", "prefill"))
+    emit_parser.add_argument("--return-code", required=True, type=int)
+    emit_parser.add_argument("--failure-mode")
+
+    demote_parser = commands.add_parser("demote")
+    demote_parser.add_argument("path")
+    demote_parser.add_argument("--return-code", required=True, type=int)
+
+    many_parser = commands.add_parser("validate-many")
+    many_parser.add_argument("paths", nargs="+")
+
+    quarantine_parser = commands.add_parser("quarantine-invalid")
+    quarantine_parser.add_argument("path")
+
+    delivery_parser = commands.add_parser("validate-delivery")
+    delivery_parser.add_argument("--source", required=True)
+    delivery_parser.add_argument("--disposition")
+    delivery_parser.add_argument("paths", nargs="+")
+
+    args = parser.parse_args()
+    command = args.command
+    try:
+        if command == "probe":
+            document = load_attempt(args.path)
+            if args.status is None:
+                # Without --status, passing validation is enough.
+                return 0
+            if document.get("format") != RAW_FORMAT:
+                return 1
+            outcome = document["outcome"]
+            validity = outcome.get("validity")
+            matches = (
+                isinstance(validity, dict)
+                and validity.get("execution_status") == "complete"
+                and outcome.get("status") == args.status
+            )
+            return 0 if matches else 1
+        elif command == "emit-terminal":
+            document = make_terminal_from_environment(
+                backend=args.backend,
+                phase=args.phase,
+                return_code=args.return_code,
+                failure_mode=args.failure_mode,
+            )
+            _write_document(args.out, document)
+            print(f"preserved terminal outcome ({document['outcome']['failure_mode']})")
+        elif command == "validate-many":
+            validated = validate_attempt_paths(args.paths)
+            print(f"validated {validated} native attempts")
+        elif command == "quarantine-invalid":
+            quarantine_invalid_attempt(args.path)
+        elif command == "validate-delivery":
+            delivered = validate_delivery(args.paths, args.source, disposition=args.disposition)
+            print(f"validated {delivered} delivery attempts")
+        else:
+            # The only remaining subcommand is "demote".
+            demote_raw_attempt(args.path, args.return_code)
+        return 0
+    except (ContractError, identity.IdentityError, OSError, ValueError) as exc:
+        print(f"terminal contract error: {exc}", file=sys.stderr)
+        return 1
+
+
+# Script entry point: exit with the status code returned by main().
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/experimental/CollectiveX/docs/methodology.md b/experimental/CollectiveX/docs/methodology.md
new file mode 100644
index 0000000000..f68ef89a5c
--- /dev/null
+++ b/experimental/CollectiveX/docs/methodology.md
@@ -0,0 +1,256 @@
+# CollectiveX EP v1 Contract
+
+<div align="center">
+
+**English** | [中文](./methodology_zh.md)
+
+</div>
+
+This document defines the contract for new CollectiveX results. Historical run notes are evidence, not contract.
+
+## Product Boundary
+
+CollectiveX is a communication microbenchmark for:
+
+- comparing EP libraries on one chip/topology;
+- comparing EP latency and logical payload bandwidth across systems under the same workload; and
+- exposing unsupported, failed, invalid, and unstable evidence without contaminating decisions.
+
+It does not predict serving throughput without a separate correlation study.
+
+## Matrix
+
+The promoted workload is `deepseek-v3-v1`: hidden 7168, top-k 8, 256 routed experts, BF16 dispatch
+and combine, normal mode, packed placement, backend-tuned resources, and
+`layout-and-dispatch-v1`.
+
+- `ep-core-v1`: uniform routing; decode T=1..128 powers of two; prefill T=256/512.
+- `ep-routing-v1`: Zipf with EPLB off/on; decode T=128; prefill T=512.
+- Current runnable surface: 38 cells, 228 cases, 532 token points before repeats.
+
+Unsupported combinations are terminal outcomes, not silently skipped coverage. DeepEP V2 is the
+`ElasticBuffer` introduced by PR #605, pinned with upstream PR #630's minimal pure-scale-up fix.
+Current V2 cases request NCCL Device API LSA because their world size does not exceed the declared
+scale-up domain, then fail closed unless NCCL's realized LSA team covers the full EP world. GIN is
+reserved for a separately identified true scale-out cohort. Source-declared NVIDIA capabilities
+remain unvalidated until GPU outcomes pass the native oracle and publisher gates. Removed axes
+include `[cl]`, `[rv]`, LL,
+quantization, alternate activation/routing profiles,
+uneven allocation, placement permutations, model envelopes, and scaling.
+FlashInfer is excluded from v1 after repeatable intermittent execution failures; those failures are
+not converted into planned-unsupported coverage.
+MoRI AsyncLL and intranode paths publish distinct kernel generations rather than masquerading as the
+same controlled implementation in cross-chip cohorts.
+
+## Workload Identity
+
+One canonical workload is generated over the global token batch and sliced by source rank. Expert
+indices and gate weights are serialized. Activations use a versioned integer counter formula whose
+BF16 values are exact across runtimes; its full identity is bound into the manifest. The manifest
+also binds shape/EP coordinates and oracle version. SHA-256 covers canonical bytes and parameters;
+library RNG regeneration is not proof of identity.
+
+Routing traffic distinguishes:
+
+- token-expert assignments, which determine expert compute load; and
+- rank-deduplicated token payload copies, which determine EP activation traffic.
+
+Adapters may not generate routing or reinterpret one quantity as the other.
+
+## Measurement
+
+`layout-and-dispatch-v1` times dispatch layout plus communication. Expert-output staging is outside
+isolated combine timing and inside measured paired roundtrip. Each component declares availability,
+origin, start/end states, stage scope, and sample count. A paired-only API reports null isolated
+components. Combine is activation-only for every adapter: dispatch gate weights are verified but are
+not returned over the timed combine path. `isolated_sum` is derived and never used for throughput or
+recommendations.
+
+Every measured component uses `fixed-512-v1`:
+
+- 64 trials x 8 timed iterations = 512 observations;
+- 32 synchronized full dispatch-stage-combine warmups before each available measured component at
+  every trial/point;
+- roundtrip first, then isolated dispatch and combine, with a fixed per-phase conditioning ladder; and
+- per-iteration maximum latency across ranks before nearest-rank p50/p90/p95/p99.
+
+Measured roundtrip p99 is the headline latency. Retries remain separate attempts; a later success
+does not erase earlier failures. Decode and prefill identify the serving regime represented by one
+MoE-layer collective; they do not change the timed primitive at an otherwise identical shape.
+
+The NCCL/RCCL reference is an end-to-end Python adapter, not a bare fabric primitive. Its dispatch
+boundary includes layout, count exchange, a device-to-host split synchronization, fresh receive
+allocation, and four payload/metadata all-to-all calls; activation-only combine adds one all-to-all plus
+scatter/reduction. Its p99 therefore measures the complete reference-adapter boundary and can be
+host/scheduler-sensitive. It is useful for portable system controls but must not be labeled fabric,
+link, bus, or single-collective latency.
+
+The versioned conditioning and EPLB planner contracts (reference trace, redundant count, and
+placement/remap version) are part of scheduled and evidence identity.
+
+Logical payload bandwidth is:
+
+`logical_payload_bytes / measured_latency_seconds`
+
+Payload bytes use rank-deduplicated activations plus required scale bytes at the named boundary.
+They exclude expert metadata, padding, and backend buffer capacity. Algorithm bandwidth, bus
+bandwidth, wire utilization, and physical-link utilization are not published without a defined
+primitive model or transport counters. Logical bandwidth must never be labeled physical bandwidth.
+Published payload and token rates are named `rate_at_latency_percentile`: bytes or tokens divided by
+the matching latency percentile. They are lower-tail service rates at p99 latency, not p99
+percentiles of an inverted rate distribution.
+
+## Correctness
+
+An implementation-independent oracle uses an expert-specific deterministic transform so wrong
+expert routing cannot pass an identity roundtrip. For every rank and point it verifies:
+
+1. destination rank/expert, source token, multiplicity, gate weight, and receive counts;
+2. dispatched payload and metadata before timing;
+3. combined output before timing;
+4. unchanged semantic inputs through all timed samples; and
+5. dispatched payload/metadata and combined output again after timing.
+
+Every v1 adapter uses activation-only, unweighted rank-sum combine. The oracle builds each rank's
+gate-weighted expert aggregate before combine, independently derives `sum(gate * expert(token))`,
+and checks every element with recorded `rtol=0.05` and `atol=0.02`. Any failed rank or point makes
+the case ineligible.
+Pre/post dispatch evidence is hashed in canonical source-token order. Native receive slots may be
+assigned nondeterministically, so physical receive order is not treated as a correctness property.
+
+## Native Result
+
+One raw case document uses `format: "collectivex.ep.v1"`, rejects unknown fields, and contains:
+
+- `case`: stable case ID, suite, required tier, and coordinate;
+- `workload`: canonical identity and logical MoE shape;
+- `measurement`: sampling, component states, timing, and byte accounting;
+- `implementation`: instantiated class/API, pinned source, loaded libraries, and resources;
+- `topology`: requested and realized SKU, devices, placement, scale-up domain, and transport;
+- `provenance`: source SHA, image/squash hashes, allocation, run, and attempt;
+- `measurement.rows`: point latency, byte accounting, token rate, correctness, load, fanout, and anomaly evidence; and
+- `outcome`: `success`, `failed`, `invalid`, `diagnostic`, or `unsupported`, with reasons.
+
+Raw result documents and exact samples pass through transient GitHub delivery artifacts before the
+publisher archives them in the private bundle; they never enter the public tree. Private environment
+details remain in local mode-0600 logs and ignored operator notes; they are never archived or
+published. Every expected case has one selected terminal outcome, while every attempt is retained.
+
+## Identity And Comparisons
+
+Canonical JSON produces three full SHA-256 IDs:
+
+- `series_id`: all locked factors except token coordinate and repeat allocation;
+- `point_id`: `series_id` plus token coordinate; and
+- `evidence_id`: `point_id` plus allocation/run/attempt/sample checksum.
+
+Locked factors include workload bytes, measurement and sampling contract, resources, realized
+topology, implementation/build, loaded libraries, image/squash, runtime, and source SHA.
+Deferred code generation is captured before measurement and recaptured afterward. DeepEP V2 uses a
+fixed NVCC random seed and binds final cache keys plus generated-source and executable-SASS hashes;
+raw CUBIN bytes remain private diagnostics. Hybrid binds its realized auto-tuned config and complete
+kernel-key set while retaining rank-local shared-object hashes as private diagnostics. Locally built
+extension hashes are diagnostic; their pinned source trees, build recipe, runtime, and dependencies
+remain series-bound.
+The series identity includes the case ID, which binds the complete scheduled token ladder and the
+frozen percentile, rank-reduction, conditioning, warmup, and correctness semantics.
+
+A controlled comparison declares one contrast:
+
+- `library`: backend implementation and its tuned resource profile may differ; the realized system,
+  workload, EP, resource policy, source, and measurement remain matched;
+- `chip`: a controlled platform contrast. The full realized system/topology and tuned resource
+  profile may differ while workload, EP, placement class, resource policy, backend lineage, source,
+  and measurement remain matched. It is not a silicon-only comparison;
+- `system`: all hardware/backend differences stay visible while workload, EP, and measurement match;
+- `routing`: routing distribution/EPLB differs while the static implementation build/generator,
+  system, model shape, resource profile, and measurement remain matched. Uniform and Zipf without
+  EPLB reuse the same generated implementation; EPLB's physical-expert/JIT configuration remains an
+  explicit treatment difference.
+
+Any undeclared mismatch rejects the overlay. Chip/system results describe measured systems, not
+silicon alone.
+
+## Evidence Policy
+
+Capability declarations say what may be attempted; artifacts determine evidence status. Promotion
+requires exact expected coverage with no missing, extra, duplicate, malformed, or heterogeneous
+case. Public coverage preserves each matrix disposition; promotion requires every runnable case to
+succeed and every planned-unsupported case to remain unsupported in every selected run. Only the
+pinned canonical full-v1 matrix, with a decision-grade library, chip, system, and routing cohort,
+may advance `dev-latest`; partial matrices remain diagnostic. The full-matrix digest intentionally
+pins the exact workflow shard grouping as well as the requested cases, so changing `--max-cases`
+or the SKU round-robin scheduling order produces diagnostic-only runs even when case coverage is
+unchanged. Superseded retries,
+planned-unsupported outcomes, and unstable comparison cohorts may render diagnostically but cannot
+rank or recommend; every successful required series in a promoted dataset remains decision-grade.
+Any failed, invalid, or diagnostic retry of a runnable case blocks promotion even if a later retry
+succeeds. Routing cohorts are comparable-experimental sensitivities and never produce configuration
+recommendations; official library/platform/system cohorts own actionable recommendations.
+
+A point becomes decision-grade only after three independent workflow runs and allocation IDs pass
+correctness, identity, provenance, tail gates, p50/p99 repeat-stability thresholds, and stable ordering. The
+publisher, not the frontend, computes eligibility, controlled cohorts, sensitivity pairs, and
+recommendations.
+
+## Isolated Artifact Store
+
+Development uses one self-hosted persistent filesystem. There is no Vercel storage, GCP, Neon,
+managed database, or managed object store.
+
+```text
+$COLLECTIVEX_STORE_ROOT/
+  private/incoming/          # write-once downloaded GHA attempts
+  private/bundles/<sha256>/  # immutable source archives, native results/samples, matrix, checksums
+  private/quarantine/        # rejected attempts plus machine-readable reasons
+  public/datasets/<sha256>/  # immutable sanitized frontend datasets
+  public/channels/           # small atomic pointers: latest-attempt, dev-latest
+  locks/
+```
+
+Private and public trees use separate permissions. JSON manifests and checksums are authoritative;
+a rebuildable catalog is only an index. GitHub artifacts are transient delivery input.
+
+Container tags are checked against pinned registry digests. Enroot imports use a fixed
+`SOURCE_DATE_EPOCH` and versioned cache generation; every mounted squash is freshly hashed into
+series identity. Image-provided DeepEP is also checked against exact per-architecture wheel and
+installed-file fingerprints, so a stale cache cannot inherit the pinned source identity.
+Source-built DeepEP V2 uses a separate mode-0700 cluster-local cache mounted only as `/cx-cache`.
+Its content key binds a versioned build recipe, verified image digest, CPU/GPU architecture,
+upstream source trees, and pinned build dependencies. The cache is never an artifact or publisher
+input; per-execution source/results stages remain isolated and disposable, and marker plus runtime
+probes fail closed before reuse. The runner UID is inside the trusted cluster boundary: this cache
+guards against stale or accidental mutation, not hostile same-UID jobs. Only an unpublished partial
+build may be reset automatically; a published cache that fails integrity or runtime checks is left
+intact and rejected so a concurrent allocation cannot lose files it is using.
+
+Publication is fail-closed:
+
+1. acquire an exclusive filesystem lock and stage on the destination filesystem;
+2. archive source bytes before parsing;
+3. require the exact matrix-declared artifact set and reject every unconsumed archive member;
+4. validate strict schemas, privacy, checksums, identities, timing, and exact matrix outcomes;
+5. write checksums and `COMPLETE`, fsync, then atomically rename the private bundle;
+6. build and validate the sanitized content-addressed dataset, fsync, then atomically rename it;
+7. atomically replace `dev-latest.json` only when every promotion gate passes.
+
+Rejected attempts may update `latest-attempt` but never `dev-latest`. Channel responses use
+`no-cache`; immutable datasets use content hashes and long-lived caching. A same-host read-only HTTP
+route in the InferenceX frontend exposes only the two channel documents and digest-addressed
+datasets under `public/`; it rejects incomplete objects, directory listing, and client-supplied
+filesystem paths.
+
+`publisher.py ingest` accepts the exact matrix plus one `--artifact` directory or ZIP per GitHub
+artifact. `promote` accepts explicit immutable bundle IDs. Default `verify` requires
+`latest-attempt`; it also verifies `dev-latest` when present, while an explicit
+`--channel dev-latest` requires it. The frontend process receives the same absolute,
+non-symlinked `COLLECTIVEX_STORE_ROOT` and performs the only HTTP serving.
+
+The frontend fetches the channel pointer, validates it at runtime, resolves the immutable dataset,
+verifies its digest/format, and renders terminal coverage. It never invents missing values, selects
+retries, or recomputes decision eligibility.
+
+## Legacy Data
+
+Numeric schemas 3-5 are outside the v1 publisher and frontend reader. They remain historical
+diagnostic evidence and cannot seed `dev-latest` or drive v1 decisions.
diff --git a/experimental/CollectiveX/docs/methodology_zh.md b/experimental/CollectiveX/docs/methodology_zh.md
new file mode 100644
index 0000000000..c9124dc4e4
--- /dev/null
+++ b/experimental/CollectiveX/docs/methodology_zh.md
@@ -0,0 +1,247 @@
+# CollectiveX EP v1 契约
+
+<div align="center">
+
+[English](./methodology.md) | **中文**
+
+</div>
+
+本文档定义新的 CollectiveX 结果。历史运行笔记是 evidence，不是 contract。
+
+## 产品边界
+
+CollectiveX 是通信 microbenchmark，用于：
+
+- 在同一 chip/topology 上比较 EP libraries；
+- 在相同 workload 下比较不同系统的 EP latency 和 logical payload bandwidth；
+- 展示 unsupported、failed、invalid 和 unstable evidence，同时避免污染决策。
+
+若没有单独的 correlation study，它不能预测 serving throughput。
+
+## 矩阵
+
+Promoted workload 为 `deepseek-v3-v1`：hidden 7168、top-k 8、256 routed experts、BF16
+dispatch 和 combine、normal mode、packed placement、backend-tuned resources，以及
+`layout-and-dispatch-v1`。
+
+- `ep-core-v1`：uniform routing；decode T=1..128 的 2 次幂；prefill T=256/512。
+- `ep-routing-v1`：Zipf，EPLB off/on；decode T=128；prefill T=512。
+- 当前可运行范围：38 cells、228 cases、重复前 532 token points。
+
+Unsupported combinations 是 terminal outcomes，不会被静默跳过。DeepEP V2 指 PR #605
+引入的 `ElasticBuffer`，并固定使用 upstream PR #630 的最小纯 scale-up 修复。当前 V2 cases
+的 world size 均未超过声明的 scale-up domain，因此请求 NCCL Device API LSA；若 NCCL
+实际建立的 LSA team 未覆盖整个 EP world，则直接失败。GIN 只用于单独标识的真正 scale-out
+cohort。其 source 声明的 NVIDIA capabilities 在 GPU outcomes 通过 native oracle 和 publisher
+gates 前仍为 unvalidated。已移除的轴包括 `[cl]`、`[rv]`、LL、
+quantization、alternate activation/routing profiles、uneven allocation、placement
+permutations、model envelopes 和 scaling。
+FlashInfer 因可重复出现的间歇性执行失败而排除在 v1 外；这些失败不会转为 planned-unsupported
+coverage。
+MoRI AsyncLL 和 intranode paths 发布不同的 kernel generations，不会在 cross-chip cohorts 中
+伪装成相同的 controlled implementation。
+
+## Workload 身份
+
+一个 canonical workload 在 global token batch 上生成，再按 source rank 切分。Expert indices
+和 gate weights 会序列化。Activations 使用带版本的整数计数器公式，其 BF16 值在不同 runtime
+中精确一致；完整身份绑定到 manifest。Manifest 还绑定 shape/EP coordinates 和 oracle version。
+SHA-256 覆盖 canonical bytes 和 parameters；重新生成 library RNG 不能证明身份一致。
+
+Routing traffic 区分：
+
+- token-expert assignments，决定 expert compute load；
+- rank-deduplicated token payload copies，决定 EP activation traffic。
+
+Adapters 不得生成 routing，也不得将两种量相互解释。
+
+## 测量
+
+`layout-and-dispatch-v1` 计时 dispatch layout 加 communication。Expert-output staging 不计入
+isolated combine timing，但计入被测 paired roundtrip。每个 component 声明 availability、origin、
+start/end states、stage scope 和 sample count。仅有 paired API 时，isolated components 报 null。
+所有 adapter 的 combine 均采用 activation-only 边界：dispatch gate weights 会接受校验，但不会
+通过被测 combine 路径返回。`isolated_sum` 为派生值，不用于 throughput 或 recommendations。
+
+每个被测 component 均使用 `fixed-512-v1`：
+
+- 64 trials x 8 timed iterations = 512 observations；
+- 每个 trial/point 的每个可用被测 component 前，执行 32 次同步完整
+  dispatch-stage-combine warmups；
+- 先测 roundtrip，再测 isolated dispatch 和 combine，并使用固定的 per-phase conditioning ladder；
+- 每次 iteration 先取跨 rank 最大 latency，再以 nearest-rank 计算 p50/p90/p95/p99。
+
+被测 roundtrip p99 是 headline latency。Retries 保持为独立 attempts；后续成功不会抹除早期失败。
+Decode 和 prefill 表示一个 MoE-layer collective 所代表的 serving regime；在其他 shape 相同时，
+它们不会改变 timed primitive。
+
+NCCL/RCCL reference 是 end-to-end Python adapter，而不是 bare fabric primitive。其 dispatch
+boundary 包含 layout、count exchange、device-to-host split synchronization、fresh receive
+allocation，以及四次 payload/metadata all-to-all；activation-only combine 还包含一次 all-to-all 和
+scatter/reduction。因此其 p99 测量完整 reference-adapter boundary，可能对 host/scheduler 敏感。
+它可作为 portable system control，但不得标记为 fabric、link、bus 或 single-collective latency。
+
+带版本的 conditioning 和 EPLB planner contracts（reference trace、redundant count 和
+placement/remap version）属于 scheduled 和 evidence identity。
+
+Logical payload bandwidth 为：
+
+`logical_payload_bytes / measured_latency_seconds`
+
+Payload bytes 使用命名边界上的 rank-deduplicated activations 加必需 scale bytes，不包含 expert
+metadata、padding 和 backend buffer capacity。若没有定义 primitive model 或 transport counters，
+不发布 algorithm bandwidth、bus bandwidth、wire utilization 或 physical-link utilization。
+Logical bandwidth 绝不能标为 physical bandwidth。已发布 payload 和 token rates 命名为
+`rate_at_latency_percentile`：bytes 或 tokens 除以对应 latency percentile。它们是 p99 latency
+下的 lower-tail service rates，不是 inverted rate distribution 的 p99 percentiles。
+
+## 正确性
+
+与实现无关的 oracle 使用 expert-specific deterministic transform，使错误 expert routing 无法
+通过 identity roundtrip。它对每个 rank 和 point 验证：
+
+1. destination rank/expert、source token、multiplicity、gate weight 和 receive counts；
+2. timing 前的 dispatched payload 和 metadata；
+3. timing 前的 combined output；
+4. 所有 timed samples 期间 semantic inputs 不变；
+5. timing 后再次验证 dispatched payload/metadata 和 combined output。
+
+v1 的所有 adapter 均使用 activation-only、unweighted rank-sum combine。Oracle 在 combine 前
+构造每个 rank 的 gate-weighted expert aggregate，独立计算 `sum(gate * expert(token))`，
+并使用已记录的 `rtol=0.05` 和 `atol=0.02` 检查每个 element。任一 rank 或
+point 失败都会使 case 不合格。Pre/post dispatch evidence 按
+canonical source-token order 计算 hash。Native receive slots 可能非确定性分配，因此 physical
+receive order 不作为 correctness property。
+
+## Native 结果
+
+单个 raw case document 使用 `format: "collectivex.ep.v1"`，拒绝未知 fields，并包含：
+
+- `case`：稳定 case ID、suite、required tier 和 coordinate；
+- `workload`：canonical identity 和 logical MoE shape；
+- `measurement`：sampling、component states、timing 和 byte accounting；
+- `implementation`：实例化 class/API、固定 source、loaded libraries 和 resources；
+- `topology`：requested 和 realized SKU、devices、placement、scale-up domain 和 transport；
+- `provenance`：source SHA、image/squash hashes、allocation、run 和 attempt；
+- `rows`：point latency、byte accounting、token rate、correctness、load、fanout 和 anomaly evidence；
+- `outcome`：`success`、`failed`、`invalid`、`diagnostic` 或 `unsupported`，以及 reasons。
+
+Raw result documents 和 exact samples 会先经过临时 GitHub delivery artifacts，再由 publisher
+归档到 private bundle；它们不会进入 public tree。Private environment details 只保留在本地
+mode-0600 logs 和忽略的 operator notes 中；不会归档或发布。每个 expected case 有一个 terminal
+selected outcome，同时保留每次 attempt。
+
+## 身份与比较
+
+Canonical JSON 生成三个完整 SHA-256 IDs：
+
+- `series_id`：除 token coordinate 和 repeat allocation 外的所有 locked factors；
+- `point_id`：`series_id` 加 token coordinate；
+- `evidence_id`：`point_id` 加 allocation/run/attempt/sample checksum。
+
+Locked factors 包括 workload bytes、measurement 和 sampling contract、resources、realized
+topology、implementation/build、loaded libraries、image/squash、runtime 和 source SHA。
+Deferred code generation 会在 measurement 前捕获，并在之后再次捕获。DeepEP V2 使用固定的
+NVCC random seed，并绑定最终 cache keys、generated-source hashes 与 executable-SASS hashes；
+raw CUBIN bytes 仅保留为 private diagnostics。Hybrid 绑定实际自动调优配置与完整 kernel-key
+set，同时将各 rank 的 shared-object hashes 仅保留为 private diagnostics。本地构建的 extension
+hashes 属于 diagnostic；其固定 source trees、build recipe、runtime 与 dependencies 仍绑定到
+series。
+Series identity 包含 case ID；case ID 绑定完整 scheduled token ladder，以及固定的 percentile、
+rank-reduction、conditioning、warmup 和 correctness semantics。
+
+Controlled comparison 只声明一个 contrast：
+
+- `library`：backend implementation 及其 tuned resource profile 可以不同；realized system、
+  workload、EP、resource policy、source 和 measurement 必须匹配；
+- `chip`：受控 platform contrast。完整 realized system/topology 和 tuned resource profile 可以不同，
+  但 workload、EP、placement class、resource policy、backend lineage、source 和 measurement 必须
+  匹配。它不是 silicon-only comparison；
+- `system`：保留所有 hardware/backend 差异，同时匹配 workload、EP 和 measurement；
+- `routing`：routing distribution/EPLB 可以不同，但 static implementation build/generator、system、
+  model shape、resource profile 和 measurement 必须匹配。未启用 EPLB 的 Uniform 和 Zipf 复用
+  同一 generated implementation；EPLB 的 physical-expert/JIT configuration 是显式 treatment
+  difference。
+
+任何未声明的 mismatch 都会拒绝 overlay。Chip/system results 描述 measured systems，而非仅描述
+silicon。
+
+## Evidence 策略
+
+Capability declarations 说明可以尝试什么；artifacts 决定 evidence status。Promotion 要求精确的
+expected coverage，不能有 missing、extra、duplicate、malformed 或 heterogeneous case。Public
+coverage 保留每个 matrix disposition；promotion 要求每个 runnable case 在所有 selected runs 中
+成功，且每个 planned-unsupported case 始终为 unsupported。只有固定 canonical full-v1 matrix，
+且具有 decision-grade library、chip、system 和 routing cohort，才能推进 `dev-latest`；partial
+matrices 仍为 diagnostic。Full-matrix digest 有意绑定精确 workflow shard grouping 和 requested
+cases，因此即使 case coverage 不变，修改 `--max-cases` 或 SKU round-robin scheduling order 也只
+会产生 diagnostic-only runs。Superseded retries、planned-unsupported outcomes 和 unstable
+comparison cohorts 可以用于诊断展示，但不能排名或推荐；promoted dataset 中每个成功的 required
+series 都必须保持 decision-grade。Runnable case 的任何 failed、invalid 或 diagnostic retry 都会
+阻止 promotion，即使后续 retry 成功。Routing cohorts 是 comparable-experimental sensitivities，
+不会产生 configuration recommendations；official library/platform/system cohorts 才能产生可执行
+recommendations。
+
+一个 point 只有在三个独立 workflow runs 和 allocation IDs 均通过 correctness、identity、
+provenance、tail gates、p50/p99 repeat-stability thresholds 和 stable ordering 后才成为
+decision-grade。Eligibility、controlled cohorts、sensitivity pairs 和 recommendations 由
+publisher 而非 frontend 计算。
+
+## 隔离产物存储
+
+开发阶段使用一个 self-hosted persistent filesystem，不使用 Vercel storage、GCP、Neon、
+managed database 或 managed object store。
+
+```text
+$COLLECTIVEX_STORE_ROOT/
+  private/incoming/          # write-once downloaded GHA attempts
+  private/bundles/<sha256>/  # immutable source archives, native results/samples, matrix, checksums
+  private/quarantine/        # rejected attempts plus machine-readable reasons
+  public/datasets/<sha256>/  # immutable sanitized frontend datasets
+  public/channels/           # small atomic pointers: latest-attempt, dev-latest
+  locks/
+```
+
+Private 和 public trees 使用不同 permissions。JSON manifests 和 checksums 是权威记录；可重建
+catalog 仅为 index。GitHub artifacts 是临时 delivery input。
+
+Container tags 会与固定 registry digests 核对。Enroot imports 使用固定
+`SOURCE_DATE_EPOCH` 和 versioned cache generation；每个 mounted squash 都重新计算 hash 并纳入
+series identity。Image-provided DeepEP 也按精确 per-architecture wheel 和 installed-file
+fingerprints 检查，因此 stale cache 不能继承固定 source identity。
+Source-built DeepEP V2 使用独立的 mode-0700 cluster-local cache，并且只以 `/cx-cache` 挂载。
+其 content key 绑定版本化 build recipe、verified image digest、CPU/GPU architecture、
+upstream source trees 和固定 build dependencies。该 cache 既不是 artifact，也不是 publisher
+input；每次执行的 source/results stage 仍然隔离且可丢弃，并在复用前以 marker 和 runtime probe
+fail closed。Runner UID 属于受信任的 cluster boundary：该 cache 用于防止 stale 或意外修改，
+不防御恶意的同 UID job。只有从未发布的 partial build 才能自动重置；已发布 cache 一旦未通过
+integrity 或 runtime 检查，将保持原样并被拒绝，避免并发 allocation 正在使用的文件被删除。
+
+Publication 采用 fail-closed：
+
+1. 获取 exclusive filesystem lock，并在 destination filesystem 上 stage；
+2. 解析前归档 source bytes；
+3. 要求精确 matrix-declared artifact set，并拒绝每个未消费 archive member；
+4. 验证 strict schemas、privacy、checksums、identities、timing 和精确 matrix outcomes；
+5. 写入 checksums 和 `COMPLETE`，fsync，然后原子 rename private bundle；
+6. 构建并验证 sanitized content-addressed dataset，fsync，然后原子 rename；
+7. 仅在全部 promotion gates 通过后原子替换 `dev-latest.json`。
+
+Rejected attempts 可以更新 `latest-attempt`，但不能更新 `dev-latest`。Channel responses 使用
+`no-cache`；immutable datasets 使用 content hashes 和 long-lived caching。InferenceX 前端中的
+same-host read-only HTTP route 只暴露 `public/` 下两个 channel documents 和 digest-addressed
+datasets；它拒绝 incomplete objects、directory listing 和 client-supplied filesystem paths。
+
+`publisher.py ingest` 接受精确 matrix，并为每个 GitHub artifact 接受一个 `--artifact` directory
+或 ZIP。`promote` 接受显式 immutable bundle IDs。默认 `verify` 要求 `latest-attempt`；若存在
+`dev-latest` 也会验证，而显式 `--channel dev-latest` 则要求其存在。Frontend process 接收相同的
+absolute、non-symlinked `COLLECTIVEX_STORE_ROOT`，并执行唯一的 HTTP serving。
+
+Frontend 获取 channel pointer，在 runtime 验证它，解析 immutable dataset，验证其
+digest/format，并渲染 terminal coverage。它不会虚构 missing values、选择 retries，或重新计算
+decision eligibility。
+
+## 历史数据
+
+Numeric schemas 3-5 不在 v1 publisher 和 frontend reader 范围内。它们仍是 historical
+diagnostic evidence，不能作为 `dev-latest` 初始数据或驱动 v1 decisions。
diff --git a/experimental/CollectiveX/identity.py b/experimental/CollectiveX/identity.py
new file mode 100644
index 0000000000..f3cec953a3
--- /dev/null
+++ b/experimental/CollectiveX/identity.py
@@ -0,0 +1,163 @@
+#!/usr/bin/env python3
+"""Canonical, cross-runtime identities for CollectiveX v1."""
+from __future__ import annotations
+
+import hashlib
+import json
+import re
+from typing import Any
+
+IDENTITY_VERSION = 1  # hashed into every identifier body by digest()
+MAX_SAFE_INTEGER = (1 << 53) - 1  # 2**53 - 1; _validate() rejects larger magnitudes
+PREFIXES = {  # identity kind -> textual prefix of the typed identifier
+    "case": "cxcase-v1-",
+    "workload": "cxwork-v1-",
+    "series": "cxseries-v1-",
+    "point": "cxpoint-v1-",
+    "evidence": "cxevidence-v1-",
+    "allocation": "cxallocation-v1-",
+    "attempt": "cxattempt-v1-",
+}
+V1_CASE_PROFILE = {  # NOTE(review): unused in this module; presumably case_id()'s profile - confirm
+    "activation_generator": "collectivex-activation-counter-v3",
+    "activation_profile": "canonical-counter-source-v3",
+    "combine_dtype": "bf16",
+    "combine_quant_mode": "none",
+    "component_order_contract": "roundtrip-dispatch-activation-only-combine-v2",
+    "conditioning_contract": "fixed-phase-ramp-8-roundtrips-v1",
+    "contract": "layout-and-dispatch-v1",
+    "dtype": "bf16",
+    "eplb_planner": "greedy-rank-major-v1",
+    "eplb_redundant_experts": 32,
+    "eplb_reference_tokens_per_rank": 2048,
+    "mode": "normal",
+    "oracle_contract": "expert-specific-transform-v1",
+    "oracle_tolerances": "rtol=0.05,atol=0.02",
+    "placement": "packed",
+    "percentile_method": "nearest-rank",
+    "rank_reduction": "cross-rank-max-per-iteration",
+    "resource_mode": "tuned",
+    "routing_generator": "collectivex-routing-counter-v3",
+    "sampling_contract": "fixed-512-v1",
+    "seed": 67,
+}
+
+
+class IdentityError(ValueError):
+    """Invalid identity input: a non-portable payload, an unknown kind, or a failed self-check."""
+
+
+def _validate(value: Any, path: str = "$") -> None:
+    """Raise IdentityError unless value lies in the portable identity subset."""
+    def printable(text: str) -> bool:
+        return all(0x20 <= ord(character) <= 0x7E for character in text)
+    if value is None or isinstance(value, bool):
+        return
+    if isinstance(value, str):
+        if not printable(value):
+            raise IdentityError(f"{path}: string must contain printable ASCII only")
+    elif type(value) is int:
+        if abs(value) > MAX_SAFE_INTEGER:
+            raise IdentityError(f"{path}: integer exceeds the cross-runtime safe range")
+    elif isinstance(value, list):
+        for index, item in enumerate(value):
+            _validate(item, f"{path}[{index}]")
+    elif isinstance(value, dict):
+        for key, item in value.items():
+            if not isinstance(key, str):
+                raise IdentityError(f"{path}: object key is not a string")
+            if not printable(key):
+                raise IdentityError(f"{path}: object key must contain printable ASCII only")
+            _validate(item, f"{path}.{key}")
+    else:
+        raise IdentityError(f"{path}: unsupported identity value {type(value).__name__}")
+
+
+def canonical_bytes(value: Any) -> bytes:
+    """Validate value, then serialize it as compact key-sorted JSON in UTF-8."""
+    _validate(value)
+    # _validate() admits only printable-ASCII strings, so ensure_ascii=False
+    # never has a non-ASCII character to emit; allow_nan=False is a second
+    # guard because floats are rejected before serialization is reached.
+    serialized = json.dumps(
+        value, separators=(",", ":"), ensure_ascii=False, allow_nan=False, sort_keys=True
+    )
+    return serialized.encode("utf-8")
+
+
+def digest(kind: str, value: Any) -> str:
+    """Wrap the payload with its kind and version, hash it, and prefix the hex digest."""
+    try:
+        tag = PREFIXES[kind]
+    except KeyError as unknown:
+        raise IdentityError(f"unknown identity kind {kind!r}") from unknown
+    envelope = {"version": IDENTITY_VERSION, "value": value, "kind": kind}
+    return f"{tag}{hashlib.sha256(canonical_bytes(envelope)).hexdigest()}"
+
+
+def is_typed_id(value: Any, kind: str) -> bool:
+    """Report whether value is the kind's prefix followed by 64 lowercase hex digits."""
+    prefix = PREFIXES.get(kind)
+    if not prefix or not isinstance(value, str):
+        return False
+    pattern = re.escape(prefix) + r"[0-9a-f]{64}"
+    return re.fullmatch(pattern, value) is not None
+
+
+def case_id(*, sku: str, profile: dict[str, Any], case: dict[str, Any]) -> str:  # "case" digest
+    return digest("case", {"case": case, "profile": profile, "sku": sku})
+
+
+def workload_id(value: dict[str, Any]) -> str:  # typed "workload" digest of value
+    return digest("workload", value)
+
+
+def series_id(value: dict[str, Any]) -> str:  # typed "series" digest of value
+    return digest("series", value)
+
+
+def point_id(*, series: str, tokens_per_rank: int) -> str:  # series ID plus token coordinate
+    return digest("point", {"series_id": series, "tokens_per_rank": tokens_per_rank})
+
+
+def allocation_id(value: dict[str, Any]) -> str:  # typed "allocation" digest of value
+    return digest("allocation", value)
+
+
+def attempt_id(*, allocation: str, case: str, ordinal: int) -> str:
+    """Hash the allocation ID, case ID, and attempt ordinal into an attempt identifier."""
+    payload = {"allocation_id": allocation, "case_id": case, "ordinal": ordinal}
+    return digest("attempt", payload)
+
+
+def evidence_id(
+    *, point: str, allocation: str, attempt: str, sample_sha256: str
+) -> str:
+    """Hash the point, allocation, attempt, and sample checksum into an evidence identifier."""
+    # Key order is irrelevant here: canonical_bytes() sorts keys before hashing.
+    payload = dict(
+        sample_sha256=sample_sha256,
+        point_id=point,
+        attempt_id=attempt,
+        allocation_id=allocation,
+    )
+    return digest("evidence", payload)
+
+
+IDENTITY_TEST_VECTOR = {  # pinned payload and expected ID, compared by verify_test_vector()
+    "payload": {"backend": "deepep", "ep": 8, "shape": [7168, 8, 256]},
+    "series_id": "cxseries-v1-a79bf758488e3edd50f5531f3af825f371bf42aae7c4097e461fd2a32615af81",
+}
+
+
+def verify_test_vector() -> None:
+    """Raise IdentityError unless the pinned payload hashes to the pinned series ID."""
+    expected = IDENTITY_TEST_VECTOR["series_id"]
+    observed = series_id(IDENTITY_TEST_VECTOR["payload"])
+    if observed != expected:
+        raise IdentityError(f"identity implementation differs: {observed} != {expected}")
+
+
+if __name__ == "__main__":  # self-check entry point
+    verify_test_vector()  # raises IdentityError on mismatch, so the print below is skipped
+    print(IDENTITY_TEST_VECTOR["series_id"])
diff --git a/experimental/CollectiveX/launchers/launch_gb-nv.sh b/experimental/CollectiveX/launchers/launch_gb-nv.sh
new file mode 100644
index 0000000000..97d0377e00
--- /dev/null
+++ b/experimental/CollectiveX/launchers/launch_gb-nv.sh
@@ -0,0 +1,335 @@
+#!/usr/bin/env bash
+# CollectiveX shared GB200/GB300 NVL72 (aarch64) launcher.
+#
+# Two paths by CX_NODES:
+#   CX_NODES<=1 (EP4): single NVL72 tray, 4 GPU. Hands off to run_in_container.sh (torchrun -g 4).
+#   CX_NODES==2 (EP8): 2 trays, 8 GPU over the MNNVL NVLink domain. run_in_container's single-node
+#     torchrun can't span nodes, so this path runs run_ep.py DIRECTLY across 8 srun tasks (1 rank
+#     each), per-rank RANK/LOCAL_RANK from SLURM_*, MASTER_ADDR=first node — the intranode NVLink
+#     path works across <=8 ranks on MNNVL (no internode/NVSHMEM). One allocation runs the shard.
+#
+# Scheduling and compute-visible storage are supplied by the runner-local config.
+set -euo pipefail
+HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+CX_DIR="$(cd "$HERE/.." && pwd)"; REPO_ROOT="$(cd "$CX_DIR/../.." && pwd)"
+# shellcheck source=../runtime/common.sh
+source "$HERE/../runtime/common.sh"
+
+PRODUCT="${CX_SHARD_SKU:-${CX_GB_PRODUCT:-${CX_PUBLIC_RUNNER:-}}}"  # first non-empty of the three
+case "$PRODUCT" in
+  gb200|gb300) ;;
+  *) cx_die "set CX_SHARD_SKU or CX_PUBLIC_RUNNER to gb200 or gb300" ;;
+esac
+RUNNER="$PRODUCT"
+export CX_RUNNER="$RUNNER" CX_BENCH="${CX_BENCH:-deepep}"
+export CX_IMAGE_PLATFORM=linux/arm64
+JOB_ID=""  # no allocation yet; checked for non-empty after cx_salloc_jobid
+cx_install_launcher_fail_safe
+cx_set_failure_stage setup
+cx_load_operator_config
+cx_lock_canonical_gha_env "$RUNNER"
+NODES="${CX_NODES:-1}"; GPN="${CX_GPUS_PER_NODE:-4}"
+SCALE_UP_DOMAIN="${CX_SCALE_UP_DOMAIN:-72}"
+[ "$NODES" = 1 ] || [ "$NODES" = 2 ] || cx_die "$PRODUCT supports one or two four-GPU trays"
+[ "$GPN" = 4 ] || cx_die "$PRODUCT requires four GPUs per tray"
+[ "$SCALE_UP_DOMAIN" = 72 ] || cx_die "$PRODUCT requires the NVL72 scale-up domain"
+EXPECTED_WORLD=$((NODES * GPN))  # operands were checked as literals above, so this is safe
+NGPUS="${CX_NGPUS:-$EXPECTED_WORLD}"
+[ "$NGPUS" = "$EXPECTED_WORLD" ] || cx_die "$PRODUCT world size must equal nodes x GPUs per tray"
+if [ "$PRODUCT" = gb200 ]; then default_time=30; else default_time=90; fi  # default time limit
+TIME_MIN="${CX_TIME:-$default_time}"  # passed to salloc --time
+cx_apply_timing_profile
+# CX_IMAGE is a Docker tag; cx_ensure_squash derives the local squash filename.
+IMAGE="${CX_IMAGE:-$(cx_default_image "$PRODUCT")}"
+TS="$(date -u +%Y-%m-%dT%H-%M-%SZ)"  # UTC timestamp written with '-' in place of ':'
+export CX_RUNNER="$RUNNER" CX_TS="$TS" CX_TOPO="${PRODUCT}-nvl72-mnnvl" CX_TRANSPORT="mnnvl"
+export CX_NODES="$NODES" CX_GPUS_PER_NODE="$GPN" CX_SCALE_UP_DOMAIN="$SCALE_UP_DOMAIN"
+export CX_NGPUS="$NGPUS"
+case "$CX_BENCH" in  # allow-list of EP backends this launcher accepts
+  deepep|deepep-v2|deepep-hybrid|nccl-ep) ;;
+  *) cx_die "unsupported $PRODUCT EP backend: $CX_BENCH" ;;
+esac
+cx_validate_shard_control "$CX_DIR"
+cx_require_vars CX_PARTITION CX_ACCOUNT CX_SQUASH_DIR CX_STAGE_DIR
+[ "$PRODUCT" != gb300 ] || cx_require_vars CX_ENROOT_CACHE_PATH  # gb300 additionally needs this
+PARTITION="$CX_PARTITION"; ACCOUNT="$CX_ACCOUNT"; SQUASH_DIR="$CX_SQUASH_DIR"
+[ -z "${CX_ENROOT_CACHE_PATH:-}" ] || export ENROOT_CACHE_PATH="$CX_ENROOT_CACHE_PATH"  # only if set
+export NCCL_CUMEM_ENABLE=1 NCCL_MNNVL_ENABLE=1 MC_FORCE_MNNVL=1
+
+cx_log "$PRODUCT runner=$RUNNER nodes=$NODES x ${GPN}gpu world=$NGPUS bench=$CX_BENCH phase=${CX_PHASE:-decode}"
+[ "${CX_DRYRUN:-0}" = "1" ] && { cx_log "DRYRUN"; exit 0; }  # stop before registry/staging/salloc
+cx_set_failure_stage registry-verification
+cx_verify_registry_image "$IMAGE"
+cx_set_failure_stage repository-stage
+MOUNT_SRC="$(cx_stage_repo "$REPO_ROOT" "$CX_STAGE_DIR")"
+cx_prepare_runtime_marker "$MOUNT_SRC"
+CONTAINER_MOUNTS="$MOUNT_SRC:/ix"  # the staged repository is mounted at /ix in the container
+if [ "$CX_BENCH" = deepep-v2 ] || [ "$CX_BENCH" = deepep-hybrid ]; then  # backends staged from source
+  cx_set_failure_stage backend-setup
+  cx_prepare_backend_source "$MOUNT_SRC" "$CX_BENCH" \
+    || cx_die "cannot stage the pinned backend source"
+  export CX_BACKEND_SOURCE_ROOT=/ix/experimental/CollectiveX/.cx_sources
+fi
+if [ "$CX_BENCH" = deepep-v2 ]; then  # only deepep-v2 gets the extra /cx-cache mount
+  cx_prepare_backend_cache "$CX_SQUASH_DIR" \
+    || cx_die "cannot prepare the isolated backend cache"
+  BACKEND_CACHE="$CX_PREPARED_BACKEND_CACHE"
+  CONTAINER_MOUNTS="$CONTAINER_MOUNTS,$BACKEND_CACHE:/cx-cache"
+  export CX_BACKEND_CACHE_ROOT=/cx-cache
+fi
+cx_set_failure_stage scheduler-allocation
+command -v salloc >/dev/null || cx_die "salloc not found"
+
+if [ "$NODES" -le 1 ]; then  # single tray: no --nodes / --ntasks-per-node request
+  cx_salloc_jobid --partition="$PARTITION" --account="$ACCOUNT" \
+    --gres=gpu:"$GPN" --exclusive --mem=0 --cpus-per-task=72 \
+    --time="$TIME_MIN" --job-name="$RUNNER"
+else  # two trays: one task per GPU on every node
+  cx_salloc_jobid --partition="$PARTITION" --account="$ACCOUNT" --nodes="$NODES" \
+    --gres=gpu:"$GPN" --ntasks-per-node="$GPN" --exclusive --mem=0 --cpus-per-task=35 \
+    --time="$TIME_MIN" --job-name="$RUNNER"
+fi
+[ -n "$JOB_ID" ] || cx_die "no JOB_ID from salloc"  # presumably set by cx_salloc_jobid - confirm
+cx_set_failure_stage container-import
+SQUASH_FILE="$(cx_ensure_squash_on_job "$JOB_ID" "$SQUASH_DIR" "$IMAGE")"
+cx_set_failure_stage container-hash
+cx_export_squash_identity "$SQUASH_FILE"
+cx_preflight_allocation "$JOB_ID" "$NODES" "$MOUNT_SRC" "$SQUASH_FILE" \
+  "${CX_SHARD_FILE:-}"
+
+if [ "$NODES" -le 1 ]; then   # ---- EP4: single tray, run_in_container (torchrun -g 4) ----
+  run_rc=0
+  cx_set_failure_stage container-launch
+  runtime_log="$(cx_private_log_path runtime-ep4)"
+  srun --jobid="$JOB_ID" --chdir=/tmp --container-image="$SQUASH_FILE" \
+    --container-mounts="$CONTAINER_MOUNTS" \
+    --no-container-mount-home --container-writable --container-remap-root \
+    --container-workdir=/ix/experimental/CollectiveX --no-container-entrypoint \
+    --export="$(cx_container_exports)" bash /ix/experimental/CollectiveX/runtime/run_in_container.sh \
+    >"$runtime_log" 2>&1 || run_rc=$?
+  cx_adopt_runtime_stage "$MOUNT_SRC"
+  [ "$run_rc" = 0 ] || cx_fail_stage "$CX_FAILSAFE_MODE" "$runtime_log" || true  # best effort
+  collect_rc=0  # results are collected even when the run failed
+  cx_collect_results "$MOUNT_SRC" "$REPO_ROOT" || collect_rc=$?
+  [ "$run_rc" != 0 ] || [ "$collect_rc" = 0 ] || cx_set_failure_stage artifact-collection
+  final_rc="$run_rc"  # a run failure takes precedence over the collection status
+  [ "$final_rc" != 0 ] || final_rc="$collect_rc"
+  exit "$final_rc"
+fi
+
+# ---- EP8: 2 trays, run_ep.py directly across 8 ranks (no torchrun; MNNVL intranode path) ----
+cx_set_failure_stage scheduler-allocation
+MA="$(scontrol show hostnames "$(squeue -j "$JOB_ID" -h -o %N 2>/dev/null)" 2>/dev/null | head -1)"
+MP="${CX_MASTER_PORT:-29551}"  # rendezvous port; format and range are checked below
+[[ "$MA" =~ ^[A-Za-z0-9][A-Za-z0-9._-]*$ ]] \
+  || cx_die "could not resolve the allocated primary node"
+[[ "$MP" =~ ^[1-9][0-9]*$ ]] && [ "$MP" -le 65535 ] \
+  || cx_die "invalid distributed rendezvous port"
+mkdir -p "$MOUNT_SRC/experimental/CollectiveX/results"  # results directory inside the staged repo
+# Restore loader/import paths and backend build identity from the env file left by the CX_BUILD_ONLY step.
+SOURCE_BACKEND_ENV='case "${SLURM_NODEID:-}" in ""|*[!0-9]*) exit 66;; esac; env_file="/ix/experimental/CollectiveX/.cx_backend/env/node-${SLURM_NODEID}.sh"; env_root="${env_file%/*}"; [ -d "$env_root" ] && [ ! -L "$env_root" ] || exit 66; case "$(stat -c "%a" "$env_root")" in 700|[1-7]700) ;; *) exit 66;; esac; [ -f "$env_file" ] && [ -r "$env_file" ] && [ ! -L "$env_file" ] && [ "$(stat -c "%u:%a" "$env_file")" = "$(stat -c "%u" "$env_root"):600" ] || exit 66; . "$env_file" || exit 66'
+BACKEND_PROBE="$SOURCE_BACKEND_ENV"'; case "$CX_BENCH" in deepep-v2) python3 -c "import deep_ep; assert hasattr(deep_ep, '\''ElasticBuffer'\'')";; deepep-hybrid) python3 -c "import deep_ep; assert hasattr(deep_ep, '\''HybridEPBuffer'\'')";; esac'
+WRAP="$SOURCE_BACKEND_ENV"'; export RANK=$SLURM_PROCID WORLD_SIZE=$SLURM_NTASKS LOCAL_RANK=$SLURM_LOCALID; exec python3 tests/run_ep.py "$@"'
+
+# Prepare the backend once per node in the persistent container reused by every case.
+CNAME="cxep8_${JOB_ID}"  # container name shared by both srun steps below
+CMOUNT=(--container-mounts="$CONTAINER_MOUNTS" --no-container-mount-home
+        --container-writable --container-remap-root
+        --container-workdir=/ix/experimental/CollectiveX --no-container-entrypoint)
+cx_log "EP backend preparation: bench=$CX_BENCH"
+cx_set_failure_stage backend-setup
+build_log="$(cx_private_log_path backend-prepare)"
+set +e  # record srun exit codes in build_rc instead of aborting under errexit
+srun --jobid="$JOB_ID" --nodes="$NODES" --ntasks-per-node=1 --chdir=/tmp \
+  --container-name="$CNAME" --container-image="$SQUASH_FILE" "${CMOUNT[@]}" \
+  --export="$(cx_container_exports),CX_BUILD_ONLY=1" \
+  bash /ix/experimental/CollectiveX/runtime/run_in_container.sh \
+  </dev/null >"$build_log" 2>&1
+build_rc=$?
+if [ "$build_rc" = 0 ]; then  # probe the backend import only after a successful build step
+  srun --jobid="$JOB_ID" --nodes="$NODES" --ntasks-per-node=1 --chdir=/tmp \
+    --container-name="$CNAME" "${CMOUNT[@]}" --export="$(cx_container_exports)" \
+    bash -c "$BACKEND_PROBE" \
+    </dev/null >>"$build_log" 2>&1
+  build_rc=$?
+fi
+set -e  # restore errexit for the rest of the script
+if [ "$build_rc" != 0 ]; then  # build or probe failed: emit failures, collect, and exit
+  cx_fail_stage backend-setup "$build_log" || true
+  cx_log "ERROR: EP backend preparation failed rc=$build_rc"
+  cx_emit_setup_failures "$CX_DIR" "$MOUNT_SRC/experimental/CollectiveX/results" \
+    "$CX_BENCH" "$build_rc"
+  cx_collect_results "$MOUNT_SRC" "$REPO_ROOT" || true
+  exit "$build_rc"
+fi
+cx_set_failure_stage execution
+
+# The EP8 case list as pipe-delimited records. SWEEP (CX_SHARD_FILE set): one line per shard case,
+# so the rack-scale EP8 path sweeps EVERY case of its shard (parity with run_in_container's single-
+# node SHARD loop). MANUAL (no shard file) emits one line per requested phase.
+cx_ep8_cases() {
+  # CX_SHARD_FILE is workflow-relative (.shards/<id>.json, written by the Extract step with
+  # working-directory=experimental/CollectiveX). This EP8 path runs on the SUBMIT HOST where cwd is
+  # the repo root, so a path that is not found as-is is retried under $CX_DIR. A shard file that is
+  # named but found in neither place is an error (return 1); it never falls back to the MANUAL branch.
+  local sf="${CX_SHARD_FILE:-}"
+  [ -n "$sf" ] && [ ! -f "$sf" ] && [ -f "$CX_DIR/$sf" ] && sf="$CX_DIR/$sf"
+  if [ -n "$sf" ]; then
+    [ -f "$sf" ] || { cx_log "ERROR: shard control disappeared"; return 1; }
+    # '|'-separated (NOT tab: tab is IFS-whitespace, so `read` would collapse consecutive tabs and
+    # swallow empty fields like a false eplb, shifting every column. No case field contains '|'.)
+    python3 - "$sf" <<'PY'
+import json, sys
+d = json.load(open(sys.argv[1]))
+for c in d["cases"]:
+    g = lambda k, dv: (str(c[k]) if c.get(k) not in (None, "") else dv)
+    print("|".join([g("phase","decode"), g("routing","uniform"),
+        ("1" if c.get("eplb") else ""), g("hidden","7168"), g("topk","8"), g("experts","256"),
+        g("ladder",""), g("suite",""), g("workload",""), g("required_publication",""),
+        ("1" if c.get("canonical") else ""), g("case_id",""), g("ep",""),
+        g("timing","8:64:32")]))
+PY
+  else
+    local phases="${CX_PHASE:-decode}"; [ "$phases" = both ] && phases="decode prefill"
+    local ph; local -a fields
+    for ph in $phases; do  # NOTE(review): ${VAR:+1} below maps ANY non-empty value (even "0") to 1
+      fields=("$ph" "${CX_ROUTING:-uniform}" "${CX_EPLB:+1}"
+        "${CX_HIDDEN:-7168}" "${CX_TOPK:-8}" "${CX_EXPERTS:-256}" "${CX_TOKENS_LADDER:-}"
+        "${CX_SUITE:-}" "${CX_WORKLOAD_NAME:-}" "${CX_REQUIRED_PUBLICATION:-}"
+        "${CX_CANONICAL:+1}" "${CX_CASE_ID:-}" "$NGPUS"
+        "${CX_ITERS:-8}:${CX_TRIALS:-64}:${CX_WARMUP:-32}")
+      (IFS='|'; printf '%s\n' "${fields[*]}")
+    done
+  fi
+}
+
+# Per-rank env for the EP8 case sruns. DeepEP main's Buffer gates multi-tray NVLink behind allow_mnnvl, which defaults
+# False -> DeepEP then sets NVSHMEM_DISABLE_MNNVL=1 and the legacy buffer takes the intranode-only CUDA-IPC
+# peer path, faulting across NVL72 trays (cudaErrorIllegalAddress at csrc/legacy/buffer.hpp). CX_ALLOW_MNNVL=1
+# makes tests/ep_deepep.py pass allow_mnnvl=True so the NVL buffer spans both trays over the fabric API.
+# The pinned V1 exposes this flag explicitly; the adapter fails closed if that API changes.
+EP8_EXPORTS="$(cx_container_exports),MASTER_ADDR=$MA,MASTER_PORT=$MP,NCCL_MNNVL_ENABLE=1,NCCL_CUMEM_ENABLE=1,MC_FORCE_MNNVL=1"
+[ "$CX_BENCH" = "deepep" ] && EP8_EXPORTS="$EP8_EXPORTS,CX_ALLOW_MNNVL=1"
+
+ci=0
+failed_cases=0
+cases_file="$(mktemp)"
+if ! cx_ep8_cases > "$cases_file"; then
+  rm -f "$cases_file"
+  cx_die "could not enumerate validated shard cases"
+fi
+expected_cases="$(wc -l < "$cases_file" | tr -d ' ')"
+[ "$expected_cases" -gt 0 ] || { rm -f "$cases_file"; cx_die "case list is empty"; }
+while IFS='|' read -r ph routing eplb hidden topk experts lad suite workload required_pub \
+    canonical case_id ep timing; do
+  [ -n "$ph" ] || continue
+  ci=$((ci+1))
+  case_stem="${RUNNER}_${CX_BENCH}_${ph}_${TS}-c$(printf '%03d' "$ci")"
+  IFS=':' read -r case_iters case_trials case_warmup <<< "${timing:-8:64:32}"
+  case_iters="${case_iters:-8}"; case_trials="${case_trials:-64}"; case_warmup="${case_warmup:-32}"
+  ep="${ep:-$NGPUS}"
+  export CX_CASE_ID="$case_id" CX_SUITE="$suite" CX_WORKLOAD_NAME="$workload"
+  export CX_REQUIRED_PUBLICATION="$required_pub" CX_CANONICAL="$canonical" CX_EP="$ep"
+  export CX_ROUTING="$routing" CX_EPLB="$eplb" CX_TOKENS_LADDER="$lad"
+  export CX_HIDDEN="$hidden" CX_TOPK="$topk" CX_EXPERTS="$experts"
+  export CX_ITERS="$case_iters" CX_TRIALS="$case_trials" CX_WARMUP="$case_warmup"
+  export CX_SAMPLES_PER_POINT="$((case_iters * case_trials))"
+  export CX_WARMUP_SEMANTICS="full-roundtrip-before-each-component-trial-point-v1"
+  cx_log "EP${NGPUS}[$ci] id=${case_id:-manual} $ph $CX_BENCH routing=$routing eplb=${eplb:-0}"
+  if [ "$ep" != "$NGPUS" ]; then
+    cx_log "ERROR: case EP$ep does not match allocated world size $NGPUS"
+    export CX_ATTEMPT_ID=1
+    failure_out="$MOUNT_SRC/experimental/CollectiveX/results/failed_${case_stem}-a01.json"
+    cx_emit_ep_failed_case "$failure_out" "$CX_BENCH" "$ph" 5
+    failed_cases=$((failed_cases + 1))
+    continue
+  fi
+
+  workload_dir=""
+  if [ -n "$canonical" ]; then
+    workload_dir=".cx_workloads/ep${ep}_${routing}"
+    workload_ladder="$lad"
+    [ -n "$workload_ladder" ] || workload_ladder="1 2 4 8 16 32 64 128 256 512 1024 2048 4096"
+    workload_args=(python3 tests/make_workloads.py --out-dir "$workload_dir" --routing "$routing"
+      --ep "$ep" --hidden "$hidden" --topk "$topk" --experts "$experts"
+      --seed "${CX_SEED:-67}" --tokens-ladder "$workload_ladder")
+    workload_log="$(cx_private_log_path "workload-c$(printf '%03d' "$ci")")"
+    stage_rc=0
+    set +e
+    srun --jobid="$JOB_ID" --nodes=1 --ntasks=1 --chdir=/tmp \
+      --container-name="$CNAME" "${CMOUNT[@]}" \
+      --export="$EP8_EXPORTS" "${workload_args[@]}" \
+      </dev/null >"$workload_log" 2>&1
+    stage_rc=$?
+    set -e
+    if [ "$stage_rc" != 0 ]; then
+      cx_log "ERROR: canonical workload staging failed rc=$stage_rc"
+      export CX_ATTEMPT_ID=1
+      failure_out="$MOUNT_SRC/experimental/CollectiveX/results/failed_${case_stem}-a01.json"
+      cx_emit_ep_failed_case "$failure_out" "$CX_BENCH" "$ph" "$stage_rc"
+      failed_cases=$((failed_cases + 1))
+      continue
+    fi
+  fi
+
+  ep_args=(--backend "$CX_BENCH" --phase "$ph" --routing "$routing"
+    --gpus-per-node "$GPN" --scale-up-domain "$SCALE_UP_DOMAIN"
+    --tokens-ladder "$lad"
+    --hidden "$hidden" --topk "$topk" --experts "$experts"
+    --warmup "$case_warmup" --iters "$case_iters" --trials "$case_trials"
+    --seed "${CX_SEED:-67}" --runner "$RUNNER" --topology-class "$CX_TOPO"
+    --transport "$CX_TRANSPORT" --case-id "$case_id" --suite "$suite"
+    --workload-name "$workload" --required-publication "$required_pub")
+  [ -n "$eplb" ] && ep_args+=(--eplb)
+  [ -n "$workload_dir" ] && ep_args+=(--workload-dir "$workload_dir")
+  attempt=1
+  case_ok=0
+  export CX_ATTEMPT_ID="$attempt"
+  attempt_tag="a01"
+  out="results/${case_stem}_${attempt_tag}.json"
+  failure_out="$MOUNT_SRC/experimental/CollectiveX/results/failed_${case_stem}-${attempt_tag}.json"
+  runtime_log="$(cx_private_log_path "runtime-c$(printf '%03d' "$ci")-$attempt_tag")"
+  set +e
+  timeout -k 30 "${CX_RUN_TIMEOUT:-900}" srun --jobid="$JOB_ID" --nodes="$NODES" \
+    --ntasks="$NGPUS" --chdir=/tmp \
+    --ntasks-per-node="$GPN" --container-name="$CNAME" "${CMOUNT[@]}" \
+    --export="$EP8_EXPORTS" \
+    bash -c "$WRAP" _ "${ep_args[@]}" --out "$out" \
+    </dev/null >"$runtime_log" 2>&1
+  run_rc=$?
+  set -e
+  expected_out="$MOUNT_SRC/experimental/CollectiveX/$out"
+  if [ "$run_rc" = 0 ] && cx_result_doc_is "$expected_out" success; then
+    case_ok=1
+  elif [ "$run_rc" = 0 ] && cx_result_doc_is "$expected_out" invalid; then
+    cx_log "ERROR: EP${NGPUS}[$ci] completed with invalid semantic evidence"
+  else
+    [ "$run_rc" = 0 ] && run_rc=1
+    if cx_has_result_doc "$expected_out"; then
+      cx_demote_result_doc "$expected_out" "$run_rc" \
+        || { cx_quarantine_result_doc "$expected_out"; cx_emit_ep_failed_case "$failure_out" "$CX_BENCH" "$ph" "$run_rc"; }
+    else
+      cx_quarantine_result_doc "$expected_out"
+      cx_emit_ep_failed_case "$failure_out" "$CX_BENCH" "$ph" "$run_rc"
+    fi
+  fi
+  if [ "$case_ok" = 0 ]; then
+    failed_cases=$((failed_cases + 1))
+    cx_log "ERROR: EP${NGPUS}[$ci] failed"
+  fi
+done < "$cases_file"
+rm -f "$cases_file"
+[ "$ci" -eq "$expected_cases" ] || cx_die "enumerated $expected_cases cases but executed $ci"
+run_rc=0
+if [ "$failed_cases" -ne 0 ]; then
+  summary_log="$(cx_private_log_path shard-summary)"
+  printf 'SHARD done: %s/%s case(s) failed\n' "$failed_cases" "$expected_cases" > "$summary_log"
+  cx_fail_stage execution "$summary_log" || true
+  run_rc=1
+fi
+collect_rc=0
+cx_collect_results "$MOUNT_SRC" "$REPO_ROOT" || collect_rc=$?
+[ "$run_rc" != 0 ] || [ "$collect_rc" = 0 ] || cx_set_failure_stage artifact-collection
+final_rc="$run_rc"
+[ "$final_rc" != 0 ] || final_rc="$collect_rc"
+exit "$final_rc"
diff --git a/experimental/CollectiveX/launchers/launch_mi-amds.sh b/experimental/CollectiveX/launchers/launch_mi-amds.sh
new file mode 100644
index 0000000000..5f3de33078
--- /dev/null
+++ b/experimental/CollectiveX/launchers/launch_mi-amds.sh
@@ -0,0 +1,134 @@
+#!/usr/bin/env bash
+# CollectiveX shared MI325X/MI355X AMD Slurm launcher.
+#
+# The ROCm path imports its squash in the allocation and uses writable/remapped
+# pyxis containers. Scheduling, exclusions, node pins, and storage come from the
+# runner-local config.
+set -euo pipefail
+
+HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+CX_DIR="$(cd "$HERE/.." && pwd)"
+REPO_ROOT="$(cd "$CX_DIR/../.." && pwd)"
+# shellcheck source=../runtime/common.sh
+source "$HERE/../runtime/common.sh"
+
+RUNNER="${CX_SHARD_SKU:-${CX_PUBLIC_RUNNER:-}}"
+case "$RUNNER" in
+  mi325x) CPUS_PER_TASK=256; DEVICE_MOUNTS=",/dev/kfd:/dev/kfd,/dev/dri:/dev/dri" ;;
+  mi355x) CPUS_PER_TASK=128; DEVICE_MOUNTS="" ;;
+  *) cx_die "set CX_SHARD_SKU or CX_PUBLIC_RUNNER to mi325x or mi355x" ;;
+esac
+export CX_RUNNER="$RUNNER" CX_BENCH="${CX_BENCH:-mori}"
+export CX_IMAGE_PLATFORM=linux/amd64
+JOB_ID=""
+cx_install_launcher_fail_safe
+cx_set_failure_stage setup
+cx_load_operator_config
+cx_lock_canonical_gha_env "$RUNNER"
+NGPUS="${CX_NGPUS:-8}"
+TIME_MIN="${CX_TIME:-60}"   # generous: a cold enroot import of the large ROCm image
+EXCLUDE_NODES="${CX_EXCLUDE_NODES:-}"
+# Optional node pin overrides the exclusion list.
+NODELIST="${CX_NODELIST:-}"
+MOUNT_DIR=/ix
+TS="$(date -u +%Y-%m-%dT%H-%M-%SZ)"
+
+# AMD EP backends: MoRI and the portable NCCL/RCCL all-to-all reference.
+case "$CX_BENCH" in
+  mori|nccl-ep) ;;
+  *) cx_die "unsupported AMD EP backend: $CX_BENCH" ;;
+esac
+if [ "$RUNNER" = mi325x ]; then
+  export MORI_DISABLE_AUTO_XGMI="${MORI_DISABLE_AUTO_XGMI:-0}"
+  export MORI_ENABLE_SDMA="${MORI_ENABLE_SDMA:-1}"
+  export MORI_APP_LOG_LEVEL="${MORI_APP_LOG_LEVEL:-info}"
+  export MORI_SHMEM_LOG_LEVEL="${MORI_SHMEM_LOG_LEVEL:-info}"
+  export MORI_IO_LOG_LEVEL="${MORI_IO_LOG_LEVEL:-info}"
+  if [ "$CX_BENCH" = mori ]; then
+    export CX_IMAGE="${CX_IMAGE:-$CX_IMAGE_AMD_MORI_MI325}"
+    export CX_MORI_KERNEL_TYPE="${CX_MORI_KERNEL_TYPE:-asyncll}"
+  fi
+fi
+# Resolve the image only now: CX_BENCH and RUNNER are final, and the MI325X MoRI branch above may have set CX_IMAGE.
+IMAGE="${CX_IMAGE:-$(cx_default_image "$RUNNER")}"
+export CX_RUNNER="$RUNNER" CX_NGPUS="$NGPUS" CX_NODES=1 CX_GPUS_PER_NODE="$NGPUS"
+export CX_SCALE_UP_DOMAIN="$NGPUS" CX_TS="$TS"
+# topology_class is part of comparison_key; label the actual SKU when the MI325X wrapper calls this.
+case "${RUNNER}" in
+  mi325x*) export CX_TOPO="mi325x-xgmi" ;;
+  *)       export CX_TOPO="mi355x-xgmi" ;;
+esac
+export CX_TRANSPORT="xgmi"
+# Allow a longer per-phase guard for large MoRI prefill points.
+export CX_RUN_TIMEOUT="${CX_RUN_TIMEOUT:-1800}"
+cx_validate_shard_control "$CX_DIR"
+cx_require_vars CX_PARTITION CX_SQUASH_DIR
+PARTITION="$CX_PARTITION"
+SQUASH_DIR="$CX_SQUASH_DIR"
+cx_log "runner=$RUNNER ngpus=$NGPUS bench=$CX_BENCH"
+cx_set_failure_stage repository-stage
+MOUNT_SRC="$(cx_stage_repo "$REPO_ROOT" "${CX_STAGE_DIR:-}")"
+cx_prepare_runtime_marker "$MOUNT_SRC"
+
+if [ "${CX_DRYRUN:-0}" = "1" ]; then cx_log "CX_DRYRUN=1 — not allocating"; exit 0; fi
+cx_set_failure_stage registry-verification
+cx_verify_registry_image "$IMAGE"
+cx_set_failure_stage scheduler-allocation
+command -v salloc >/dev/null || cx_die "salloc not found on this runner"
+cx_require_single_node "$RUNNER"
+
+# Pin to specific nodes when configured, otherwise apply the optional exclusion list.
+allocation=(--partition="$PARTITION" --gres=gpu:"$NGPUS" --exclusive
+  --cpus-per-task="$CPUS_PER_TASK"
+  --time="$TIME_MIN" --job-name="$RUNNER")
+if [ -n "$NODELIST" ]; then
+  cx_log "using configured node pin"
+  allocation+=(--nodelist="$NODELIST")
+elif [ -n "$EXCLUDE_NODES" ]; then
+  allocation+=(--exclude="$EXCLUDE_NODES")
+fi
+cx_salloc_jobid "${allocation[@]}"
+[ -n "$JOB_ID" ] || cx_die "could not resolve allocated JOB_ID from salloc"
+
+cx_set_failure_stage container-import
+SQUASH_FILE="$(cx_ensure_squash_on_job \
+  "$JOB_ID" "$SQUASH_DIR" "$IMAGE" "${CX_LOCK_DIR:-}")"
+cx_set_failure_stage container-hash
+import_log="$(cx_private_log_path image-hash)"
+if ! COLLECTIVEX_SQUASH_SHA256="$(
+  srun --jobid="$JOB_ID" --nodes=1 --ntasks=1 --chdir=/tmp \
+    --export="$(cx_host_exports)" \
+    sha256sum "$SQUASH_FILE" \
+    2>>"$import_log" | awk 'NR==1 {print $1}'
+)"; then
+  cx_fail_stage container-hash "$import_log"
+fi
+[[ "$COLLECTIVEX_SQUASH_SHA256" =~ ^[0-9a-f]{64}$ ]] \
+  || cx_fail_stage container-hash "$import_log"
+export COLLECTIVEX_SQUASH_SHA256
+cx_preflight_allocation "$JOB_ID" 1 "$MOUNT_SRC" "$SQUASH_FILE" "${CX_SHARD_FILE:-}"
+
+run_rc=0
+cx_set_failure_stage container-launch
+runtime_log="$(cx_private_log_path runtime)"
+srun --jobid="$JOB_ID" --chdir=/tmp \
+  --container-image="$SQUASH_FILE" \
+  --container-mounts="$MOUNT_SRC:$MOUNT_DIR$DEVICE_MOUNTS" \
+  --container-writable --container-remap-root --no-container-mount-home \
+  --container-workdir="$MOUNT_DIR/experimental/CollectiveX" \
+  --no-container-entrypoint --export="$(cx_container_exports)" \
+  bash "$MOUNT_DIR/experimental/CollectiveX/runtime/run_in_container.sh" \
+  >"$runtime_log" 2>&1 || run_rc=$?
+
+cx_adopt_runtime_stage "$MOUNT_SRC"
+[ "$run_rc" = 0 ] || cx_fail_stage "$CX_FAILSAFE_MODE" "$runtime_log" || true
+collect_rc=0
+cx_collect_results "$MOUNT_SRC" "$REPO_ROOT" || collect_rc=$?
+[ "$run_rc" != 0 ] || [ "$collect_rc" = 0 ] || cx_set_failure_stage artifact-collection
+final_rc="$run_rc"; [ "$final_rc" != 0 ] || final_rc="$collect_rc"  # runtime rc wins over collection rc
+# ROCm can leave gpucore.* dumps in the workdir on a crash; clear them so the
+# next checkout on this runner is clean.
+rm -f "$MOUNT_SRC"/experimental/CollectiveX/gpucore.* 2>/dev/null || true
+if [ "$collect_rc" = 0 ]; then cx_log "done — result artifacts collected"
+else cx_log "done — artifact collection failed rc=$collect_rc"; fi  # do not claim success on a failed collect
+exit "$final_rc"
diff --git a/experimental/CollectiveX/launchers/launch_single-slurm.sh b/experimental/CollectiveX/launchers/launch_single-slurm.sh
new file mode 100644
index 0000000000..b9b1ef9e8d
--- /dev/null
+++ b/experimental/CollectiveX/launchers/launch_single-slurm.sh
@@ -0,0 +1,118 @@
+#!/usr/bin/env bash
+# CollectiveX shared single-node NVIDIA Slurm launcher.
+set -euo pipefail
+
+HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+CX_DIR="$(cd "$HERE/.." && pwd)"
+REPO_ROOT="$(cd "$CX_DIR/../.." && pwd)"
+# shellcheck source=../runtime/common.sh
+source "$HERE/../runtime/common.sh"
+
+RUNNER="${CX_SHARD_SKU:-${CX_PUBLIC_RUNNER:-}}"
+ALLOC_EXTRA=(); SRUN_EXTRA=(); LOCAL_IMPORT=0
+case "$RUNNER" in
+  h100-dgxc) PRODUCT=h100; TOPO=h100-nvlink-island; DEFAULT_TIME=45; REQUIRE_ACCOUNT=1 ;;
+  h200-dgxc)
+    PRODUCT=h200; TOPO=h200-nvlink-island; DEFAULT_TIME=45; REQUIRE_ACCOUNT=0
+    SRUN_EXTRA=(--container-remap-root)
+    ;;
+  b200-dgxc)
+    PRODUCT=b200; TOPO=b200-nvlink-island; DEFAULT_TIME=30; REQUIRE_ACCOUNT=1
+    ALLOC_EXTRA=(--mem=0)
+    ;;
+  b300)
+    PRODUCT=b300; TOPO=b300-nvlink-island; DEFAULT_TIME=45; REQUIRE_ACCOUNT=1
+    ALLOC_EXTRA=(-N 1 --mem=0)
+    SRUN_EXTRA=(--mpi=none --container-remap-root)
+    LOCAL_IMPORT=1
+    ;;
+  *) cx_die "set CX_SHARD_SKU or CX_PUBLIC_RUNNER to a registered single-node SKU" ;;
+esac
+export CX_RUNNER="$RUNNER" CX_BENCH="${CX_BENCH:-deepep}"
+export CX_IMAGE_PLATFORM=linux/amd64
+JOB_ID=""
+cx_install_launcher_fail_safe
+cx_set_failure_stage setup
+cx_load_operator_config
+cx_lock_canonical_gha_env "$RUNNER"
+
+NGPUS="${CX_NGPUS:-8}"
+TIME_MIN="${CX_TIME:-$DEFAULT_TIME}"
+IMAGE="${CX_IMAGE:-$(cx_default_image "$PRODUCT")}"
+TS="$(date -u +%Y-%m-%dT%H-%M-%SZ)"
+
+export CX_RUNNER="$RUNNER" CX_NGPUS="$NGPUS" CX_NODES=1 CX_GPUS_PER_NODE="$NGPUS"
+export CX_SCALE_UP_DOMAIN="$NGPUS" CX_TS="$TS" CX_TOPO="$TOPO" CX_TRANSPORT=nvlink
+export CX_NCCL_HOME="${CX_NCCL_HOME:-/usr}"
+export NCCL_CUMEM_ENABLE=1
+cx_validate_shard_control "$CX_DIR"
+cx_require_vars CX_PARTITION CX_SQUASH_DIR
+[ "$REQUIRE_ACCOUNT" = 0 ] || cx_require_vars CX_ACCOUNT
+[ "$RUNNER" != b300 ] || cx_require_vars CX_STAGE_DIR
+
+cx_log "runner=$RUNNER ngpus=$NGPUS bench=$CX_BENCH"
+[ "${CX_DRYRUN:-0}" != 1 ] || { cx_log "CX_DRYRUN=1 - not allocating"; exit 0; }
+cx_set_failure_stage registry-verification
+cx_verify_registry_image "$IMAGE"
+SQUASH_FILE=""
+if [ "$LOCAL_IMPORT" = 1 ]; then
+  cx_set_failure_stage container-import
+  SQUASH_FILE="$(CX_ENROOT_LOCAL_IMPORT=1 \
+    cx_ensure_squash "$CX_SQUASH_DIR" "$IMAGE")"
+  cx_set_failure_stage container-hash
+  cx_export_squash_identity "$SQUASH_FILE"
+fi
+cx_set_failure_stage repository-stage
+MOUNT_SRC="$(cx_stage_repo "$REPO_ROOT" "${CX_STAGE_DIR:-}")"
+cx_prepare_runtime_marker "$MOUNT_SRC"
+CONTAINER_MOUNTS="$MOUNT_SRC:/ix"
+if [ "$CX_BENCH" = deepep-v2 ] || [ "$CX_BENCH" = deepep-hybrid ]; then
+  cx_set_failure_stage backend-setup
+  cx_prepare_backend_source "$MOUNT_SRC" "$CX_BENCH" \
+    || cx_die "cannot stage the pinned backend source"
+  export CX_BACKEND_SOURCE_ROOT=/ix/experimental/CollectiveX/.cx_sources
+fi
+if [ "$CX_BENCH" = deepep-v2 ]; then
+  cx_prepare_backend_cache "$CX_SQUASH_DIR" \
+    || cx_die "cannot prepare the isolated backend cache"
+  BACKEND_CACHE="$CX_PREPARED_BACKEND_CACHE"
+  CONTAINER_MOUNTS="$CONTAINER_MOUNTS,$BACKEND_CACHE:/cx-cache"
+  export CX_BACKEND_CACHE_ROOT=/cx-cache
+fi
+
+cx_set_failure_stage scheduler-allocation
+command -v salloc >/dev/null || cx_die "salloc not found on this runner"
+cx_require_single_node "$RUNNER"
+
+allocation=(--partition="$CX_PARTITION" --gres=gpu:"$NGPUS" --exclusive
+  --time="$TIME_MIN" --job-name="$RUNNER" ${ALLOC_EXTRA[@]+"${ALLOC_EXTRA[@]}"})
+[ -z "${CX_ACCOUNT:-}" ] || allocation+=(--account="$CX_ACCOUNT")
+[ -z "${CX_EXCLUDE_NODES:-}" ] || allocation+=(--exclude="$CX_EXCLUDE_NODES")
+cx_salloc_jobid "${allocation[@]}"
+[ -n "$JOB_ID" ] || cx_die "could not resolve allocated JOB_ID from salloc"
+if [ "$LOCAL_IMPORT" = 0 ]; then
+  cx_set_failure_stage container-import
+  SQUASH_FILE="$(cx_ensure_squash_on_job "$JOB_ID" "$CX_SQUASH_DIR" "$IMAGE")"
+  cx_set_failure_stage container-hash
+  cx_export_squash_identity "$SQUASH_FILE"
+fi
+cx_preflight_allocation "$JOB_ID" 1 "$MOUNT_SRC" "$SQUASH_FILE" "${CX_SHARD_FILE:-}"
+# ALLOC_EXTRA/SRUN_EXTRA are empty for some SKUs: ${a[@]+"${a[@]}"} keeps 'set -u' on bash < 4.4 from aborting.
+run_rc=0
+cx_set_failure_stage container-launch
+runtime_log="$(cx_private_log_path runtime)"
+srun --jobid="$JOB_ID" --container-image="$SQUASH_FILE" \
+  --container-mounts="$CONTAINER_MOUNTS" --no-container-mount-home \
+  --container-workdir=/ix/experimental/CollectiveX --no-container-entrypoint \
+  ${SRUN_EXTRA[@]+"${SRUN_EXTRA[@]}"} --export="$(cx_container_exports)" \
+  bash /ix/experimental/CollectiveX/runtime/run_in_container.sh \
+  >"$runtime_log" 2>&1 || run_rc=$?
+cx_adopt_runtime_stage "$MOUNT_SRC"
+[ "$run_rc" = 0 ] || cx_fail_stage "$CX_FAILSAFE_MODE" "$runtime_log" || true
+collect_rc=0
+cx_collect_results "$MOUNT_SRC" "$REPO_ROOT" || collect_rc=$?
+[ "$run_rc" != 0 ] || [ "$collect_rc" = 0 ] || cx_set_failure_stage artifact-collection
+final_rc="$run_rc"; [ "$final_rc" != 0 ] || final_rc="$collect_rc"  # runtime rc wins over collection rc
+if [ "$collect_rc" = 0 ]; then cx_log "done - result artifacts collected"
+else cx_log "done - artifact collection failed rc=$collect_rc"; fi  # do not claim success on a failed collect
+exit "$final_rc"
diff --git a/experimental/CollectiveX/publisher.py b/experimental/CollectiveX/publisher.py
new file mode 100644
index 0000000000..a90dc99970
--- /dev/null
+++ b/experimental/CollectiveX/publisher.py
@@ -0,0 +1,3167 @@
+#!/usr/bin/env python3
+"""Fail-closed filesystem publisher for CollectiveX EP v1 artifacts."""
+from __future__ import annotations
+
+import argparse
+import contextlib
+import datetime as dt
+import fcntl
+import hashlib
+import json
+import math
+import os
+from pathlib import Path, PurePosixPath
+import re
+import shutil
+import stat
+import statistics
+import sys
+import tempfile
+from typing import Any, Iterator, Sequence
+import zipfile
+
+import jsonschema
+
+HERE = Path(__file__).resolve().parent
+sys.path.insert(0, str(HERE))
+
+import artifact_safety  # noqa: E402
+import capability  # noqa: E402
+import contracts  # noqa: E402
+import identity  # noqa: E402
+import sweep_matrix  # noqa: E402
+
+FORMAT_BUNDLE = "collectivex.private.bundle.v1"
+FORMAT_PUBLIC = "collectivex.public.v1"
+FORMAT_CHANNEL = "collectivex.channel.v1"
+POLICY = "collectivex-decision-grade-v1"
+PUBLISHER_POLICY = "collectivex-publisher-v1"
+OUTCOMES = ("success", "unsupported", "failed", "invalid", "diagnostic")
+REQUIRED_ALLOCATIONS = 3
+REQUIRED_COHORT_KINDS = ("library", "chip", "system", "routing")
+REQUIRED_PROMOTION_COHORT_COUNTS = {"library": 48, "system": 12, "routing": 76}
+CANONICAL_FULL_V1_MATRIX_SHA256 = (
+    "292e05f8faccaa4971eda527a327190a9943e99d4f71611987f7b95f57f253e8"
+)
+CANONICAL_FULL_V1_CASE_CATALOG_SHA256 = (
+    "29a9e2d65777e0bf388d49bfe31f91e0ec6537dafdaa71ac91c6ed75f9e44b00"
+)
+P50_STABILITY_LIMIT = 1.10
+P99_STABILITY_LIMIT = 1.25
+MAX_ARCHIVE_MEMBERS = 20_000
+MAX_ARCHIVE_MEMBER_BYTES = 2 * 1024**3
+MAX_ARCHIVE_TOTAL_BYTES = 16 * 1024**3
+MAX_PUBLIC_DATASET_BYTES = 32 * 1024**2
+HEX64 = re.compile(r"[0-9a-f]{64}")
+SAFE_ID = re.compile(r"[a-z0-9][a-z0-9_.-]{0,127}")
+REASON = re.compile(r"[a-z0-9][a-z0-9.-]{0,95}")
+ARTIFACT_NAME = re.compile(
+    r"cx(?:unsupported|shard-[a-z0-9][a-z0-9_.-]{0,127})-[1-9][0-9]*-[1-9][0-9]*"
+)
+CHANNEL_PATH = re.compile(r"datasets/([0-9a-f]{64})/dataset\.json")
+SCHEMA_DIR = HERE / "schemas"
+_SCHEMAS: dict[str, jsonschema.protocols.Validator] = {}
+
+
+class PublisherError(ValueError):
+    """Input or stored state violates the publication contract."""
+
+
+strict_load = contracts.strict_load
+_canonical = contracts.canonical_json_bytes
+
+
+def _sha_bytes(data: bytes) -> str:
+    return hashlib.new("sha256", data).hexdigest()  # lowercase hex SHA-256 of the given bytes
+
+
+def _sha_file(path: Path) -> str:  # hex SHA-256 of the file, streamed in 1 MiB blocks
+    hasher = hashlib.sha256()
+    with path.open("rb") as stream:
+        while block := stream.read(1024 * 1024):
+            hasher.update(block)
+    return hasher.hexdigest()
+
+
+def _latest_timestamp(values: Sequence[str]) -> str:
+    """Pick the newest timestamp from the evidence itself; equal instants break on the raw string."""
+    if not values:
+        raise PublisherError("cannot derive a timestamp without evidence")
+
+    def sort_key(raw: str) -> tuple[dt.datetime, str]:
+        try:
+            moment = dt.datetime.fromisoformat(raw.replace("Z", "+00:00"))
+        except ValueError as exc:
+            raise PublisherError("evidence timestamp is not ISO-8601") from exc
+        if moment.tzinfo is None:
+            raise PublisherError("evidence timestamp must include a timezone")
+        return moment.astimezone(dt.timezone.utc), raw
+
+    return max(values, key=sort_key)
+
+
+def _schema(name: str, value: Any) -> None:
+    # Validate value against SCHEMA_DIR/name with Draft 2020-12 and format checking.
+    # Compiled validators are cached in _SCHEMAS; only the first error by path is reported.
+    if _SCHEMAS.get(name) is None:
+        definition = strict_load(SCHEMA_DIR / name)
+        jsonschema.Draft202012Validator.check_schema(definition)
+        _SCHEMAS[name] = jsonschema.Draft202012Validator(
+            definition, format_checker=jsonschema.FormatChecker()
+        )
+    problems = sorted(_SCHEMAS[name].iter_errors(value), key=lambda item: list(item.absolute_path))
+    if problems:
+        first = problems[0]
+        where = ".".join(map(str, first.absolute_path)) or "$"
+        raise PublisherError(f"{name}:{where}: {first.message}")
+def _exact(obj: Any, fields: set[str], path: str) -> dict[str, Any]:
+    # Require obj to be a dict whose key set equals fields exactly; return it unchanged.
+    if not isinstance(obj, dict):
+        raise PublisherError(f"{path} must be an object")
+    missing, extra = sorted(fields - set(obj)), sorted(set(obj) - fields)
+    if missing or extra:
+        raise PublisherError(
+            f"{path} fields differ: missing={missing}, extra={extra}"
+        )
+    return obj
+def _array(value: Any, path: str, *, nonempty: bool = False) -> list[Any]:
+    if isinstance(value, list) and (value or not nonempty):
+        return value  # a list, and non-empty whenever nonempty=True was requested
+    article = "a nonempty" if nonempty else "an"
+    raise PublisherError(f"{path} must be {article} array")
+
+
+def _integer(value: Any, path: str, *, minimum: int = 0) -> int:
+    if type(value) is int and value >= minimum:
+        return value  # exact int only: bool and other int subclasses are rejected
+    raise PublisherError(f"{path} must be an integer >= {minimum}")
+
+
+def _unique(values: Sequence[Any], path: str) -> None:
+    encoded = [_canonical(entry) for entry in values]  # entries are compared by canonical JSON bytes
+    if len(set(encoded)) < len(encoded):
+        raise PublisherError(f"{path} contains duplicates")
+
+def _eligibility(value: dict[str, Any], path: str) -> dict[str, Any]:
+    # decision_grade must be True exactly when every gate passes and no reasons are listed;
+    # independently, reasons must be non-empty exactly when decision_grade is False.
+    flags = ("complete", "correct", "measured_roundtrip_p99",
+             "stable_p50", "stable_p99", "stable_ordering")
+    ratios = ((value["p50_max_min_ratio"], P50_STABILITY_LIMIT),
+              (value["p99_max_min_ratio"], P99_STABILITY_LIMIT))
+    gates = [len(value["allocation_ids"]) >= REQUIRED_ALLOCATIONS]
+    gates += [value[flag] for flag in flags]
+    gates += [ratio is not None and ratio <= limit for ratio, limit in ratios]
+    graded, has_reasons = value["decision_grade"], bool(value["reasons"])
+    if graded != (all(gates) and not has_reasons):
+        raise PublisherError(f"{path}.decision_grade does not match promotion gates")
+    if graded == has_reasons:
+        raise PublisherError(f"{path}.reasons does not match decision status")
+    return value
+
+
+def validate_channel(doc: Any, *, expected_channel: str | None = None) -> dict[str, Any]:
+    _schema("channel-v1.schema.json", doc)  # raises PublisherError on the first schema violation
+    if expected_channel is not None and doc["channel"] != expected_channel:  # "" must not skip this
+        raise PublisherError("channel name does not match its file")
+    target = doc["dataset"]  # its path must be datasets/<sha256>/dataset.json for that same sha256
+    match = CHANNEL_PATH.fullmatch(target["path"]) if isinstance(target["path"], str) else None
+    if not match or match.group(1) != target["sha256"]:
+        raise PublisherError("channel dataset path and sha256 do not agree")
+    return doc
+
+
+def _metric_value(series: dict[str, Any], metric: dict[str, Any]) -> tuple[str, float, str]:
+    # Resolve a decision metric to (point_id, value, unit) using the roundtrip component of
+    # the first point whose tokens_per_rank matches; the series phase must match as well.
+    candidates = (
+        entry for entry in series["points"]
+        if entry["tokens_per_rank"] == metric["tokens_per_rank"]
+    )
+    chosen = next(candidates, None)
+    if chosen is None or series["phase"] != metric["phase"]:
+        raise PublisherError("decision metric references an unavailable point")
+    roundtrip = chosen["components"]["roundtrip"]
+    if metric["measure"] == "latency_us":
+        return chosen["point_id"], roundtrip["latency_us"][metric["statistic"]], "us"
+    rates = roundtrip["logical_payload_rate_gbps_at_latency_percentile"]
+    if rates is None:
+        raise PublisherError("logical bandwidth decision has no logical byte contract")
+    # Any measure other than latency_us is read from the payload-rate table.
+    return chosen["point_id"], rates[metric["statistic"]], "GB/s"
+
+
+def _validate_metric(metric: dict[str, Any]) -> None:
+    required = "max" if metric["measure"] != "latency_us" else "min"  # only latency is minimized
+    if required != metric["objective"]:
+        raise PublisherError(f"{metric['measure']} objective must be {required}")
+
+
+def _metric_label(measure: str, statistic: str) -> str:
+    # Display label for a metric: latency_us is named by its statistic alone; every
+    # other measure is described as the payload rate at that latency statistic.
+    if measure == "latency_us":
+        return f"{statistic} latency"
+    return f"payload rate at {statistic} latency"
+
+
+def _routing_build_control(build: dict[str, Any]) -> dict[str, Any]:
+    # Keep only the four build fields below, in this order; a missing key raises KeyError.
+    wanted = (
+        "routing_control_sha256", "image_digest", "source_sha", "squash_sha256",
+    )
+    picked = [(name, build[name]) for name in wanted]
+    return dict(picked)
+
+
+def _routing_implementation_mismatch(members: Sequence[dict[str, Any]]) -> bool:
+    # True when the members whose workload has EPLB off carry more than one distinct
+    # implementation_contract_sha256; members with EPLB on are ignored.
+    eplb_off = (entry for entry in members if not entry["workload"]["eplb"])
+    hashes = {entry["build"]["implementation_contract_sha256"] for entry in eplb_off}
+    return len(hashes) >= 2
+
+
+def _public_case_factors(series: dict[str, Any]) -> dict[str, Any]:  # -> {"case", "profile", "sku"}
+    workload = series["workload"]
+    system = series["system"]
+    measurement = series["measurement"]
+    platform = capability.PLATFORMS[system["sku"]]  # KeyError if the SKU has no PLATFORMS entry
+    ep_size = system["ep_size"]
+    return {
+        "case": {
+            "backend": series["backend"]["id"],
+            "canonical": True,  # hard-coded for every series passed in
+            "eplb": workload["eplb"],
+            "ep": ep_size,
+            "experts": workload["experts"],
+            "gpus_per_node": platform["gpus_per_node"],
+            "hidden": workload["hidden"],
+            "ladder": " ".join(str(point["tokens_per_rank"]) for point in series["points"]),
+            "nodes": ep_size // platform["gpus_per_node"],  # NOTE(review): floor division gives 0 if ep_size < gpus_per_node; confirm
+            "phase": series["phase"],
+            "required_publication": series["publication_tier"],
+            "routing": workload["routing"],
+            "samples_per_point": measurement["samples_per_component"],
+            "scale_up_domain": platform["scale_up_domain"],
+            "suite": series["suite"],
+            "timing": (  # rendered as "iters:trials:warmups"
+                f"{measurement['iters']}:{measurement['trials']}:"
+                f"{measurement['warmups']}"
+            ),
+            "topk": workload["top_k"],
+            "warmup_semantics": sweep_matrix.ep_harness.WARMUP_SEMANTICS,
+            "workload": series["model"],  # the case's "workload" name is the series' model
+        },
+        "profile": identity.V1_CASE_PROFILE,
+        "sku": system["sku"],
+    }
+
+
+def _public_series_config(series: dict[str, Any]) -> dict[str, Any]:
+    # Reduce a series to its backend generation/version, its resource record
+    # (passed through as-is) and the system label.
+    backend, system = series["backend"], series["system"]
+    return {
+        "backend": {"generation": backend["generation"], "version": backend["version"]},
+        "resource": series["resource"],
+        "system": {"label": system["label"]},
+    }
+
+
+def _public_cohort_factors(kind: str, item: dict[str, Any]) -> tuple[Any, Any]:  # (dict extending common, kind-specific value)
+    workload = item["workload"]
+    build = item["build"]
+    shape = {  # the six workload fields every kind carries through common["shape"]
+        key: workload[key]
+        for key in (
+            "hidden", "top_k", "experts", "dispatch_dtype", "combine_dtype",
+            "activation_profile",
+        )
+    }
+    common = {
+        "model": item["model"], "phase": item["phase"], "shape": shape,
+        "measurement": item["measurement"], "ep_size": item["system"]["ep_size"],
+    }
+    if kind == "library":  # second value: the backend id
+        return (
+            {**common, "system": item["system"], "workload": workload,
+             "resource_mode": item["resource"]["mode"], "source": build["source_sha"]},
+            item["backend"]["id"],
+        )
+    if kind == "chip":  # second value: the whole system record
+        return (
+            {**common, "backend": item["backend"], "workload": workload,
+             "resource_mode": item["resource"]["mode"], "source": build["source_sha"]},
+            item["system"],
+        )
+    if kind == "system":  # second value: [sku, backend id, resource profile]
+        return {**common, "workload": workload, "source": build["source_sha"]}, [
+            item["system"]["sku"], item["backend"]["id"], item["resource"]["profile"]
+        ]
+    if kind == "routing":  # second value: [routing, eplb, implementation contract hash]
+        return (
+            {**common, "backend": item["backend"], "system": item["system"],
+             "resource": item["resource"], "build": _routing_build_control(build)},
+            [workload["routing"], workload["eplb"],
+             build["implementation_contract_sha256"]],
+        )
+    raise PublisherError(f"unknown cohort kind {kind}")  # any kind other than the four above
+
+
+def _case_disposition_catalog_sha256(coverage: Sequence[dict[str, Any]]) -> str:
+    # SHA-256 of the canonical JSON list of {case_id, disposition} entries sorted by
+    # case_id; only those two keys of each coverage item contribute to the digest.
+    ordered = sorted(coverage, key=lambda entry: entry["case_id"])
+    pairs = [{"case_id": e["case_id"], "disposition": e["disposition"]} for e in ordered]
+    return _sha_bytes(_canonical(pairs))
+
+
+def validate_public_dataset(doc: Any) -> dict[str, Any]:
+    _schema("public-dataset-v1.schema.json", doc)
+    if len(_canonical(doc)) + 1 > MAX_PUBLIC_DATASET_BYTES:
+        raise PublisherError("public dataset exceeds the serving size limit")
+    try:
+        artifact_safety.assert_publication_safe([doc])
+    except artifact_safety.ArtifactSafetyError as exc:
+        raise PublisherError(str(exc)) from exc
+    if doc["source_bundle_ids"] != sorted(doc["source_bundle_ids"]):
+        raise PublisherError("source bundle IDs are not canonical")
+    for field, key in (
+        ("coverage", "case_id"), ("attempts", "attempt_id"),
+        ("series", "series_id"), ("cohorts", "cohort_id"),
+        ("rankings", "ranking_id"), ("recommendations", "recommendation_id"),
+        ("sensitivities", "sensitivity_id"),
+    ):
+        if doc[field] != sorted(doc[field], key=lambda item: item[key]):
+            raise PublisherError(f"{field} are not in canonical identity order")
+    promotion = doc["promotion"]
+    quarantined = promotion["status"] == "quarantined"
+    if quarantined != (promotion["reason"] is not None) or quarantined != (
+        promotion["matrix_id"] is None
+    ):
+        raise PublisherError("promotion reason/matrix identity differs from status")
+    attempts = {item["attempt_id"]: item for item in doc["attempts"]}
+    if len(attempts) != len(doc["attempts"]):
+        raise PublisherError("dataset has duplicate attempt IDs")
+    evidence = [
+        value["evidence_id"] for item in doc["attempts"] for value in item["evidence"]
+    ]
+    _unique(evidence, "dataset attempt evidence")
+    series = {item["series_id"]: item for item in doc["series"]}
+    if len(series) != len(doc["series"]):
+        raise PublisherError("dataset has duplicate series IDs")
+    allocation_ids = set(promotion["allocation_ids"])
+    case_ids = {item["case_id"] for item in doc["coverage"]}
+    if len(case_ids) != len(doc["coverage"]):
+        raise PublisherError("dataset has duplicate case coverage")
+    coverage_by_case = {item["case_id"]: item for item in doc["coverage"]}
+    for item in doc["attempts"]:
+        if item["case_id"] not in case_ids or item["allocation_id"] not in allocation_ids:
+            raise PublisherError("attempt references undeclared coverage or allocation")
+        if item["series_id"] is not None and item["series_id"] not in series:
+            raise PublisherError("attempt references unknown series")
+        if (item["outcome"] == "success") != (item["reason"] is None):
+            raise PublisherError("attempt reason must be null exactly for success")
+        if item["outcome"] == "success" and item["failure_mode"] is not None:
+            raise PublisherError("successful attempt cannot have a failure mode")
+        if (item["outcome"] == "success" and item["selected"]) != (
+            item["series_id"] is not None
+        ):
+            raise PublisherError("attempt series must be present exactly for selected success")
+    if {item["allocation_id"] for item in doc["attempts"]} != allocation_ids:
+        raise PublisherError("promotion allocation catalog differs from attempts")
+    attempt_groups: dict[tuple[str, str], list[dict[str, Any]]] = {}
+    for item in doc["attempts"]:
+        attempt_groups.setdefault((item["case_id"], item["allocation_id"]), []).append(item)
+    for (case_id, allocation_id), group in attempt_groups.items():
+        ordinals = sorted(item["attempt_index"] for item in group)
+        if ordinals != list(range(1, len(group) + 1)):
+            raise PublisherError("public retries must retain contiguous attempt indexes")
+        if any(
+            item["attempt_id"] != identity.attempt_id(
+                allocation=allocation_id, case=case_id, ordinal=item["attempt_index"]
+            )
+            for item in group
+        ):
+            raise PublisherError("public retry identity differs from its case/allocation/index")
+        selected = [item for item in group if item["selected"]]
+        if len(selected) != 1 or selected[0]["attempt_index"] != ordinals[-1]:
+            raise PublisherError("publisher must select the latest retry per case/allocation")
+    selected_by_series: dict[str, list[dict[str, Any]]] = {}
+    for item in doc["attempts"]:
+        if item["selected"] and item["outcome"] == "success":
+            selected_by_series.setdefault(item["series_id"], []).append(item)
+    terminal = 0
+    for item in doc["coverage"]:
+        listed = set(item["attempt_ids"])
+        selected = item["selected_attempt_id"]
+        expected_attempts = {
+            attempt_id for attempt_id, attempt in attempts.items()
+            if attempt["case_id"] == item["case_id"]
+        }
+        if listed != expected_attempts:
+            raise PublisherError("coverage references attempts from another case")
+        if selected is not None:
+            terminal += 1
+            if (selected not in listed or not attempts[selected]["selected"]
+                    or any(attempts[selected][field] != item[field]
+                           for field in ("outcome", "failure_mode", "reason"))):
+                raise PublisherError("coverage selected outcome differs")
+            selected_candidates = [attempts[value] for value in listed if attempts[value]["selected"]]
+            latest = max(
+                selected_candidates,
+                key=lambda attempt: (
+                    int(attempt["run_id"]), attempt["run_attempt"],
+                    attempt["attempt_index"], attempt["attempt_id"]
+                ),
+            )
+            if selected != latest["attempt_id"]:
+                raise PublisherError("coverage does not select the latest canonical allocation")
+    if promotion["requested_cases"] != len(doc["coverage"]) or promotion["terminal_cases"] != terminal:
+        raise PublisherError("promotion coverage counts differ")
+    selected_evidence: dict[tuple[str, str], set[str]] = {}
+    for attempt in doc["attempts"]:
+        if attempt["selected"] and attempt["series_id"] is not None:
+            for value in attempt["evidence"]:
+                selected_evidence.setdefault(
+                    (attempt["series_id"], value["point_id"]), set()
+                ).add(value["evidence_id"])
+    for item in doc["series"]:
+        eligibility = _eligibility(item["eligibility"], f"series {item['series_id']}")
+        workload = item["workload"]
+        model, hidden, top_k, experts = sweep_matrix.V1_WORKLOAD
+        suite_contract = sweep_matrix.V1_SUITE_CONTRACTS.get(item["suite"])
+        coordinate = (item["phase"], workload["routing"], workload["eplb"])
+        if (
+            item["model"] != model
+            or (workload["hidden"], workload["top_k"], workload["experts"])
+            != (hidden, top_k, experts)
+            or suite_contract is None
+            or coordinate not in suite_contract["coordinates"]
+            or item["publication_tier"] != suite_contract["publication"]
+        ):
+            raise PublisherError("series differs from the frozen v1 workload/suite profile")
+        backend_id = item["backend"]["id"]
+        expected_role = "reference" if backend_id == "nccl-ep" else "library"
+        if (
+            backend_id not in capability.BACKENDS
+            or item["backend"]["label"] != BACKEND_LABELS[backend_id]
+            or item["backend"]["role"] != expected_role
+            or item["backend"]["version"] is None
+        ):
+            raise PublisherError("series backend projection differs from v1")
+        sku = item["system"]["sku"]
+        platform = capability.PLATFORMS.get(sku)
+        ep_size = item["system"]["ep_size"]
+        if platform is None or ep_size % platform["gpus_per_node"]:
+            raise PublisherError("series system projection differs from v1")
+        nodes = ep_size // platform["gpus_per_node"]
+        supported, _ = capability.resolve(
+            sku, backend_id, nodes=nodes,
+            routing=workload["routing"], eplb=workload["eplb"],
+        )
+        if (
+            not supported
+            or item["system"]["vendor"] != platform["vendor"]
+            or item["system"]["transport"] != platform["transport"]
+            or item["system"]["topology_class"] != platform["topology_class"]
+            or item["system"]["world_size"] != ep_size
+            or platform["product"] not in set(
+                re.findall(r"[a-z]+\d+[a-z]*", item["system"]["label"].lower())
+            )
+        ):
+            raise PublisherError("series system projection differs from v1")
+        if contracts.public_series_config_sha256(_public_series_config(item)) != item[
+            "build"
+        ]["public_config_sha256"]:
+            raise PublisherError("public series configuration differs from its commitment")
+        covered = [coverage_by_case.get(case_id) for case_id in item["case_ids"]]
+        if not covered or any(
+            case is None
+            or (case["sku"], case["backend"], case["phase"])
+            != (sku, backend_id, item["phase"])
+            for case in covered
+        ):
+            raise PublisherError("series projection differs from its case coverage")
+        if (
+            item["eplb"]["enabled"] != item["workload"]["eplb"]
+            or item["eplb"]["logical_experts"] != item["workload"]["experts"]
+        ):
+            raise PublisherError("series EPLB descriptor differs from its workload")
+        eplb = item["eplb"]
+        expected_physical = eplb["logical_experts"] + eplb["redundant_experts"]
+        nullable_eplb = (
+            "planner", "mapping_sha256", "reference_tokens_per_rank", "max_replicas",
+            "imbalance_before", "imbalance_after",
+        )
+        if eplb["enabled"]:
+            if (
+                item["workload"]["routing"] != "zipf"
+                or any(eplb[field] is None for field in nullable_eplb)
+                or eplb["planner"] != "greedy-rank-major-v1"
+                or eplb["reference_tokens_per_rank"] != 2048
+                or eplb["redundant_experts"] != 32
+                or eplb["redundant_experts"] % ep_size != 0
+                or eplb["physical_experts"] != expected_physical
+                or eplb["logical_experts"] % ep_size != 0
+                or eplb["physical_experts"] % ep_size != 0
+                or not 1 <= eplb["replicated_experts"] <= min(
+                    eplb["logical_experts"], eplb["redundant_experts"]
+                )
+                or not 2 <= eplb["max_replicas"] <= 1 + eplb["redundant_experts"]
+                or not 1 <= eplb["imbalance_after"] <= eplb["imbalance_before"] <= ep_size
+            ):
+                raise PublisherError("enabled EPLB descriptor is incomplete")
+            expected_plan = contracts._expected_eplb_plan(
+                workload["routing"], workload["top_k"],
+                eplb["logical_experts"], eplb["physical_experts"], ep_size,
+                identity.V1_CASE_PROFILE["seed"],
+                identity.V1_CASE_PROFILE["eplb_reference_tokens_per_rank"],
+            )
+            expected_eplb = {
+                "enabled": True,
+                "planner": identity.V1_CASE_PROFILE["eplb_planner"],
+                "mapping_sha256": contracts.eplb_contract.mapping_hash(expected_plan),
+                "logical_experts": eplb["logical_experts"],
+                "physical_experts": eplb["physical_experts"],
+                "redundant_experts": identity.V1_CASE_PROFILE["eplb_redundant_experts"],
+                "reference_tokens_per_rank": identity.V1_CASE_PROFILE[
+                    "eplb_reference_tokens_per_rank"
+                ],
+                "replicated_experts": expected_plan["replicated_experts"],
+                "max_replicas": expected_plan["max_replicas"],
+                "imbalance_before": expected_plan["imbalance_before"],
+                "imbalance_after": expected_plan["imbalance_after"],
+            }
+            if eplb != expected_eplb:
+                raise PublisherError("enabled EPLB descriptor differs from deterministic plan")
+        elif (
+            any(eplb[field] is not None for field in nullable_eplb)
+            or eplb["physical_experts"] != expected_physical
+            or eplb["redundant_experts"] != 0
+            or eplb["replicated_experts"] != 0
+        ):
+            raise PublisherError("disabled EPLB descriptor claims a plan")
+        if item["backend"]["id"] == "nccl-ep":
+            expected_generation = (
+                "nccl" if item["system"]["vendor"] == "nvidia" else "rccl"
+            )
+            if item["backend"]["generation"] != expected_generation:
+                raise PublisherError("NCCL/RCCL reference generation differs from system vendor")
+        if (item["status"] == "decision-grade") != eligibility["decision_grade"]:
+            raise PublisherError("series status differs from eligibility")
+        if (
+            set(eligibility["allocation_ids"]) != set(item["allocation_ids"])
+            or eligibility["correct"] != all(point["correct"] for point in item["points"])
+        ):
+            raise PublisherError("series eligibility differs from its evidence")
+        selected_attempts = selected_by_series.get(item["series_id"], [])
+        if (
+            set(item["case_ids"]) != {attempt["case_id"] for attempt in selected_attempts}
+            or set(item["allocation_ids"])
+            != {attempt["allocation_id"] for attempt in selected_attempts}
+        ):
+            raise PublisherError("series case/allocation catalog differs from selected attempts")
+        if item["eligibility"]["decision_grade"] and len(
+            {attempt["run_id"] for attempt in selected_attempts}
+        ) < REQUIRED_ALLOCATIONS:
+            raise PublisherError("decision-grade series lacks independent workflow runs")
+        tokens = [point["tokens_per_rank"] for point in item["points"]]
+        if tokens != sorted(set(tokens)):
+            raise PublisherError("series points are not in unique ascending token order")
+        if len(item["case_ids"]) != 1:
+            raise PublisherError("public series must represent exactly one v1 case")
+        case_id = item["case_ids"][0]
+        if identity.digest("case", _public_case_factors(item)) != case_id:
+            raise PublisherError("public series projection differs from its case identity")
+        build = item["build"]
+        expected_series_id = identity.series_id({
+            "backend": backend_id,
+            "case_id": case_id,
+            "image_digest": build["image_digest"],
+            "implementation_contract_sha256": build[
+                "implementation_contract_sha256"
+            ],
+            "public_config_sha256": build["public_config_sha256"],
+            "routing_control_sha256": build["routing_control_sha256"],
+            "runtime_fingerprint_sha256": build["runtime_fingerprint_sha256"],
+            "source_sha": build["source_sha"],
+            "squash_sha256": build["squash_sha256"],
+            "workload_id": workload["workload_id"],
+        })
+        if item["series_id"] != expected_series_id:
+            raise PublisherError("public series identity differs from its committed factors")
+        for point in item["points"]:
+            if point["point_id"] != identity.point_id(series=item["series_id"], tokens_per_rank=point["tokens_per_rank"]):
+                raise PublisherError("point identity differs")
+            if point["global_tokens"] != point["tokens_per_rank"] * item["system"]["ep_size"]:
+                raise PublisherError("global_tokens must use EP size")
+            routing = point["routing"]
+            max_fanout = min(item["workload"]["top_k"], item["system"]["ep_size"])
+            if (
+                routing["routed_copies"] < point["global_tokens"]
+                or routing["routed_copies"] > point["global_tokens"] * max_fanout
+                or routing["recv_tokens_max"] > routing["routed_copies"]
+                or routing["recv_tokens_max"] * item["system"]["ep_size"]
+                < routing["routed_copies"]
+                or not math.isclose(
+                    routing["fanout_mean"],
+                    routing["routed_copies"] / point["global_tokens"],
+                    rel_tol=1e-12,
+                )
+                or routing["hotspot_ratio"] < 1
+                or routing["empty_expert_count"] >= eplb["physical_experts"]
+                or routing["empty_rank_count"] >= item["system"]["ep_size"]
+            ):
+                raise PublisherError("point routing/load facts are internally inconsistent")
+            expected_evidence = selected_evidence.get(
+                (item["series_id"], point["point_id"]), set()
+            )
+            if set(point["evidence_ids"]) != expected_evidence:
+                raise PublisherError("point evidence differs from selected series attempts")
+            components = point["components"]
+            if (components["dispatch"] is None) != (components["combine"] is None):
+                raise PublisherError("dispatch/combine availability differs")
+            for name, component in components.items():
+                if component is None:
+                    continue
+                expected_origin = "derived" if name == "isolated_sum" else "measured"
+                expected_samples = None if name == "isolated_sum" else 512
+                if component["origin"] != expected_origin or component["sample_count"] != expected_samples:
+                    raise PublisherError(f"{name} origin or sample count differs")
+                if name == "isolated_sum" and (
+                    component["logical_bytes"] is not None
+                    or component["logical_payload_rate_gbps_at_latency_percentile"] is not None
+                ):
+                    raise PublisherError("isolated_sum cannot publish logical bandwidth")
+                if name != "isolated_sum" and (
+                    component["logical_bytes"] is None
+                    or component["logical_payload_rate_gbps_at_latency_percentile"] is None
+                ):
+                    raise PublisherError(f"{name} measured logical bandwidth is missing")
+                latency = component["latency_us"]
+                if list(latency.values()) != sorted(latency.values()):
+                    raise PublisherError("latency percentiles are not ordered")
+                if component["logical_payload_rate_gbps_at_latency_percentile"] is not None:
+                    for statistic, rate in component["logical_payload_rate_gbps_at_latency_percentile"].items():
+                        expected = component["logical_bytes"] / (latency[statistic] * 1000.0)
+                        if not math.isclose(rate, expected, rel_tol=1e-9, abs_tol=1e-12):
+                            raise PublisherError("logical GB/s formula differs")
+            if components["roundtrip"] is None or components["roundtrip"]["origin"] != "measured":
+                raise PublisherError("roundtrip must be measured")
+            for statistic, throughput in point["roundtrip_token_rate_at_latency_percentile"].items():
+                expected = point["global_tokens"] / (
+                    components["roundtrip"]["latency_us"][statistic] * 1e-6
+                )
+                if not math.isclose(throughput, expected, rel_tol=1e-9):
+                    raise PublisherError("roundtrip token throughput formula differs")
+            if components["dispatch"] is not None:
+                derived = components["isolated_sum"]
+                if derived is None or any(not math.isclose(
+                    derived["latency_us"][statistic],
+                    components["dispatch"]["latency_us"][statistic]
+                    + components["combine"]["latency_us"][statistic], rel_tol=1e-12
+                ) for statistic in ("p50", "p90", "p95", "p99")):
+                    raise PublisherError("isolated_sum is not the component percentile sum")
+            elif components["isolated_sum"] is not None:
+                raise PublisherError("isolated_sum requires measured dispatch/combine components")
+    cohorts = {item["cohort_id"]: item for item in doc["cohorts"]}
+    if len(cohorts) != len(doc["cohorts"]):
+        raise PublisherError("dataset has duplicate cohort IDs")
+    for item in doc["cohorts"]:
+        if not set(item["series_ids"]).issubset(series):
+            raise PublisherError("cohort references unknown series")
+        members = [series[series_id] for series_id in item["series_ids"]]
+        expected_tier = (
+            "comparable-experimental"
+            if any(member["publication_tier"] == "comparable-experimental" for member in members)
+            else "official"
+        )
+        if item["publication_tier"] != expected_tier:
+            raise PublisherError("cohort publication tier differs from its members")
+        roles = {member["backend"]["role"] for member in members}
+        if item["kind"] == "library" and roles != {"library"}:
+            raise PublisherError("library cohort contains non-library evidence")
+        if item["kind"] == "system" and roles != {"reference"}:
+            raise PublisherError("system cohort is not a portable reference comparison")
+        if item["kind"] in {"chip", "routing"} and len(
+            {_canonical(member["backend"]) for member in members}
+        ) != 1:
+            raise PublisherError(f"{item['kind']} cohort mixes backend implementations")
+        public_factors = [_public_cohort_factors(item["kind"], member) for member in members]
+        if len({_canonical(value[0]) for value in public_factors}) != 1:
+            raise PublisherError(f"{item['kind']} cohort does not control its public factors")
+        if len({_canonical(value[1]) for value in public_factors}) < 2:
+            raise PublisherError(f"{item['kind']} cohort does not vary its declared contrast")
+        if item["kind"] == "routing":
+            if item["publication_tier"] != "comparable-experimental":
+                raise PublisherError("routing cohort must be experimental")
+            has_baseline = sum(
+                member["workload"]["routing"] == "uniform"
+                and not member["workload"]["eplb"]
+                for member in members
+            ) == 1
+            missing_reason = "missing-uniform-baseline" in item["eligibility"]["reasons"]
+            if has_baseline == missing_reason:
+                raise PublisherError("routing baseline and eligibility reason disagree")
+            mismatch = _routing_implementation_mismatch(members)
+            mismatch_reason = "implementation-config-mismatch" in item["eligibility"]["reasons"]
+            if mismatch != mismatch_reason:
+                raise PublisherError("routing implementation control and eligibility disagree")
+        expected_id = _derived_id("cxcohort-v1-", {
+            "kind": item["kind"], "series_ids": item["series_ids"],
+            "controlled_factors": item["controlled_factors"],
+            "varying_factors": item["varying_factors"],
+        })
+        if item["cohort_id"] != expected_id:
+            raise PublisherError("cohort ID differs from its public factors")
+        expected_factors = {
+            "library": (
+                ["system", "workload", "phase", "measurement", "resource.mode", "source"],
+                ["backend", "resource"],
+            ),
+            "chip": (
+                ["backend", "source", "workload", "phase", "measurement", "resource.mode"],
+                ["system", "resource"],
+            ),
+            "system": (
+                ["workload", "phase", "measurement", "source"],
+                ["system", "backend", "resource"],
+            ),
+            "routing": (
+                ["backend", "implementation-static-build", "system", "model-shape", "phase", "measurement", "resource"],
+                ["workload.routing", "workload.eplb", "implementation-config"],
+            ),
+        }[item["kind"]]
+        member_allocations = {
+            allocation for series_id in item["series_ids"]
+            for allocation in series[series_id]["allocation_ids"]
+        }
+        if (
+            (item["controlled_factors"], item["varying_factors"]) != expected_factors
+            or set(item["eligibility"]["allocation_ids"]) != member_allocations
+        ):
+            raise PublisherError("cohort factors or allocations differ from its members")
+        _eligibility(item["eligibility"], f"cohort {item['cohort_id']}")
+    expected_ranking_keys: set[tuple[str, str, str, int]] = set()
+    for cohort in doc["cohorts"]:
+        if not cohort["eligibility"]["decision_grade"]:
+            continue
+        members = [series[series_id] for series_id in cohort["series_ids"]]
+        tokens = set.intersection(*(
+            {point["tokens_per_rank"] for point in member["points"]}
+            for member in members
+        ))
+        expected_ranking_keys.update(
+            (cohort["cohort_id"], measure, statistic, token)
+            for token in tokens
+            for measure in ("latency_us", "logical_payload_rate_gbps_at_latency_percentile")
+            for statistic in ("p50", "p99")
+        )
+    ranking_top: dict[tuple[str, str, str, int], dict[str, Any]] = {}
+    ranking_ids: set[str] = set()
+    for ranking in doc["rankings"]:
+        cohort = cohorts.get(ranking["cohort_id"])
+        if (
+            cohort is None
+            or not cohort["eligibility"]["decision_grade"]
+            or ranking["eligibility"] != cohort["eligibility"]
+            or ranking["publication_tier"] != cohort["publication_tier"]
+        ):
+            raise PublisherError("ranking references an ineligible cohort")
+        entries = ranking["entries"]
+        _validate_metric(ranking["metric"])
+        if cohort["kind"] == "library" and any(
+            series[series_id]["backend"]["role"] == "reference"
+            for series_id in cohort["series_ids"]
+        ):
+            raise PublisherError("reference evidence cannot drive a library ranking")
+        if {entry["series_id"] for entry in entries} != set(cohort["series_ids"]):
+            raise PublisherError("ranking does not cover its cohort")
+        for entry in entries:
+            point_id, value, unit = _metric_value(series[entry["series_id"]], ranking["metric"])
+            if entry["point_id"] != point_id or entry["unit"] != unit or not math.isclose(entry["value"], value, rel_tol=1e-12):
+                raise PublisherError("ranking entry differs from series data")
+        reverse = ranking["metric"]["objective"] == "max"
+        expected = sorted(entries, key=lambda entry: (entry["value"], entry["series_id"]), reverse=reverse)
+        if entries != expected or [entry["rank"] for entry in entries] != list(range(1, len(entries) + 1)):
+            raise PublisherError("ranking order differs")
+        metric = ranking["metric"]
+        expected_id = _derived_id("cxranking-v1-", {
+            "cohort_id": ranking["cohort_id"], "metric": metric,
+        })
+        if ranking["ranking_id"] != expected_id or expected_id in ranking_ids:
+            raise PublisherError("ranking ID is duplicate or differs")
+        ranking_ids.add(expected_id)
+        ranking_top[(ranking["cohort_id"], metric["measure"], metric["statistic"], metric["tokens_per_rank"])] = entries[0]
+    if set(ranking_top) != expected_ranking_keys:
+        raise PublisherError("rankings do not cover every eligible cohort metric")
+    objective = {
+        "min-p50-latency": ("latency_us", "p50"), "min-p99-latency": ("latency_us", "p99"),
+        "max-payload-rate-at-p50-latency": (
+            "logical_payload_rate_gbps_at_latency_percentile", "p50"
+        ),
+        "max-payload-rate-at-p99-latency": (
+            "logical_payload_rate_gbps_at_latency_percentile", "p99"
+        ),
+    }
+    recommendation_ids: set[str] = set()
+    for item in doc["recommendations"]:
+        measure, statistic = objective[item["objective"]]
+        candidates = [top for key, top in ranking_top.items()
+                      if key[:3] == (item["cohort_id"], measure, statistic) and top["point_id"] == item["point_id"]]
+        if len(candidates) != 1 or any(item[field] != candidates[0][field] for field in ("series_id", "point_id", "value", "unit")):
+            raise PublisherError("recommendation is not a ranking winner")
+        matching_ranking = next(
+            ranking for ranking in doc["rankings"]
+            if ranking["cohort_id"] == item["cohort_id"]
+            and ranking["metric"]["measure"] == measure
+            and ranking["metric"]["statistic"] == statistic
+            and ranking["entries"][0]["point_id"] == item["point_id"]
+        )
+        expected_id = _derived_id("cxrecommendation-v1-", {
+            "objective": item["objective"], "ranking_id": matching_ranking["ranking_id"],
+        })
+        cohort = cohorts[item["cohort_id"]]
+        if (item["recommendation_id"] != expected_id or expected_id in recommendation_ids
+                or cohort["publication_tier"] != "official"
+                or item["publication_tier"] != "official"
+                or item["eligibility"] != cohort["eligibility"]):
+            raise PublisherError("recommendation ID/eligibility differs")
+        recommendation_ids.add(expected_id)
+    expected_recommendations = sum(
+        cohorts[ranking["cohort_id"]]["publication_tier"] == "official"
+        for ranking in doc["rankings"]
+    )
+    if len(doc["recommendations"]) != expected_recommendations:
+        raise PublisherError("recommendations do not cover every actionable ranking")
+    sensitivity_ids: set[str] = set()
+    sensitivity_keys: set[tuple[str, str, str, str, str, int]] = set()
+    for item in doc["sensitivities"]:
+        cohort = cohorts.get(item["cohort_id"])
+        if (
+            cohort is None
+            or cohort["kind"] != "routing"
+            or not cohort["eligibility"]["decision_grade"]
+            or item["publication_tier"] != cohort["publication_tier"]
+            or item["eligibility"] != cohort["eligibility"]
+        ):
+            raise PublisherError("sensitivity references a non-routing cohort")
+        if (
+            item["baseline_series_id"] == item["candidate_series_id"]
+            or not {item["baseline_series_id"], item["candidate_series_id"]}.issubset(cohort["series_ids"])
+        ):
+            raise PublisherError("sensitivity series differ from its routing cohort")
+        _validate_metric(item["metric"])
+        baseline_series = series[item["baseline_series_id"]]
+        if (
+            baseline_series["workload"]["routing"] != "uniform"
+            or baseline_series["workload"]["eplb"]
+        ):
+            raise PublisherError("sensitivity baseline is not uniform without EPLB")
+        _, baseline, _ = _metric_value(series[item["baseline_series_id"]], item["metric"])
+        _, candidate, _ = _metric_value(series[item["candidate_series_id"]], item["metric"])
+        if not math.isclose(item["signed_change_ratio"], (candidate - baseline) / baseline, rel_tol=1e-12):
+            raise PublisherError("sensitivity ratio differs")
+        expected_id = _derived_id("cxsensitivity-v1-", {
+            "baseline": item["baseline_series_id"],
+            "candidate": item["candidate_series_id"],
+            "cohort": item["cohort_id"], "metric": item["metric"],
+        })
+        if item["sensitivity_id"] != expected_id or expected_id in sensitivity_ids:
+            raise PublisherError("sensitivity ID is duplicate or differs")
+        sensitivity_ids.add(expected_id)
+        sensitivity_keys.add((
+            item["cohort_id"], item["baseline_series_id"], item["candidate_series_id"],
+            item["metric"]["measure"], item["metric"]["statistic"],
+            item["metric"]["tokens_per_rank"],
+        ))
+    expected_sensitivity_keys: set[tuple[str, str, str, str, str, int]] = set()
+    for cohort in doc["cohorts"]:
+        if cohort["kind"] != "routing" or not cohort["eligibility"]["decision_grade"]:
+            continue
+        members = [series[series_id] for series_id in cohort["series_ids"]]
+        baseline = next((
+            member for member in members
+            if member["workload"]["routing"] == "uniform" and not member["workload"]["eplb"]
+        ), None)
+        if baseline is None:
+            continue
+        tokens = set.intersection(*(
+            {point["tokens_per_rank"] for point in member["points"]}
+            for member in members
+        ))
+        expected_sensitivity_keys.update(
+            (cohort["cohort_id"], baseline["series_id"], candidate["series_id"],
+             measure, statistic, token)
+            for candidate in members if candidate is not baseline
+            for token in tokens
+            for measure in ("latency_us", "logical_payload_rate_gbps_at_latency_percentile")
+            for statistic in ("p50", "p99")
+        )
+    if sensitivity_keys != expected_sensitivity_keys:
+        raise PublisherError("sensitivities do not cover every routing contrast metric")
+    if promotion["status"] == "promoted":
+        run_ids = {item["run_id"] for item in doc["attempts"] if item["selected"]}
+        repeated_cases = all(
+            len({
+                attempts[attempt_id]["run_id"]
+                for attempt_id in coverage["attempt_ids"]
+                if attempts[attempt_id]["selected"]
+            }) == REQUIRED_ALLOCATIONS
+            for coverage in doc["coverage"]
+        )
+        if promotion["matrix_id"] != CANONICAL_FULL_V1_MATRIX_SHA256:
+            raise PublisherError("promotion requires the canonical full-v1 matrix")
+        if (
+            _case_disposition_catalog_sha256(doc["coverage"])
+            != CANONICAL_FULL_V1_CASE_CATALOG_SHA256
+        ):
+            raise PublisherError("promotion requires the canonical case/disposition catalog")
+        if (
+            terminal != len(doc["coverage"])
+            or len(doc["source_bundle_ids"]) != REQUIRED_ALLOCATIONS
+            or len(run_ids) != REQUIRED_ALLOCATIONS
+            or not repeated_cases
+        ):
+            raise PublisherError("promoted dataset lacks complete coverage")
+        expected_outcomes = {
+            item["case_id"]: (
+                "success" if item["disposition"] == "runnable" else "unsupported"
+            )
+            for item in doc["coverage"]
+        }
+        if any(
+            item["selected"]
+            and item["outcome"] != expected_outcomes[item["case_id"]]
+            for item in doc["attempts"]
+        ):
+            raise PublisherError("promoted outcomes differ from requested dispositions")
+        runnable_cases = {
+            item["case_id"] for item in doc["coverage"]
+            if item["disposition"] == "runnable"
+        }
+        if any(
+            item["case_id"] in runnable_cases and item["outcome"] != "success"
+            for item in doc["attempts"]
+        ):
+            raise PublisherError(
+                "promotion rejects runnable cases with failed, invalid, or diagnostic retries"
+            )
+        _require_promotion_series(doc["series"])
+        _require_promotion_cohorts(doc["cohorts"], doc["series"])
+        if not doc["rankings"] or not doc["recommendations"]:
+            raise PublisherError("promoted dataset lacks eligible decisions")
+    if promotion["status"] == "quarantined" and any((
+        doc["source_bundle_ids"], promotion["allocation_ids"], doc["coverage"],
+        doc["attempts"], doc["series"], doc["cohorts"], doc["rankings"],
+        doc["recommendations"], doc["sensitivities"],
+    )):
+        raise PublisherError("quarantined dataset exposes unvalidated evidence")
+    return doc
+
+
+def _file_record(value: Any, path: str) -> dict[str, Any]:
+    """Validate a ``{path, sha256, bytes}`` file record and return it.
+
+    ``path`` is the label of the record inside its document; it is only used
+    to build error messages. Raises ``PublisherError`` when the stored path is
+    not a string, is absolute, or contains a ``..`` component, or when the
+    digest does not fully match ``HEX64``. The byte count is delegated to
+    ``_integer`` with ``minimum=1``.
+    """
+    record = _exact(value, {"path", "sha256", "bytes"}, path)
+    stored = record["path"]
+    if not isinstance(stored, str):
+        raise PublisherError(f"{path}.path is unsafe")
+    parsed = PurePosixPath(stored)
+    if parsed.is_absolute() or ".." in parsed.parts:
+        raise PublisherError(f"{path}.path is unsafe")
+    digest = record["sha256"]
+    if not isinstance(digest, str) or HEX64.fullmatch(digest) is None:
+        raise PublisherError(f"{path}.sha256 is invalid")
+    _integer(record["bytes"], f"{path}.bytes", minimum=1)
+    return record
+
+def validate_bundle_manifest(doc: Any) -> dict[str, Any]:
+    """Check a private bundle manifest for internal consistency and return it.
+
+    After the schema check, this verifies that attempt IDs and selected case
+    IDs are unique, that every selection points at a retained attempt marked
+    selected with the same case and outcome, that the coverage counters agree
+    with the selections, and that each allocation reports exactly one runtime
+    fingerprint. Any violation raises ``PublisherError``.
+    """
+    _schema("private-bundle-v1.schema.json", doc)
+    retained = doc["attempts"]
+    by_id = {entry["attempt_id"]: entry for entry in retained}
+    if len(by_id) != len(retained):
+        raise PublisherError("bundle has duplicate attempt IDs")
+    selections = doc["coverage"]["selections"]
+    selected_cases = {entry["case_id"] for entry in selections}
+    if len(selected_cases) != len(selections):
+        raise PublisherError("bundle has duplicate selected cases")
+    tally = dict.fromkeys(OUTCOMES, 0)
+    for selection in selections:
+        chosen = by_id.get(selection["selected_attempt_id"])
+        consistent = (
+            chosen is not None
+            and chosen["selected"]
+            and chosen["case_id"] == selection["case_id"]
+            and chosen["outcome"] == selection["outcome"]
+        )
+        if not consistent:
+            raise PublisherError("bundle selection differs from retained attempt")
+        tally[selection["outcome"]] += 1
+    coverage = doc["coverage"]
+    if coverage["terminal_cases"] != len(selections) or coverage["outcome_counts"] != tally:
+        raise PublisherError("bundle terminal counts differ")
+    if coverage["complete"] != (coverage["expected_cases"] == len(selections)):
+        raise PublisherError("bundle completeness differs from coverage")
+    # Group the non-empty runtime fingerprints by allocation.
+    per_allocation: dict[str, set[str]] = {}
+    for entry in retained:
+        fingerprint = entry["runtime_fingerprint_sha256"]
+        if not fingerprint:
+            continue
+        per_allocation.setdefault(entry["allocation_id"], set()).add(fingerprint)
+    for observed in per_allocation.values():
+        if len(observed) != 1:
+            raise PublisherError("bundle runtime is heterogeneous within an allocation")
+    return doc
+
+
+def _fsync_dir(path: Path) -> None:
+    """Open ``path`` read-only and ``fsync`` the resulting descriptor.
+
+    ``O_DIRECTORY`` is added to the open flags where the platform defines it.
+    The descriptor is always closed, even when ``fsync`` fails.
+    """
+    flags = os.O_RDONLY | getattr(os, "O_DIRECTORY", 0)
+    handle = os.open(path, flags)
+    try:
+        os.fsync(handle)
+    finally:
+        os.close(handle)
+
+
+def _write_bytes(path: Path, data: bytes, *, mode: int) -> None:
+    """Create ``path`` exclusively, write ``data`` to it, and fsync the file.
+
+    The file is opened with ``O_CREAT | O_EXCL`` (plus ``O_NOFOLLOW`` where
+    available), so an existing entry at ``path`` makes ``os.open`` fail.
+    ``mode`` is applied again with ``fchmod`` after creation.
+    """
+    flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL | getattr(os, "O_NOFOLLOW", 0)
+    descriptor = os.open(path, flags, mode)
+    try:
+        # os.open applies the process umask to ``mode``; fchmod pins the exact bits.
+        os.fchmod(descriptor, mode)
+        # closefd=False: the raw descriptor is closed by the finally clause below.
+        with os.fdopen(descriptor, "wb", closefd=False) as stream:
+            stream.write(data)
+            stream.flush()
+            os.fsync(stream.fileno())
+    finally:
+        os.close(descriptor)
+
+
+def _write_all(descriptor: int, data: bytes) -> None:
+    """Write every byte of ``data`` to ``descriptor``.
+
+    ``os.write`` may accept fewer bytes than offered, so the unwritten tail is
+    resubmitted until nothing remains.
+    """
+    remaining = memoryview(data)
+    while len(remaining) > 0:
+        accepted = os.write(descriptor, remaining)
+        remaining = remaining[accepted:]
+
+
+def _write_json(path: Path, value: Any, *, mode: int) -> bytes:
+    """Serialize ``value`` with ``_canonical``, append a newline, and write it.
+
+    Returns the exact bytes handed to ``_write_bytes``.
+    """
+    payload = _canonical(value) + b"\n"
+    _write_bytes(path, payload, mode=mode)
+    return payload
+
+
+def _file_metadata(path: Path, relative_to: Path) -> dict[str, Any]:
+    """Describe ``path`` as a ``{path, sha256, bytes}`` record.
+
+    The recorded path is the POSIX form of ``path`` relative to
+    ``relative_to``; the digest comes from ``_sha_file`` and the size from
+    ``stat``.
+    """
+    relative = path.relative_to(relative_to)
+    return dict(
+        path=relative.as_posix(),
+        sha256=_sha_file(path),
+        bytes=path.stat().st_size,
+    )
+
+
+def _tree_files(root: Path) -> list[Path]:
+    """Return the sorted regular files below ``root``.
+
+    Symlinks are skipped, as is any entry named ``COMPLETE``.
+    """
+    found: list[Path] = []
+    for candidate in root.rglob("*"):
+        if candidate.name == "COMPLETE":
+            continue
+        if candidate.is_symlink() or not candidate.is_file():
+            continue
+        found.append(candidate)
+    found.sort()
+    return found
+
+
+def _verify_regular_file(path: Path, expected_mode: int) -> None:
+    """Require ``path`` to be a regular file owned by this user with ``expected_mode``.
+
+    The parent directory chain is first passed to ``_reject_symlinked_path``.
+    A missing file, a non-regular entry, a foreign owner, or different
+    permission bits all raise ``PublisherError``.
+    """
+    _reject_symlinked_path(path.parent)
+    try:
+        info = os.lstat(path)
+    except FileNotFoundError as exc:
+        raise PublisherError(f"required file is missing: {path.name}") from exc
+    acceptable = (
+        stat.S_ISREG(info.st_mode)
+        and info.st_uid == os.getuid()
+        and stat.S_IMODE(info.st_mode) == expected_mode
+    )
+    if not acceptable:
+        raise PublisherError(
+            f"file is not an owned regular {expected_mode:o} object: {path.name}"
+        )
+
+
+def _verify_frozen_tree(root: Path, *, private: bool) -> None:
+    """Check that ``root`` and everything below it has the frozen layout.
+
+    Every entry must be owned by the current user and be either a directory
+    or a regular file. Directories must have mode 500 (private) or 555
+    (public); files must have mode 400 (private) or 444 (public). Any other
+    entry type, owner, or mode raises ``PublisherError``.
+    """
+    _reject_symlinked_path(root)
+    directory_mode = 0o500 if private else 0o555
+    file_mode = 0o400 if private else 0o444
+    try:
+        root_metadata = os.lstat(root)
+    except OSError as exc:
+        raise PublisherError(f"cannot inspect immutable object: {root.name}") from exc
+    if not stat.S_ISDIR(root_metadata.st_mode):
+        raise PublisherError(f"immutable object is not a real directory: {root.name}")
+    try:
+        # The root itself is checked alongside its descendants.
+        entries = [root, *root.rglob("*")]
+    except OSError as exc:
+        raise PublisherError(f"cannot inspect immutable object: {root.name}") from exc
+    for path in entries:
+        # lstat, not stat: a symlink must be seen as a link, not as its target.
+        metadata = os.lstat(path)
+        if metadata.st_uid != os.getuid():
+            raise PublisherError(f"immutable object has the wrong owner: {path.name}")
+        if stat.S_ISDIR(metadata.st_mode):
+            expected = directory_mode
+        elif stat.S_ISREG(metadata.st_mode):
+            expected = file_mode
+        else:
+            raise PublisherError(f"immutable object contains a linked or special entry: {path.name}")
+        if stat.S_IMODE(metadata.st_mode) != expected:
+            raise PublisherError(
+                f"immutable object mode differs for {path.name}: expected {expected:o}"
+            )
+
+
+def _freeze_tree(root: Path, *, private: bool) -> None:
+    """Make ``root`` and everything below it read-only, then verify the result.
+
+    Files become mode 400 (private) or 444 (public); directories become 500
+    or 555. A symlink or special entry anywhere in the tree raises
+    ``PublisherError`` before any mode is changed.
+    """
+    files: list[Path] = []
+    directories = [root]
+    # Classify every entry first so an invalid tree is rejected untouched.
+    for path in root.rglob("*"):
+        metadata = os.lstat(path)
+        if stat.S_ISDIR(metadata.st_mode):
+            directories.append(path)
+        elif stat.S_ISREG(metadata.st_mode):
+            files.append(path)
+        else:
+            raise PublisherError(f"immutable object contains a linked or special entry: {path.name}")
+    for path in files:
+        os.chmod(path, 0o400 if private else 0o444)
+    # Directories are processed deepest first (most path components first),
+    # and each one is fsynced right after its mode change.
+    for path in sorted(directories, key=lambda item: len(item.parts), reverse=True):
+        os.chmod(path, 0o500 if private else 0o555)
+        _fsync_dir(path)
+    _verify_frozen_tree(root, private=private)
+
+
+def _reject_symlinked_path(path: Path) -> None:
+    """Reject ``path`` when any existing component is a symlink or a non-directory.
+
+    Components are inspected from the outermost inward with ``lstat``. The
+    walk stops at the first component that does not exist yet, so a path
+    whose tail is still to be created is accepted.
+
+    Raises:
+        PublisherError: a component is a symlink, or exists but is not a
+            directory.
+    """
+    anchor = path.anchor
+    current = Path(anchor)
+    # ``parts[0]`` is the anchor only for anchored paths. For a relative path
+    # it is the first real component, which must be inspected as well.
+    components = path.parts[1:] if anchor else path.parts
+    for part in components:
+        current /= part
+        try:
+            metadata = os.lstat(current)
+        except FileNotFoundError:
+            break
+        if stat.S_ISLNK(metadata.st_mode):
+            raise PublisherError("COLLECTIVEX_STORE_ROOT must not traverse a symlinked parent")
+        if not stat.S_ISDIR(metadata.st_mode):
+            raise PublisherError(f"store path component is not a directory: {current}")
+
+
+class Store:
+    """Atomic private/public directory operations on one operator filesystem."""
+
+    def __init__(self, root: str | os.PathLike[str]):
+        """Create or adopt the store rooted at ``root`` and its fixed layout.
+
+        The root must not be reached through a symlink, must be owned by the
+        current user, and must not be group- or world-writable; it is then
+        set to mode 750. The ``private``, ``public`` and ``locks`` subtrees
+        are created if missing and re-chmodded to their fixed modes.
+        """
+        candidate = Path(os.path.abspath(os.path.expanduser(root)))
+        _reject_symlinked_path(candidate)
+        candidate.mkdir(parents=True, exist_ok=True, mode=0o750)
+        # Second symlink check after creation: the resolved path must be the
+        # same as the lexical absolute path.
+        resolved = candidate.resolve()
+        if candidate != resolved:
+            raise PublisherError(
+                "COLLECTIVEX_STORE_ROOT must not traverse a symlinked parent"
+            )
+        root_metadata = candidate.stat()
+        if root_metadata.st_uid != os.getuid() or stat.S_IMODE(root_metadata.st_mode) & 0o022:
+            raise PublisherError(
+                "COLLECTIVEX_STORE_ROOT must be owned by this user and not group/world writable"
+            )
+        os.chmod(candidate, 0o750)
+        if stat.S_IMODE(candidate.stat().st_mode) != 0o750:
+            raise PublisherError("COLLECTIVEX_STORE_ROOT mode must be 750")
+        self.root = resolved
+        raw = self.root
+        self.private = raw / "private"
+        self.incoming = self.private / "incoming"
+        self.bundles = self.private / "bundles"
+        self.quarantine = self.private / "quarantine"
+        self.public = raw / "public"
+        self.datasets = self.public / "datasets"
+        self.channels = self.public / "channels"
+        self.locks = raw / "locks"
+        # Private and lock directories are owner-only; public ones are world-readable.
+        for path, mode in (
+            (self.private, 0o700), (self.incoming, 0o700), (self.bundles, 0o700),
+            (self.quarantine, 0o700), (self.public, 0o755), (self.datasets, 0o755),
+            (self.channels, 0o755), (self.locks, 0o700),
+        ):
+            path.mkdir(parents=True, exist_ok=True, mode=mode)
+            if path.is_symlink() or not path.is_dir():
+                raise PublisherError(f"store path is not a real directory: {path}")
+            os.chmod(path, mode)
+
+    @contextlib.contextmanager
+    def locked(self) -> Iterator[None]:
+        """Hold an exclusive ``flock`` on ``locks/publisher.lock`` for the ``with`` body.
+
+        The lock file is created if missing, forced to mode 600, and must be
+        a regular file owned by the current user; otherwise
+        ``PublisherError`` is raised before the lock is taken.
+        """
+        lock_path = self.locks / "publisher.lock"
+        descriptor = os.open(
+            lock_path,
+            os.O_RDWR | os.O_CREAT | getattr(os, "O_NOFOLLOW", 0),
+            0o600,
+        )
+        try:
+            os.fchmod(descriptor, 0o600)
+            metadata = os.fstat(descriptor)
+            if (
+                not stat.S_ISREG(metadata.st_mode)
+                or metadata.st_uid != os.getuid()
+                or stat.S_IMODE(metadata.st_mode) != 0o600
+            ):
+                raise PublisherError("publisher lock is not an owned regular 600 file")
+            fcntl.flock(descriptor, fcntl.LOCK_EX)
+            yield
+        finally:
+            # Runs even when the validation above failed before the lock was taken.
+            fcntl.flock(descriptor, fcntl.LOCK_UN)
+            os.close(descriptor)
+
+    @contextlib.contextmanager
+    def staging(self, parent: Path, *, private: bool) -> Iterator[Path]:
+        """Yield a fresh ``.staging-*`` directory under ``parent`` and clean it up.
+
+        The directory gets mode 700 (private) or 755 (public). On exit, any
+        staging directory still present is made owner-writable again and
+        removed; removal errors are ignored.
+        """
+        stage = Path(tempfile.mkdtemp(prefix=".staging-", dir=parent))
+        os.chmod(stage, 0o700 if private else 0o755)
+        try:
+            yield stage
+        finally:
+            if stage.exists():
+                # install() freezes the stage read-only before renaming it, so
+                # a stage left behind may need its write bits restored before
+                # rmtree can delete it.
+                for path in stage.rglob("*"):
+                    metadata = os.lstat(path)
+                    if stat.S_ISDIR(metadata.st_mode):
+                        os.chmod(path, 0o700)
+                    elif stat.S_ISREG(metadata.st_mode):
+                        os.chmod(path, 0o600)
+                os.chmod(stage, 0o700)
+            shutil.rmtree(stage, ignore_errors=True)
+
+    @staticmethod
+    def complete(stage: Path, value: str, *, private: bool) -> None:
+        """Write the ``COMPLETE`` marker (``value`` plus a newline) and fsync ``stage``."""
+        _write_bytes(stage / "COMPLETE", (value + "\n").encode(), mode=0o600 if private else 0o644)
+        _fsync_dir(stage)
+
+    @staticmethod
+    def install(stage: Path, destination: Path, *, private: bool) -> None:
+        """Freeze ``stage`` and rename it to ``destination``.
+
+        When ``destination`` already exists it is verified instead: it must
+        be a frozen tree whose ``COMPLETE`` marker equals the destination's
+        directory name, and the stage is left in place. Its contents are not
+        compared with the stage here. A symlinked destination raises
+        ``PublisherError``.
+        """
+        if destination.is_symlink():
+            raise PublisherError(f"immutable destination is a symlink: {destination.name}")
+        if destination.exists():
+            _verify_frozen_tree(destination, private=private)
+            marker = destination / "COMPLETE"
+            if not marker.is_file() or marker.read_text().strip() != destination.name:
+                raise PublisherError(f"immutable destination is incomplete: {destination.name}")
+            return
+        _freeze_tree(stage, private=private)
+        os.rename(stage, destination)
+        # Persist the new directory entry in the parent.
+        _fsync_dir(destination.parent)
+        _verify_frozen_tree(destination, private=private)
+
+    def install_dataset(self, dataset: dict[str, Any]) -> tuple[str, int]:
+        """Validate ``dataset`` and store it under ``datasets/<sha256>/dataset.json``.
+
+        Returns ``(digest, size)``, where ``digest`` is ``_sha_bytes`` of the
+        canonical payload (newline included) and ``size`` is its length in
+        bytes. Raises ``PublisherError`` when the payload exceeds
+        ``MAX_PUBLIC_DATASET_BYTES`` or the stored object does not match.
+        """
+        validate_public_dataset(dataset)
+        payload = _canonical(dataset) + b"\n"
+        if len(payload) > MAX_PUBLIC_DATASET_BYTES:
+            raise PublisherError("public dataset exceeds the serving size limit")
+        digest = _sha_bytes(payload)
+        destination = self.datasets / digest
+        with self.staging(self.datasets, private=False) as stage:
+            _write_bytes(stage / "dataset.json", payload, mode=0o644)
+            self.complete(stage, digest, private=False)
+            self.install(stage, destination, private=False)
+        # install() returns early for an existing destination, so the stored
+        # bytes are re-checked here in both cases.
+        stored = destination / "dataset.json"
+        marker = destination / "COMPLETE"
+        if (not marker.is_file() or marker.read_text().strip() != digest
+                or _sha_file(stored) != digest or stored.stat().st_size != len(payload)):
+            raise PublisherError("stored dataset checksum differs after installation")
+        return digest, len(payload)
+
+    def update_channel(self, channel: str, digest: str, size: int, generated_at: str) -> None:
+        """Point ``channels/<channel>.json`` at the stored dataset ``digest``.
+
+        The dataset must be a frozen, complete tree whose file matches
+        ``digest``, ``size`` and ``generated_at``. The ``dev-latest`` channel
+        additionally requires a promoted dataset. The pointer is written to a
+        temporary file and moved into place with ``os.replace``.
+        """
+        if size > MAX_PUBLIC_DATASET_BYTES:
+            raise PublisherError("channel dataset exceeds the serving size limit")
+        _verify_frozen_tree(self.datasets / digest, private=False)
+        marker = self.datasets / digest / "COMPLETE"
+        if not marker.is_file() or marker.read_text().strip() != digest:
+            raise PublisherError("cannot advance a channel to an incomplete dataset")
+        dataset_path = self.datasets / digest / "dataset.json"
+        dataset = validate_public_dataset(strict_load(dataset_path))
+        if (
+            _sha_file(dataset_path) != digest
+            or dataset_path.stat().st_size != size
+            or dataset["generated_at"] != generated_at
+        ):
+            raise PublisherError("channel metadata differs from its stored dataset")
+        if channel == "dev-latest" and dataset["promotion"]["status"] != "promoted":
+            raise PublisherError("dev-latest may only reference a promoted dataset")
+        pointer = {
+            "format": FORMAT_CHANNEL,
+            "channel": channel,
+            "dataset": {
+                "path": f"datasets/{digest}/dataset.json",
+                "sha256": digest,
+                "bytes": size,
+            },
+            "generated_at": generated_at,
+        }
+        validate_channel(pointer, expected_channel=channel)
+        destination = self.channels / f"{channel}.json"
+        # The temporary name carries the PID; _write_bytes creates it exclusively.
+        temporary = self.channels / f".{channel}.tmp-{os.getpid()}"
+        try:
+            data = _canonical(pointer) + b"\n"
+            _write_bytes(temporary, data, mode=0o644)
+            os.replace(temporary, destination)
+            _fsync_dir(self.channels)
+        finally:
+            # No-op after a successful replace; removes the leftover otherwise.
+            temporary.unlink(missing_ok=True)
+
+    def verify_channel(self, channel: str) -> dict[str, Any]:
+        """Re-validate a channel pointer and the dataset it references.
+
+        Returns the validated pointer document. Raises ``PublisherError``
+        when the pointer file, the dataset tree, its checksum or size, the
+        ``COMPLETE`` marker, or the ``generated_at`` value disagree, or when
+        ``dev-latest`` references a non-promoted dataset.
+        """
+        channel_path = self.channels / f"{channel}.json"
+        _verify_regular_file(channel_path, 0o644)
+        pointer = validate_channel(strict_load(channel_path), expected_channel=channel)
+        target = self.public / pointer["dataset"]["path"]
+        _verify_frozen_tree(target.parent, private=False)
+        if target.stat().st_size != pointer["dataset"]["bytes"] or _sha_file(target) != pointer["dataset"]["sha256"]:
+            raise PublisherError(f"channel {channel} dataset checksum differs")
+        marker = target.parent / "COMPLETE"
+        if not marker.is_file() or marker.read_text().strip() != pointer["dataset"]["sha256"]:
+            raise PublisherError(f"channel {channel} dataset is incomplete")
+        dataset = validate_public_dataset(strict_load(target))
+        if pointer["generated_at"] != dataset["generated_at"]:
+            raise PublisherError(f"channel {channel} metadata differs from its dataset")
+        if channel == "dev-latest" and dataset["promotion"]["status"] != "promoted":
+            raise PublisherError("dev-latest points to a non-promoted dataset")
+        return pointer
+
+
+def _copy_source(source: Path, destination: Path) -> None:
+    """Copy the bytes of the regular file ``source`` into a new ``destination``.
+
+    ``destination`` is created exclusively with mode 600 and fsynced after
+    the last chunk.
+
+    Raises:
+        PublisherError: ``source`` is a symlink or not a regular file, either
+            at the path check or once it has been opened.
+    """
+    if source.is_symlink() or not source.is_file() or not stat.S_ISREG(source.stat().st_mode):
+        raise PublisherError(f"source must be a regular non-symlink file: {source}")
+    descriptor = os.open(source, os.O_RDONLY | getattr(os, "O_NOFOLLOW", 0))
+    try:
+        # The path check above can race with a replacement of ``source``, so
+        # the object that was actually opened is checked again.
+        if not stat.S_ISREG(os.fstat(descriptor).st_mode):
+            raise PublisherError(f"source must be a regular non-symlink file: {source}")
+        output = os.open(destination, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o600)
+        try:
+            while True:
+                chunk = os.read(descriptor, 1024 * 1024)
+                if not chunk:
+                    break
+                _write_all(output, chunk)
+            os.fsync(output)
+        finally:
+            os.close(output)
+    finally:
+        os.close(descriptor)
+
+
+def _archive_download_directory(source: Path, destination: Path) -> None:
+    """Pack the regular files below ``source`` into a new stored ZIP at ``destination``.
+
+    Members are added uncompressed in sorted path order with a fixed
+    1980-01-01 timestamp and mode 600. Directories themselves are not
+    recorded. Raises ``PublisherError`` for a symlink or non-regular entry,
+    an empty or oversized tree, an unsafe member name, or a member whose type
+    or size changes while it is being read.
+    """
+    if source.is_symlink() or not source.is_dir():
+        raise PublisherError(f"artifact directory is invalid: {source}")
+    files: list[Path] = []
+    for path in source.rglob("*"):
+        if path.is_symlink():
+            raise PublisherError("artifact directory contains a symlink")
+        if path.is_dir():
+            continue
+        if not path.is_file():
+            raise PublisherError("artifact directory contains a non-regular entry")
+        files.append(path)
+    files.sort()
+    if not files or len(files) > MAX_ARCHIVE_MEMBERS:
+        raise PublisherError("artifact directory has an invalid file count")
+    total = 0
+    # Mode "x" refuses to overwrite an existing destination.
+    with zipfile.ZipFile(destination, "x", compression=zipfile.ZIP_STORED) as archive:
+        for path in files:
+            descriptor = os.open(path, os.O_RDONLY | getattr(os, "O_NOFOLLOW", 0))
+            with os.fdopen(descriptor, "rb") as handle:
+                # Re-check the opened object rather than trusting the earlier walk.
+                metadata = os.fstat(handle.fileno())
+                if not stat.S_ISREG(metadata.st_mode):
+                    raise PublisherError("artifact directory member changed type")
+                size = metadata.st_size
+                total += size
+                if size > MAX_ARCHIVE_MEMBER_BYTES or total > MAX_ARCHIVE_TOTAL_BYTES:
+                    raise PublisherError("artifact directory exceeds size limits")
+                relative = path.relative_to(source).as_posix()
+                _safe_member(relative)
+                info = zipfile.ZipInfo(relative, date_time=(1980, 1, 1, 0, 0, 0))
+                info.compress_type = zipfile.ZIP_STORED
+                info.external_attr = (stat.S_IFREG | 0o600) << 16
+                # NOTE(review): archive.open(info, "w") is called without
+                # force_zip64 and info.file_size is left unset, so zipfile
+                # cannot pre-select ZIP64 for a member. Confirm that
+                # MAX_ARCHIVE_MEMBER_BYTES stays below the 2 GiB ZIP64 limit.
+                with archive.open(info, "w") as output:
+                    written = 0
+                    for chunk in iter(lambda: handle.read(1024 * 1024), b""):
+                        output.write(chunk)
+                        written += len(chunk)
+                    if written != size:
+                        raise PublisherError("artifact directory member changed size")
+    # Flush the finished archive to disk.
+    descriptor = os.open(destination, os.O_RDONLY)
+    try:
+        os.fsync(descriptor)
+    finally:
+        os.close(descriptor)
+
+
+def _artifact_name(source: Path) -> str:
+    """Derive the artifact name from a download directory or a ``.zip`` file.
+
+    A directory contributes its own name; a file contributes its name with a
+    trailing ``.zip`` removed. Raises ``PublisherError`` when a non-directory
+    source lacks the ``.zip`` suffix or the name does not fully match
+    ``ARTIFACT_NAME``.
+    """
+    is_directory = source.is_dir()
+    if is_directory:
+        name = source.name
+    else:
+        name = source.name.removesuffix(".zip")
+    wrong_extension = not is_directory and source.suffix != ".zip"
+    if wrong_extension or ARTIFACT_NAME.fullmatch(name) is None:
+        raise PublisherError(f"artifact source has an invalid GHA name: {source.name}")
+    return name
+
+
+def archive_incoming(
+    store: Store,
+    matrix: Path,
+    artifacts: Sequence[Path],
+    run: dict[str, Any],
+) -> tuple[str, Path, list[dict[str, Any]]]:
+    """Copy exact delivery bytes into immutable incoming before any JSON/ZIP parse.
+
+    The matrix is stored as ``sources/matrix.json`` and each artifact as
+    ``sources/artifact-NNNN.zip``, numbered in artifact-name order. An
+    artifact given as a directory is packed with
+    ``_archive_download_directory``; everything else is copied byte for byte.
+
+    Returns:
+        ``(ingest_id, installed_directory, source_records)``, where
+        ``ingest_id`` is the digest of the canonical ``{run, sources}``
+        document.
+
+    Raises:
+        PublisherError: no artifacts were given, artifact names are invalid
+            or duplicated, an artifact archive exceeds
+            ``MAX_ARCHIVE_TOTAL_BYTES``, a source is not a regular file, or
+            an already-installed object differs from this delivery.
+    """
+    if not artifacts:
+        raise PublisherError("at least one GitHub artifact archive is required")
+    with store.staging(store.incoming, private=True) as stage:
+        sources = stage / "sources"
+        sources.mkdir(mode=0o700)
+        copied: list[dict[str, Any]] = []
+        named_artifacts = sorted(
+            ((_artifact_name(path), path) for path in artifacts), key=lambda item: item[0]
+        )
+        artifact_names = [name for name, _ in named_artifacts]
+        if len(artifact_names) != len(set(artifact_names)):
+            raise PublisherError("artifact delivery contains duplicate GHA names")
+        inputs = [("matrix.json", matrix, "matrix", None)] + [
+            (f"artifact-{index:04d}.zip", path, "artifact", artifact_name)
+            for index, (artifact_name, path) in enumerate(named_artifacts)
+        ]
+        for name, source, kind, artifact_name in inputs:
+            destination = sources / name
+            # Decide by the entry's kind, not by path comparison. Only an
+            # artifact may be a download directory; a directory passed as the
+            # matrix falls through to _copy_source, which rejects it instead
+            # of zipping it into matrix.json.
+            is_artifact = kind == "artifact"
+            if is_artifact and source.is_dir():
+                _archive_download_directory(source, destination)
+            else:
+                if is_artifact and source.stat().st_size > MAX_ARCHIVE_TOTAL_BYTES:
+                    raise PublisherError("artifact archive exceeds the size limit")
+                _copy_source(source, destination)
+            copied.append({
+                **_file_metadata(destination, stage),
+                "kind": kind,
+                "artifact_name": artifact_name,
+            })
+        ingest_id = _sha_bytes(_canonical({"run": run, "sources": copied}))
+        incoming_manifest = {
+            "format": "collectivex.incoming.v1",
+            "schema_version": 1,
+            "ingest_id": ingest_id,
+            "run": run,
+            "sources": copied,
+        }
+        _write_json(stage / "incoming.json", incoming_manifest, mode=0o600)
+        store.complete(stage, ingest_id, private=True)
+        destination = store.incoming / ingest_id
+        store.install(stage, destination, private=True)
+    # install() keeps an already-existing object, so the installed manifest
+    # and files are compared with this delivery afterwards.
+    installed = store.incoming / ingest_id
+    if strict_load(installed / "incoming.json") != incoming_manifest:
+        raise PublisherError("existing incoming object differs from archived delivery")
+    for record in copied:
+        _resolve_bundle_file(installed, record)
+    return ingest_id, installed, copied
+
+
+def _safe_member(name: str) -> PurePosixPath:
+    """Parse an archive member name and reject names that could leave the artifact.
+
+    Backslashes and NUL bytes are refused outright. The parsed path must be
+    relative and non-empty, and none of its parts may be ``""``, ``"."`` or
+    ``".."``. Returns the parsed ``PurePosixPath``.
+    """
+    for forbidden in ("\\", "\0"):
+        if forbidden in name:
+            raise PublisherError("archive member has an unsafe separator")
+    member = PurePosixPath(name)
+    components = member.parts
+    if member.is_absolute() or not components:
+        raise PublisherError("archive member path escapes its artifact")
+    if not {"", ".", ".."}.isdisjoint(components):
+        raise PublisherError("archive member path escapes its artifact")
+    return member
+
+
+def extract_archive(archive: Path, destination: Path) -> list[Path]:
+    """Extract a bounded regular-file ZIP without trusting member paths or links.
+
+    Every member name goes through ``_safe_member``. Duplicate paths,
+    symlinks and other non-regular members, encrypted members, and members
+    or totals above the configured size limits are refused. Files are
+    created exclusively with mode 600 and fsynced; directories use mode 700.
+
+    Returns:
+        The paths of the extracted files, in archive order. Directory members
+        are created but not listed.
+
+    Raises:
+        PublisherError: the archive or one of its members is invalid,
+            corrupt, unsupported, or conflicts with another entry.
+    """
+    # Local import: zlib.error is what a damaged deflate stream raises.
+    import zlib
+
+    try:
+        handle = zipfile.ZipFile(archive)
+    except (OSError, zipfile.BadZipFile) as exc:
+        raise PublisherError("artifact is not a valid ZIP archive") from exc
+    extracted: list[Path] = []
+    seen: set[str] = set()
+    total = 0
+    with handle:
+        members = handle.infolist()
+        if not members or len(members) > MAX_ARCHIVE_MEMBERS:
+            raise PublisherError("artifact has an invalid member count")
+        for member in members:
+            path = _safe_member(member.filename.rstrip("/"))
+            key = path.as_posix()
+            if key in seen:
+                raise PublisherError("artifact contains duplicate member paths")
+            seen.add(key)
+            # The upper 16 bits of external_attr carry the Unix mode, when set.
+            mode = member.external_attr >> 16
+            if stat.S_ISLNK(mode) or (mode and not (stat.S_ISREG(mode) or stat.S_ISDIR(mode))):
+                raise PublisherError("artifact contains a non-regular member")
+            if member.flag_bits & 0x1:
+                raise PublisherError("encrypted artifact members are not accepted")
+            if member.file_size > MAX_ARCHIVE_MEMBER_BYTES:
+                raise PublisherError("artifact member exceeds the size limit")
+            total += member.file_size
+            if total > MAX_ARCHIVE_TOTAL_BYTES:
+                raise PublisherError("artifact exceeds the expanded size limit")
+            target = destination.joinpath(*path.parts)
+            try:
+                if member.is_dir():
+                    target.mkdir(parents=True, exist_ok=True, mode=0o700)
+                    continue
+                target.parent.mkdir(parents=True, exist_ok=True, mode=0o700)
+                output = os.open(target, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o600)
+            except (FileExistsError, NotADirectoryError) as exc:
+                # For example a file member "a" next to a member "a/b".
+                raise PublisherError("artifact contains conflicting member paths") from exc
+            try:
+                try:
+                    with handle.open(member, "r") as source:
+                        written = 0
+                        while True:
+                            chunk = source.read(1024 * 1024)
+                            if not chunk:
+                                break
+                            _write_all(output, chunk)
+                            written += len(chunk)
+                        if written != member.file_size:
+                            raise PublisherError("artifact member size changed during extraction")
+                except (zipfile.BadZipFile, zlib.error, NotImplementedError, EOFError) as exc:
+                    # Damaged member data is reported the same way as a
+                    # damaged archive instead of escaping as a raw error.
+                    raise PublisherError("artifact member is corrupt or unsupported") from exc
+                os.fsync(output)
+            finally:
+                os.close(output)
+            extracted.append(target)
+    return extracted
+
+
+def validate_matrix(document: Any) -> list[dict[str, Any]]:
+    """Validate the requested matrix and flatten its cases into rows.
+
+    Each row holds the case's ``sku``, the fields of its ``case`` mapping,
+    and the disposition and reason under ``_disposition`` / ``_reason``.
+    Failures from the safety check or the matrix validator (including
+    ``SystemExit``) are re-raised as ``PublisherError``.
+    """
+    try:
+        artifact_safety.assert_publication_safe([document])
+        validated = sweep_matrix.validate_matrix_document(document)
+    except (SystemExit, ValueError, artifact_safety.ArtifactSafetyError) as exc:
+        raise PublisherError(f"requested matrix is invalid: {exc}") from exc
+    rows: list[dict[str, Any]] = []
+    for requested in validated["requested_cases"]:
+        row = {"sku": requested["sku"]}
+        row.update(requested["case"])
+        row["_disposition"] = requested["disposition"]
+        row["_reason"] = requested["reason"]
+        rows.append(row)
+    return rows
+
+
+def _expected_deliveries(
+    matrix: dict[str, Any], cases: Sequence[dict[str, Any]], run: dict[str, Any]
+) -> dict[str, tuple[str, str, str]]:
+    """Map each requested case ID to the delivery expected to carry it.
+
+    Each value is a 3-tuple of strings. Unsupported cases share the
+    ``cxunsupported-<run>-<attempt>`` / ``"setup"`` entry; every other case
+    gets the ``cxshard-<shard>-<run>-<attempt>`` / ``"sweep"`` entry of the
+    shard listing it. Raises ``PublisherError`` when a case is listed by two
+    shards or a non-unsupported case is listed by none.
+    """
+    owner: dict[str, str] = {}
+    for shard in matrix["include"]:
+        for case_id in shard["case_ids"]:
+            if case_id in owner:
+                raise PublisherError("requested case appears in two runnable shards")
+            owner[case_id] = shard["id"]
+    run_id, run_attempt = run["run_id"], run["run_attempt"]
+    suffix = f"{run_id}-{run_attempt}"
+    prefix = f"{run_id}_{run_attempt}"
+    unsupported = (f"cxunsupported-{suffix}", "setup", f"{prefix}_unsupported")
+    deliveries: dict[str, tuple[str, str, str]] = {}
+    for case in cases:
+        case_id = case["case_id"]
+        if case["_disposition"] == "unsupported":
+            deliveries[case_id] = unsupported
+            continue
+        shard_id = owner.get(case_id)
+        if shard_id is None:
+            raise PublisherError("runnable case has no matrix shard")
+        deliveries[case_id] = (
+            f"cxshard-{shard_id}-{suffix}", "sweep", f"{prefix}_{shard_id}",
+        )
+    return deliveries
+
+
+def _document_git_run(document: dict[str, Any]) -> dict[str, Any] | None:
+    """Return the run provenance mapping of ``document``, or ``None``.
+
+    The mapping is ``provenance["git_run"]`` when that key exists, otherwise
+    ``provenance`` itself. ``None`` is returned when either of them is not a
+    dict.
+    """
+    provenance = document.get("provenance")
+    if isinstance(provenance, dict):
+        candidate = provenance.get("git_run", provenance)
+        if isinstance(candidate, dict):
+            return candidate
+    return None
+
+
+def _run_matches(document: dict[str, Any], run: dict[str, Any]) -> bool:
+    """Report whether the document's run provenance names the given ``run``.
+
+    The run ID, run attempt, source SHA and repository must all agree. The
+    document's run ID and attempt are compared as strings, and its
+    repository is read from ``repo`` or, when that is missing or falsy, from
+    ``repository``.
+    """
+    git_run = _document_git_run(document)
+    if git_run is None:
+        return False
+    if str(git_run.get("run_id")) != run["run_id"]:
+        return False
+    if str(git_run.get("run_attempt")) != str(run["run_attempt"]):
+        return False
+    if git_run.get("source_sha") != run["source_sha"]:
+        return False
+    repository = git_run.get("repo") or git_run.get("repository")
+    return repository == run["repository"]
+
+
+def _case_matches(document: dict[str, Any], expected: dict[str, Any]) -> bool:
+    """Report whether the document's case factors equal the requested case.
+
+    The scheduled case is ``expected`` without ``sku``, ``case_id`` and any
+    key starting with an underscore. It is compared, together with the
+    ``identity.V1_CASE_PROFILE`` profile and the SKU, against
+    ``document["identity"]["case_factors"]``.
+    """
+    excluded = {"sku", "case_id"}
+    scheduled: dict[str, Any] = {}
+    for key, value in expected.items():
+        if key in excluded or key.startswith("_"):
+            continue
+        scheduled[key] = value
+    observed = document.get("identity", {}).get("case_factors")
+    wanted = {
+        "case": scheduled,
+        "profile": identity.V1_CASE_PROFILE,
+        "sku": expected["sku"],
+    }
+    return observed == wanted
+
+
+def _outcome(document: dict[str, Any]) -> tuple[str, str | None]:
+    """Return ``(status, reason)`` for a document's outcome.
+
+    A successful outcome has no reason. Otherwise the document's own reason
+    is used when it is a string fully matching ``REASON``; failing that, a
+    default reason is chosen by status. A status with neither a valid native
+    reason nor a default raises ``PublisherError``.
+    """
+    outcome = document["outcome"]
+    status = outcome["status"]
+    if status == "success":
+        return status, None
+    native = outcome.get("reason")
+    if isinstance(native, str) and REASON.fullmatch(native):
+        return status, native
+    defaults = {
+        "unsupported": "unsupported-capability", "failed": "execution-failed",
+        "invalid": "validation-failed", "diagnostic": "diagnostic-evidence",
+    }
+    if status not in defaults:
+        raise PublisherError(f"unsupported native outcome {status!r}")
+    return status, defaults[status]
+
+
+def _attempt_record(
+    document: dict[str, Any], path: Path, root: Path, *, selected: bool
+) -> dict[str, Any]:
+    """Build the bundle-manifest record describing one attempt document.
+
+    Args:
+        document: The validated attempt document.
+        path: Location of the attempt document on disk.
+        root: Directory passed to ``_file_metadata`` alongside each file path.
+        selected: Whether this attempt is the one selected for its case.
+
+    Returns:
+        A dict with the attempt's identifiers, outcome and reason, file
+        metadata for the document (and its samples file for raw documents),
+        the runtime fingerprint digest, and the series and evidence IDs.
+
+    Raises:
+        PublisherError: if a raw document's runtime fingerprint digest differs
+            from the digest declared in its series factors.
+    """
+    normalized = contracts.normalize_attempt(document)
+    runtime = normalized["runtime_fingerprint"]
+    # Attempts without a runtime fingerprint get no digest.
+    runtime_sha = _sha_bytes(_canonical(runtime)) if runtime is not None else None
+    sample_record = None
+    evidence_ids: list[str] = []
+    series_ids: list[str] = []
+    # Only raw documents carry samples, measurement rows and a series identity.
+    if document["format"] == contracts.RAW_FORMAT:
+        # The samples file is addressed as a sibling of the attempt document.
+        sample_path = path.with_name(document["sample_artifact"]["path"])
+        sample_record = _file_metadata(sample_path, root)
+        evidence_ids = [row["evidence_id"] for row in document["measurement"]["rows"]]
+        series_ids = [document["identity"]["series_id"]]
+        declared = document["identity"]["series_factors"]["runtime_fingerprint_sha256"]
+        if runtime_sha != declared:
+            raise PublisherError("runtime fingerprint checksum differs from series identity")
+    status, reason = _outcome(document)
+    return {
+        "attempt_id": normalized["attempt_id"],
+        "allocation_id": normalized["allocation_id"],
+        "case_id": normalized["case_id"],
+        "outcome": status,
+        "reason": reason,
+        "selected": selected,
+        "document": _file_metadata(path, root),
+        "samples": sample_record,
+        "runtime_fingerprint_sha256": runtime_sha,
+        "series_ids": series_ids,
+        "evidence_ids": evidence_ids,
+    }
+
+
+def _validate_delivery_binding(
+    document: dict[str, Any], path: Path, raw_root: Path,
+    artifact_by_root: dict[str, str], expected_by_id: dict[str, dict[str, Any]],
+    expected_deliveries: dict[str, tuple[str, str, str]], run: dict[str, Any],
+) -> str:
+    """Check that an attempt belongs to the requested matrix, run and shard.
+
+    Args:
+        document: The validated attempt document.
+        path: Location of the document beneath ``raw_root``.
+        raw_root: Directory holding one extracted sub-directory per artifact.
+        artifact_by_root: Maps an extracted directory name to its artifact name.
+        expected_by_id: Requested matrix cases keyed by case ID.
+        expected_deliveries: Per case ID, the expected
+            ``(artifact name, job, execution id)`` triple.
+        run: Publisher run metadata the document's provenance must match.
+
+    Returns:
+        The document's case ID.
+
+    Raises:
+        PublisherError: on the first check that fails, in the order written
+            below.
+    """
+    case_id = document["identity"]["case_id"]
+    if case_id not in expected_by_id:
+        raise PublisherError("artifact contains an extra case outcome")
+    expected = expected_by_id[case_id]
+    if not _case_matches(document, expected):
+        raise PublisherError("attempt case coordinates differ from the requested matrix")
+    # An "unsupported" outcome is required exactly when the matrix scheduled the case as unsupported.
+    unsupported = document["outcome"]["status"] == "unsupported"
+    if (expected["_disposition"] == "unsupported") != unsupported:
+        raise PublisherError("terminal outcome differs from requested capability disposition")
+    if unsupported and document["outcome"]["reason"] != expected["_reason"]:
+        raise PublisherError("unsupported outcome reason differs from requested matrix")
+    if not _run_matches(document, run):
+        raise PublisherError("attempt provenance differs from publisher run metadata")
+    # The first path component under raw_root names the extracted artifact directory.
+    relative = path.relative_to(raw_root)
+    if len(relative.parts) < 2:
+        raise PublisherError("attempt document is outside a delivered artifact")
+    delivered_name = artifact_by_root.get(relative.parts[0])
+    expected_name, expected_job, expected_execution = expected_deliveries[case_id]
+    git_run = _document_git_run(document)
+    allocation = document["identity"]["allocation_factors"]
+    # The delivering archive, the provenance's artifact and job, and the
+    # allocation's execution ID must all point at the shard the matrix expects.
+    if (
+        git_run is None
+        or delivered_name != expected_name
+        or git_run["artifact"] != delivered_name
+        or git_run["job"] != expected_job
+        or allocation["execution_id"] != expected_execution
+    ):
+        raise PublisherError("attempt provenance differs from its delivered GHA shard")
+    return case_id
+
+
+def _parse_extracted(root: Path) -> tuple[list[tuple[Path, dict[str, Any]]], set[Path]]:
+    attempts: list[tuple[Path, dict[str, Any]]] = []
+    consumed_samples: set[Path] = set()
+    json_paths = sorted(path for path in root.rglob("*.json") if path.is_file())
+    for path in json_paths:
+        if path in consumed_samples:
+            continue
+        try:
+            document = contracts.strict_load(path)
+            artifact_safety.assert_publication_safe([document])
+            format_name = document.get("format") if isinstance(document, dict) else None
+            if format_name == contracts.SAMPLES_FORMAT:
+                _schema("samples-v1.schema.json", document)
+                # It must be claimed by a raw document; orphan checking happens after the scan.
+                continue
+            if format_name == contracts.RAW_FORMAT:
+                _schema("raw-case-v1.schema.json", document)
+                sample_path = path.with_name(document["sample_artifact"]["path"])
+                sample_document = contracts.strict_load(sample_path)
+                artifact_safety.assert_publication_safe([sample_document])
+                _schema("samples-v1.schema.json", sample_document)
+                validated = contracts.load_raw_attempt(path)
+                consumed_samples.add(sample_path)
+            elif format_name == contracts.TERMINAL_FORMAT:
+                _schema("terminal-outcome-v1.schema.json", document)
+                validated = contracts.validate_terminal_document(document)
+            else:
+                raise PublisherError(f"artifact contains unknown JSON document {path.name}")
+        except (
+            contracts.ContractError, artifact_safety.ArtifactSafetyError,
+            jsonschema.ValidationError, OSError,
+        ) as exc:
+            raise PublisherError(f"native contract rejected {path.name}: {exc}") from exc
+        attempts.append((path, validated))
+    orphan_samples = [
+        path for path in json_paths
+        if isinstance((doc := contracts.strict_load(path)), dict)
+        and doc.get("format") == contracts.SAMPLES_FORMAT
+        and path not in consumed_samples
+    ]
+    if orphan_samples:
+        raise PublisherError("artifact contains an orphan samples document")
+    if not attempts:
+        raise PublisherError("artifact contains zero native attempt documents")
+    return attempts, consumed_samples
+
+
+def build_bundle(
+    store: Store,
+    incoming_id: str,
+    incoming_path: Path,
+    run: dict[str, Any],
+) -> tuple[str, dict[str, Any], list[dict[str, Any]]]:
+    """Validate one exact workflow delivery and install its immutable private bundle.
+
+    Args:
+        store: Store providing the staging area and the bundle directory.
+        incoming_id: Expected ingest ID of the archived delivery.
+        incoming_path: Directory holding ``incoming.json`` and its source files.
+        run: Run metadata the delivery and every attempt must match.
+
+    Returns:
+        ``(bundle_id, bundle, selected_documents)``: the digest of the canonical
+        bundle manifest, the manifest itself, and the selected attempt document
+        of each expected case in sorted case-ID order.
+
+    Raises:
+        PublisherError: if the incoming manifest, source catalog, extracted
+            documents, case coverage, retry ordinals, runtime homogeneity, or
+            the installed bundle fail validation.
+    """
+    incoming_manifest = strict_load(incoming_path / "incoming.json")
+    _exact(
+        incoming_manifest,
+        {"format", "schema_version", "ingest_id", "run", "sources"},
+        "incoming",
+    )
+    artifact_safety.assert_publication_safe([incoming_manifest])
+    # The ingest ID must equal both the caller's ID and the digest of the
+    # canonical {run, sources} pair.
+    if (
+        incoming_manifest["format"] != "collectivex.incoming.v1"
+        or incoming_manifest["schema_version"] != 1
+        or incoming_manifest["ingest_id"] != incoming_id
+        or incoming_manifest["run"] != run
+        or _sha_bytes(_canonical({"run": run, "sources": incoming_manifest["sources"]}))
+        != incoming_id
+    ):
+        raise PublisherError("incoming manifest identity differs from archived delivery")
+    incoming_sources = _array(incoming_manifest["sources"], "incoming.sources", nonempty=True)
+    for index, record in enumerate(incoming_sources):
+        _exact(
+            record,
+            {"path", "sha256", "bytes", "kind", "artifact_name"},
+            f"incoming.sources[{index}]",
+        )
+        _resolve_bundle_file(incoming_path, record)
+    # NOTE(review): a record whose kind is neither "matrix" nor "artifact" falls
+    # through both filters and is not rejected here — confirm it is rejected upstream.
+    matrix_records = [record for record in incoming_sources if record["kind"] == "matrix"]
+    artifact_records = [record for record in incoming_sources if record["kind"] == "artifact"]
+    # Exactly one matrix record (with no artifact name) plus at least one
+    # well-formed, uniquely named artifact record.
+    if (
+        len(matrix_records) != 1
+        or matrix_records[0]["artifact_name"] is not None
+        or not artifact_records
+        or any(ARTIFACT_NAME.fullmatch(record["artifact_name"] or "") is None
+               for record in artifact_records)
+        or len({record["artifact_name"] for record in artifact_records}) != len(artifact_records)
+    ):
+        raise PublisherError("incoming source catalog is invalid")
+    matrix_source = _resolve_bundle_file(incoming_path, matrix_records[0])
+    matrix_document = strict_load(matrix_source)
+    expected_cases = validate_matrix(matrix_document)
+    expected_by_id = {case["case_id"]: case for case in expected_cases}
+    expected_deliveries = _expected_deliveries(matrix_document, expected_cases, run)
+    # The delivered archive names must be exactly the shard artifacts the matrix implies.
+    if {record["artifact_name"] for record in artifact_records} != {
+        delivery[0] for delivery in expected_deliveries.values()
+    }:
+        raise PublisherError("incoming artifact archive set differs from requested matrix shards")
+    with store.staging(store.bundles, private=True) as stage:
+        source_copy = stage / "source"
+        raw_root = stage / "raw"
+        source_copy.mkdir(mode=0o700)
+        raw_root.mkdir(mode=0o700)
+        matrix_path = stage / "matrix.json"
+        _copy_source(matrix_source, matrix_path)
+        source_records: list[dict[str, Any]] = []
+        artifact_by_root: dict[str, str] = {}
+        # Copy each archive into the stage and extract it into its own
+        # index-named directory, remembering which artifact each directory holds.
+        for index, source_record in enumerate(artifact_records):
+            archive = _resolve_bundle_file(incoming_path, source_record)
+            copied = source_copy / f"artifact-{index:04d}.zip"
+            _copy_source(archive, copied)
+            source_records.append({
+                **_file_metadata(copied, stage),
+                "artifact_name": source_record["artifact_name"],
+            })
+            artifact_root = raw_root / f"artifact-{index:04d}"
+            artifact_root.mkdir(mode=0o700)
+            artifact_by_root[artifact_root.name] = source_record["artifact_name"]
+            extract_archive(copied, artifact_root)
+        parsed, consumed_samples = _parse_extracted(raw_root)
+        created_at = _latest_timestamp(
+            [document["generated_at"] for _, document in parsed]
+        )
+        # Every extracted regular file must be an attempt document or a claimed samples file.
+        consumed_files = {path for path, _ in parsed} | consumed_samples
+        extracted_files = {
+            path for path in raw_root.rglob("*")
+            if path.is_file() and not path.is_symlink()
+        }
+        if consumed_files != extracted_files:
+            raise PublisherError("artifact contains an unconsumed non-native member")
+        by_case: dict[str, list[tuple[Path, dict[str, Any]]]] = {}
+        for path, document in parsed:
+            case_id = _validate_delivery_binding(
+                document, path, raw_root, artifact_by_root, expected_by_id,
+                expected_deliveries, run,
+            )
+            by_case.setdefault(case_id, []).append((path, document))
+        missing = set(expected_by_id) - set(by_case)
+        if missing:
+            raise PublisherError(f"artifact is missing {len(missing)} requested case outcomes")
+        attempt_records: list[dict[str, Any]] = []
+        selections: list[dict[str, Any]] = []
+        selected_documents: list[dict[str, Any]] = []
+        runtime_hashes: set[str] = set()
+        outcome_counts = {name: 0 for name in OUTCOMES}
+        for case_id in sorted(expected_by_id):
+            case_attempts = by_case[case_id]
+            ordinals = [document["identity"]["attempt_ordinal"] for _, document in case_attempts]
+            allocations_for_case = {
+                document["identity"]["allocation_id"] for _, document in case_attempts
+            }
+            # Retries must share one allocation and be numbered 1..N without gaps.
+            if len(allocations_for_case) != 1 or sorted(ordinals) != list(
+                range(1, len(ordinals) + 1)
+            ):
+                raise PublisherError(
+                    "case retries must retain contiguous ordinals in one allocation"
+                )
+            # The attempt with the highest ordinal is the selected one for the case.
+            _, selected_document = max(
+                case_attempts, key=lambda item: item[1]["identity"]["attempt_ordinal"]
+            )
+            selected_id = selected_document["identity"]["attempt_id"]
+            selected_documents.append(selected_document)
+            selected_status, _ = _outcome(selected_document)
+            selections.append({
+                "case_id": case_id,
+                "selected_attempt_id": selected_id,
+                "outcome": selected_status,
+            })
+            outcome_counts[selected_status] += 1
+            for path, document in sorted(
+                case_attempts, key=lambda item: item[1]["identity"]["attempt_ordinal"]
+            ):
+                normalized = contracts.normalize_attempt(document)
+                if document["format"] == contracts.RAW_FORMAT:
+                    sample_path = path.with_name(document["sample_artifact"]["path"])
+                    if sample_path not in consumed_samples:
+                        raise PublisherError("validated raw attempt lost its samples document")
+                record = _attempt_record(
+                    document, path, stage,
+                    selected=normalized["attempt_id"] == selected_id,
+                )
+                if record["runtime_fingerprint_sha256"]:
+                    runtime_hashes.add(record["runtime_fingerprint_sha256"])
+                attempt_records.append(record)
+        # Every extracted byte is covered; the bundle manifest anchors this checksum catalog.
+        payload_records = [_file_metadata(path, stage) for path in _tree_files(stage)]
+        checksum_document = {
+            "format": "collectivex.checksums.v1",
+            "files": payload_records,
+        }
+        checksum_path = stage / "checksums.json"
+        _write_json(checksum_path, checksum_document, mode=0o600)
+        bundle = {
+            "format": FORMAT_BUNDLE,
+            "schema_version": 1,
+            "created_at": created_at,
+            "ingest_id": incoming_id,
+            "run": run,
+            "matrix": _file_metadata(matrix_path, stage),
+            "sources": source_records,
+            "attempts": attempt_records,
+            "coverage": {
+                "expected_cases": len(expected_cases),
+                "terminal_cases": len(selections),
+                "complete": len(selections) == len(expected_cases),
+                "outcome_counts": outcome_counts,
+                "selections": selections,
+            },
+            "runtime_fingerprints": sorted(runtime_hashes),
+            "checksums": _file_metadata(checksum_path, stage),
+            "validation": {
+                "policy": PUBLISHER_POLICY,
+                "passed": True,
+                "checks": [
+                    "archive-safety", "checksums", "exact-coverage", "identity",
+                    "native-schema", "privacy", "runtime-homogeneity", "terminal-outcomes",
+                ],
+            },
+        }
+        validate_bundle_manifest(bundle)
+        # Runtime homogeneity is scoped to a realized allocation, not across unlike SKUs.
+        by_allocation: dict[str, set[str]] = {}
+        for attempt in attempt_records:
+            fingerprint = attempt["runtime_fingerprint_sha256"]
+            if fingerprint:
+                by_allocation.setdefault(attempt["allocation_id"], set()).add(fingerprint)
+        if any(len(values) != 1 for values in by_allocation.values()):
+            raise PublisherError("runtime fingerprint is heterogeneous within an allocation")
+        # The bundle ID is the digest of the canonical manifest bytes plus a trailing newline.
+        bundle_bytes = _canonical(bundle) + b"\n"
+        bundle_id = _sha_bytes(bundle_bytes)
+        _write_bytes(stage / "bundle.json", bundle_bytes, mode=0o600)
+        store.complete(stage, bundle_id, private=True)
+        store.install(stage, store.bundles / bundle_id, private=True)
+    # Read the installed bundle back and confirm it carries the manifest built above.
+    installed = load_bundle(store, bundle_id)
+    if installed["manifest"] != bundle:
+        raise PublisherError("existing bundle differs from validated manifest")
+    return bundle_id, bundle, selected_documents
+
+
+def _slug(value: Any, fallback: str = "unknown") -> str:
+    text = re.sub(r"[^a-z0-9_.-]+", "-", str(value or "").lower()).strip("-.")
+    return text[:128] if text and SAFE_ID.fullmatch(text[:128]) else fallback
+
+
+def _derived_id(prefix: str, value: Any) -> str:
+    return f"{prefix}{_sha_bytes(_canonical(value))}"
+
+
+def _git_run(document: dict[str, Any]) -> dict[str, Any]:
+    return _document_git_run(document) or {}
+
+
+def _public_attempt(document: dict[str, Any], *, selected: bool = False) -> dict[str, Any]:
+    """Build the public summary record for one attempt document.
+
+    Args:
+        document: The attempt document.
+        selected: Whether this attempt is the selected one for its case.
+
+    Returns:
+        A dict with the attempt's identifiers, run ID and attempt, outcome,
+        failure mode, reason, evidence references and completion timestamp.
+        ``series_id`` is filled only for a selected, successful attempt.
+    """
+    normalized = contracts.normalize_attempt(document)
+    # NOTE(review): _git_run returns {} when provenance is missing, so the
+    # run["run_id"] lookup below would raise KeyError — presumably documents
+    # are validated before reaching here; confirm.
+    run = _git_run(document)
+    # Only raw documents have measurement rows to reference as evidence.
+    evidence = (
+        [{"evidence_id": row["evidence_id"], "point_id": row["point_id"]}
+         for row in document["measurement"]["rows"]]
+        if document["format"] == contracts.RAW_FORMAT else []
+    )
+    status, reason = _outcome(document)
+    failure_mode = document["outcome"].get("failure_mode")
+    # A missing or malformed failure mode falls back to the reason (None on success).
+    if not isinstance(failure_mode, str) or REASON.fullmatch(failure_mode) is None:
+        failure_mode = None if status == "success" else reason
+    series_id = normalized["series_id"] if status == "success" and selected else None
+    return {
+        "attempt_id": normalized["attempt_id"],
+        "evidence": evidence,
+        "case_id": normalized["case_id"],
+        "allocation_id": normalized["allocation_id"],
+        "run_id": str(run["run_id"]),
+        "run_attempt": int(run["run_attempt"]),
+        "attempt_index": document["identity"]["attempt_ordinal"],
+        "selected": selected,
+        "outcome": status,
+        "failure_mode": failure_mode,
+        "reason": reason,
+        "series_id": series_id,
+        "completed_at": document["generated_at"],
+    }
+
+
+def _ratio(values: Sequence[float]) -> float | None:
+    return max(values) / min(values) if len(values) >= REQUIRED_ALLOCATIONS and min(values) > 0 else None
+
+
+def _eligibility_record(
+    allocations: Sequence[str],
+    *,
+    complete: bool,
+    correct: bool,
+    measured: bool,
+    stable_ordering: bool,
+    p50_ratio: float | None,
+    p99_ratio: float | None,
+    extra_reasons: Sequence[str] = (),
+) -> dict[str, Any]:
+    ids = sorted(set(allocations))
+    stable_p50 = p50_ratio is not None and p50_ratio <= P50_STABILITY_LIMIT
+    stable_p99 = p99_ratio is not None and p99_ratio <= P99_STABILITY_LIMIT
+    reasons = list(extra_reasons)
+    for condition, reason in (
+        (len(ids) >= REQUIRED_ALLOCATIONS, "insufficient-allocations"),
+        (complete, "incomplete-repeat-coverage"),
+        (correct, "correctness-failed"),
+        (measured, "missing-measured-roundtrip-p99"),
+        (stable_p50, "unstable-p50"),
+        (stable_p99, "unstable-p99"),
+        (stable_ordering, "unstable-ordering"),
+    ):
+        if not condition:
+            reasons.append(reason)
+    reasons = sorted(set(reasons))
+    decision = not reasons
+    return {
+        "decision_grade": decision,
+        "allocation_ids": ids,
+        "complete": complete,
+        "correct": correct,
+        "measured_roundtrip_p99": measured,
+        "stable_p50": stable_p50,
+        "stable_p99": stable_p99,
+        "stable_ordering": stable_ordering,
+        "p50_max_min_ratio": p50_ratio,
+        "p99_max_min_ratio": p99_ratio,
+        "reasons": reasons,
+    }
+
+
+def _aggregate_percentiles(values: Sequence[dict[str, Any]]) -> dict[str, float]:
+    return {
+        name: float(statistics.median(float(value[name]) for value in values))
+        for name in ("p50", "p90", "p95", "p99")
+    }
+
+
+def _aggregate_component(
+    rows: Sequence[dict[str, Any]], name: str
+) -> dict[str, Any] | None:
+    components = [row["components"][name] for row in rows]
+    if all(component["availability"] == "unavailable" for component in components):
+        return None
+    if any(component["availability"] == "unavailable" for component in components):
+        raise PublisherError("component availability differs across repeat allocations")
+    latency = _aggregate_percentiles([component["percentiles_us"] for component in components])
+    if name == "isolated_sum":
+        return {
+            "origin": "derived",
+            "latency_us": latency,
+            "logical_bytes": None,
+            "logical_payload_rate_gbps_at_latency_percentile": None,
+            "sample_count": None,
+        }
+    byte_values = {row["logical_bytes"][name] for row in rows}
+    if len(byte_values) != 1:
+        raise PublisherError("logical byte accounting differs across repeat allocations")
+    logical_bytes = byte_values.pop()
+    rates = {statistic: logical_bytes / (latency[statistic] * 1000.0) for statistic in latency}
+    return {
+        "origin": "measured",
+        "latency_us": latency,
+        "logical_bytes": logical_bytes,
+        "logical_payload_rate_gbps_at_latency_percentile": rates,
+        "sample_count": 512,
+    }
+
+
+def _exact_repeat_value(values: Sequence[Any], label: str) -> Any:
+    if not values or len({_canonical(value) for value in values}) != 1:
+        raise PublisherError(f"{label} differs across repeat allocations")
+    return values[0]
+
+
+def _eplb_descriptor(document: dict[str, Any]) -> dict[str, Any]:
+    value = document["case"]["eplb"]
+    return {
+        "enabled": value["enabled"],
+        "planner": value["planner"],
+        "mapping_sha256": value["mapping_hash"],
+        "logical_experts": value["num_logical_experts"],
+        "physical_experts": value["num_physical_experts"],
+        "redundant_experts": value["num_redundant"],
+        "reference_tokens_per_rank": value["reference_tokens_per_rank"],
+        "replicated_experts": value["replicated_experts"],
+        "max_replicas": value["max_replicas"],
+        "imbalance_before": value["imbalance_before"],
+        "imbalance_after": value["imbalance_after"],
+    }
+
+
+def _routing_facts(row: dict[str, Any]) -> dict[str, Any]:
+    routing = row["routing"]
+    return {
+        "fanout_mean": routing["fanout_mean"],
+        "recv_tokens_max": row["receive"]["max"],
+        "expert_load_cv": routing["expert_load_cv"],
+        "payload_rank_cv": routing["payload_rank_cv"],
+        "hotspot_ratio": routing["hotspot_ratio"],
+        "empty_expert_count": routing["empty_expert_count"],
+        "empty_rank_count": routing["empty_rank_count"],
+        "routed_copies": routing["routed_copies"],
+    }
+
+
+def _series_extra_reasons(documents: Sequence[dict[str, Any]]) -> list[str]:
+    reasons: set[str] = set()
+    for document in documents:
+        validity = document["outcome"]["validity"]
+        rows = document["measurement"]["rows"]
+        if validity.get("provenance_complete") is not True:
+            reasons.add("incomplete-provenance")
+        if validity.get("workload_source") != "canonical-serialized":
+            reasons.add("noncanonical-workload")
+        if validity.get("anomaly_free") is not True or any(row["anomalies"] for row in rows):
+            reasons.add("unresolved-anomaly")
+        if validity.get("semantic_correctness") != "pass":
+            reasons.add("semantic-correctness-failed")
+        if validity.get("measurement_conformance") != "conformant" or validity.get("sampling_conformance") != "conformant":
+            reasons.add("measurement-nonconformant")
+        scopes = {row["correctness"].get("scope") for row in rows}
+        if scopes != {"dispatch-metadata-and-transformed-combine"}:
+            reasons.add("expert-oracle-incomplete")
+    return sorted(reasons)
+
+
+# Display labels keyed by backend ID; lookups elsewhere in this file fall back
+# to the raw ID when a backend has no entry here.
+BACKEND_LABELS = {
+    "deepep": "DeepEP V1",
+    "deepep-v2": "DeepEP V2",
+    "deepep-hybrid": "DeepEP Hybrid",
+    "uccl": "UCCL",
+    "mori": "MoRI",
+    "nccl-ep": "NCCL/RCCL reference",
+}
+
+
+def _build_series(
+    series_id: str,
+    documents: Sequence[dict[str, Any]],
+    expected_repeats: int,
+) -> tuple[dict[str, Any], dict[str, Any]]:
+    """Aggregate the repeat documents of one series into its public record.
+
+    Args:
+        series_id: Identity every document must carry.
+        documents: One document per repeat allocation of the series.
+        expected_repeats: Number of repeats required for complete coverage.
+
+    Returns:
+        ``(series, internal)``: the public series record, and a dict holding
+        the source documents, per-run roundtrip metrics, and the first
+        document's series factors.
+
+    Raises:
+        PublisherError: if the series is empty, mixes identities, reuses an
+            allocation or workflow run, has differing token coverage, or has a
+            workload ID that is not a typed workload ID.
+    """
+    if not documents:
+        raise PublisherError("cannot aggregate an empty series")
+    # Descriptive fields of the series are read from the first document.
+    first = documents[0]
+    if any(document["identity"]["series_id"] != series_id for document in documents):
+        raise PublisherError("series aggregation mixed identities")
+    allocations = [document["identity"]["allocation_id"] for document in documents]
+    if len(allocations) != len(set(allocations)):
+        raise PublisherError("series repeats reuse an allocation identity")
+    # One {tokens_per_rank: row} map per document, so rows can be lined up across repeats.
+    row_maps = [
+        {row["tokens_per_rank"]: row for row in document["measurement"]["rows"]}
+        for document in documents
+    ]
+    token_sets = {tuple(sorted(rows)) for rows in row_maps}
+    if len(token_sets) != 1:
+        raise PublisherError("series token coverage differs across allocations")
+    tokens = list(next(iter(token_sets)))
+    # Stability is judged on the worst max/min roundtrip ratio over all token counts.
+    p50_ratios = [
+        _ratio([rows[token]["components"]["roundtrip"]["percentiles_us"]["p50"] for rows in row_maps])
+        for token in tokens
+    ]
+    p99_ratios = [
+        _ratio([rows[token]["components"]["roundtrip"]["percentiles_us"]["p99"] for rows in row_maps])
+        for token in tokens
+    ]
+    p50_ratio = max((value for value in p50_ratios if value is not None), default=None)
+    p99_ratio = max((value for value in p99_ratios if value is not None), default=None)
+    correct = all(
+        row["correctness"]["passed"]
+        for document in documents for row in document["measurement"]["rows"]
+    )
+    measured = all(
+        row["components"]["roundtrip"]["availability"] == "measured"
+        and row["components"]["roundtrip"]["percentiles_us"].get("p99") is not None
+        for document in documents for row in document["measurement"]["rows"]
+    )
+    eligibility = _eligibility_record(
+        allocations,
+        complete=len(documents) == expected_repeats,
+        correct=correct,
+        measured=measured,
+        # Ordering is defined only across alternatives in a controlled cohort.
+        stable_ordering=True,
+        p50_ratio=p50_ratio,
+        p99_ratio=p99_ratio,
+        extra_reasons=_series_extra_reasons(documents),
+    )
+    case = first["case"]
+    shape = case["shape"]
+    topology = first["topology"]
+    runtime = first["runtime_fingerprint"]
+    workload_id = first["workload"]["workload_id"]
+    if not identity.is_typed_id(workload_id, "workload"):
+        raise PublisherError("raw workload is not canonical")
+    backend_id = case["backend"]
+    resource_raw = first["implementation"]["resource_profile"]
+    public_config = contracts.public_series_config(
+        kernel_generation=first["implementation"]["kernel_generation"],
+        provenance=first["implementation"]["provenance"],
+        resource_profile=resource_raw,
+        resource_mode=case["resource_mode"],
+        device_product=topology["device_product"],
+    )
+    resource_profile = public_config["resource"]["profile"]
+    configured_units = public_config["resource"]["configured_units"]
+    units_kind = public_config["resource"]["comm_units_kind"]
+    # Prefer "<units> <KIND>" for the label; fall back to the profile name.
+    resource_label = (
+        f"{configured_units} {str(units_kind).upper()}"
+        if configured_units is not None and units_kind
+        else resource_profile
+    )
+    eplb = _exact_repeat_value(
+        [_eplb_descriptor(document) for document in documents], "EPLB descriptor"
+    )
+    points: list[dict[str, Any]] = []
+    # Per workflow run and token count: roundtrip p50/p99 latency and payload rate.
+    run_metrics: dict[str, dict[int, dict[str, float]]] = {}
+    for document, rows in zip(documents, row_maps, strict=True):
+        run_id = str(_git_run(document)["run_id"])
+        if run_id in run_metrics:
+            raise PublisherError("series has two allocations from one workflow run")
+        run_metrics[run_id] = {}
+        for token in tokens:
+            latency = rows[token]["components"]["roundtrip"]["percentiles_us"]
+            logical_bytes = rows[token]["logical_bytes"]["roundtrip"]
+            run_metrics[run_id][token] = {
+                "latency_us": {statistic: latency[statistic] for statistic in ("p50", "p99")},
+                "logical_payload_rate_gbps_at_latency_percentile": {
+                    statistic: logical_bytes / (latency[statistic] * 1000.0)
+                    for statistic in ("p50", "p99")
+                },
+            }
+    for token in tokens:
+        rows = [row_map[token] for row_map in row_maps]
+        routing = _exact_repeat_value(
+            [_routing_facts(row) for row in rows], "routing/load facts"
+        )
+        components = {
+            name: _aggregate_component(rows, name)
+            for name in ("dispatch", "combine", "roundtrip")
+        }
+        # NOTE(review): the else branch reads components["combine"] and the
+        # token-rate block below reads components["roundtrip"] without a None
+        # check — presumably both are always available when dispatch is; confirm.
+        if components["dispatch"] is None:
+            components["isolated_sum"] = None
+        else:
+            # The isolated sum adds the aggregated dispatch and combine latencies per percentile.
+            latency = {
+                statistic: components["dispatch"]["latency_us"][statistic]
+                + components["combine"]["latency_us"][statistic]
+                for statistic in ("p50", "p90", "p95", "p99")
+            }
+            components["isolated_sum"] = {
+                "origin": "derived", "latency_us": latency, "logical_bytes": None,
+                "logical_payload_rate_gbps_at_latency_percentile": None, "sample_count": None,
+            }
+        points.append({
+            "point_id": rows[0]["point_id"],
+            "tokens_per_rank": token,
+            "global_tokens": token * case["ep_size"],
+            "correct": all(row["correctness"]["passed"] for row in rows),
+            "routing": routing,
+            "components": components,
+            # Global tokens divided by the roundtrip latency converted from microseconds to seconds.
+            "roundtrip_token_rate_at_latency_percentile": {
+                statistic: (token * case["ep_size"])
+                / (components["roundtrip"]["latency_us"][statistic] * 1e-6)
+                for statistic in ("p50", "p90", "p95", "p99")
+            },
+            "evidence_ids": [row["evidence_id"] for row in rows],
+        })
+    series = {
+        "series_id": series_id,
+        "label": (
+            f"{case['runner'].upper()} / {BACKEND_LABELS.get(backend_id, backend_id)} / "
+            f"EP{case['ep_size']} / {case['phase']} / {shape['routing']}"
+            f"{' + EPLB' if case['eplb']['enabled'] else ''} / {resource_label}"
+        ),
+        "status": "decision-grade" if eligibility["decision_grade"] else "diagnostic",
+        "case_ids": sorted({document["identity"]["case_id"] for document in documents}),
+        "allocation_ids": sorted(allocations),
+        "model": _slug(case["workload_name"]),
+        "suite": _slug(case["suite"]),
+        "phase": case["phase"],
+        "publication_tier": case["required_publication"],
+        "backend": {
+            "id": _slug(backend_id),
+            "label": BACKEND_LABELS.get(backend_id, backend_id),
+            "role": "reference" if backend_id == "nccl-ep" else "library",
+            **public_config["backend"],
+        },
+        "build": {
+            "implementation_contract_sha256": first["identity"]["series_factors"][
+                "implementation_contract_sha256"
+            ],
+            "public_config_sha256": first["identity"]["series_factors"][
+                "public_config_sha256"
+            ],
+            "routing_control_sha256": first["identity"]["series_factors"][
+                "routing_control_sha256"
+            ],
+            "runtime_fingerprint_sha256": first["identity"]["series_factors"][
+                "runtime_fingerprint_sha256"
+            ],
+            "image_digest": first["identity"]["series_factors"]["image_digest"],
+            "source_sha": first["identity"]["series_factors"]["source_sha"],
+            "squash_sha256": first["identity"]["series_factors"]["squash_sha256"],
+        },
+        "system": {
+            "sku": _slug(case["runner"]),
+            "label": public_config["system"]["label"],
+            "vendor": runtime["vendor"],
+            "topology_class": _slug(topology["topology_class"]),
+            "transport": _slug(topology["transport"]),
+            "world_size": topology["world_size"],
+            "ep_size": case["ep_size"],
+            "placement": topology["placement"],
+        },
+        "workload": {
+            "workload_id": workload_id,
+            "hidden": shape["hidden"],
+            "top_k": shape["topk"],
+            "experts": case["eplb"]["num_logical_experts"],
+            "routing": shape["routing"],
+            "eplb": case["eplb"]["enabled"],
+            "dispatch_dtype": shape["dispatch_dtype"],
+            "combine_dtype": shape["quant"]["combine_output_dtype"],
+            "activation_profile": shape["activation_profile"],
+        },
+        "eplb": eplb,
+        "resource": public_config["resource"],
+        "measurement": {
+            "contract": first["measurement"]["contract"],
+            "sampling_contract": first["measurement"]["sampling"]["contract"],
+            "iters": first["measurement"]["sampling"]["iterations_per_trial"],
+            "trials": first["measurement"]["sampling"]["trials"],
+            "warmups": first["measurement"]["sampling"]["warmup_iterations"],
+            "samples_per_component": first["measurement"]["sampling"]["samples_per_component"],
+            "headline_component": "roundtrip",
+            "headline_percentile": "p99",
+        },
+        "points": points,
+        "eligibility": eligibility,
+    }
+    internal = {
+        "documents": list(documents),
+        "run_metrics": run_metrics,
+        "series_factors": first["identity"]["series_factors"],
+    }
+    return series, internal
+
+
+def _resolve_bundle_file(root: Path, record: dict[str, Any]) -> Path:
+    path = root.joinpath(*PurePosixPath(record["path"]).parts)
+    try:
+        path.relative_to(root)
+    except ValueError as exc:
+        raise PublisherError("bundle record escapes its directory") from exc
+    if path.resolve() != path or path.is_symlink() or not path.is_file():
+        raise PublisherError("bundle record points to a missing or linked file")
+    if path.stat().st_size != record["bytes"] or _sha_file(path) != record["sha256"]:
+        raise PublisherError("bundle file checksum differs from its manifest")
+    return path
+
+
+def load_bundle(store: Store, bundle_id: str) -> dict[str, Any]:
+    """Load one stored bundle and cross-check it against its own manifest.
+
+    The bundle lives at ``store.bundles / bundle_id``. Its marker, manifest
+    digest, checksum catalog, source archives, matrix, and every attempt
+    document are verified before anything is returned.
+
+    Returns:
+        A dict with ``id``, ``root``, ``manifest``, ``cases`` (from the bundle's
+        matrix), ``documents`` (keyed by attempt ID) and ``selected`` (the
+        selected attempt's document keyed by case ID).
+
+    Raises:
+        PublisherError: on any of the consistency failures checked below.
+    """
+    if HEX64.fullmatch(bundle_id) is None:
+        raise PublisherError("bundle ID must be a SHA-256 digest")
+    root = store.bundles / bundle_id
+    # The directory itself must not be a symlink and must carry a COMPLETE marker.
+    if root.is_symlink() or not (root / "COMPLETE").is_file():
+        raise PublisherError(f"bundle {bundle_id} is missing or incomplete")
+    _verify_frozen_tree(root, private=True)
+    if (root / "COMPLETE").read_text().strip() != bundle_id:
+        raise PublisherError("bundle COMPLETE marker differs")
+    manifest_path = root / "bundle.json"
+    # The bundle ID is the digest of bundle.json itself.
+    if _sha_file(manifest_path) != bundle_id:
+        raise PublisherError("bundle directory digest differs from bundle.json")
+    manifest = validate_bundle_manifest(strict_load(manifest_path))
+    checksum_path = _resolve_bundle_file(root, manifest["checksums"])
+    checksum_document = strict_load(checksum_path)
+    checksum_document = _exact(checksum_document, {"format", "files"}, "checksums")
+    if checksum_document["format"] != "collectivex.checksums.v1":
+        raise PublisherError("bundle checksum format is invalid")
+    records = [_file_record(value, f"checksums.files[{index}]")
+               for index, value in enumerate(_array(checksum_document["files"], "checksums.files"))]
+    _unique([record["path"] for record in records], "checksums.files[].path")
+    # Each cataloged file must exist under the bundle with matching size and digest.
+    for record in records:
+        _resolve_bundle_file(root, record)
+    # The catalog must list exactly the files in the tree other than
+    # bundle.json and checksums.json (matched by file name).
+    expected_paths = {
+        path.relative_to(root).as_posix() for path in _tree_files(root)
+        if path.name not in {"bundle.json", "checksums.json"}
+    }
+    if {record["path"] for record in records} != expected_paths:
+        raise PublisherError("bundle checksum catalog does not cover its payload exactly")
+    # Sources must be listed in order as source/artifact-NNNN.zip; remember
+    # which artifact name each archive key stands for.
+    artifact_by_root: dict[str, str] = {}
+    for index, source in enumerate(manifest["sources"]):
+        _resolve_bundle_file(root, source)
+        archive_key = f"artifact-{index:04d}"
+        if source["path"] != f"source/{archive_key}.zip":
+            raise PublisherError("bundle source catalog order/path differs")
+        artifact_by_root[archive_key] = source["artifact_name"]
+    if len(set(artifact_by_root.values())) != len(artifact_by_root):
+        raise PublisherError("bundle source catalog repeats an artifact name")
+    matrix_path = _resolve_bundle_file(root, manifest["matrix"])
+    matrix_document = strict_load(matrix_path)
+    cases = validate_matrix(matrix_document)
+    expected_by_id = {case["case_id"]: case for case in cases}
+    expected_deliveries = _expected_deliveries(
+        matrix_document, cases, manifest["run"]
+    )
+    # The manifest's selections must name exactly the matrix's case IDs.
+    if {item["case_id"] for item in manifest["coverage"]["selections"]} != set(expected_by_id):
+        raise PublisherError("bundle selected coverage differs from requested matrix")
+    documents: dict[str, dict[str, Any]] = {}
+    runtime_fingerprints: set[str] = set()
+    for attempt in manifest["attempts"]:
+        document_path = _resolve_bundle_file(root, attempt["document"])
+        document = contracts.strict_load(document_path)
+        artifact_safety.assert_publication_safe([document])
+        if document.get("format") == contracts.RAW_FORMAT:
+            # Raw attempt: the manifest's sample record must point at the
+            # sample file the document names, next to the document itself.
+            _schema("raw-case-v1.schema.json", document)
+            sample_path = document_path.with_name(document["sample_artifact"]["path"])
+            if attempt["samples"] is None:
+                raise PublisherError("raw attempt is missing its sample manifest record")
+            manifest_sample_path = _resolve_bundle_file(root, attempt["samples"])
+            if manifest_sample_path != sample_path:
+                raise PublisherError("sample manifest record points to the wrong raw evidence")
+            sample_document = contracts.strict_load(sample_path)
+            artifact_safety.assert_publication_safe([sample_document])
+            _schema("samples-v1.schema.json", sample_document)
+            # From here on the document is whatever load_raw_attempt returns
+            # (presumably a validated view of the same file; confirm in contracts).
+            document = contracts.load_raw_attempt(document_path)
+        else:
+            # Any other format is handled as a terminal outcome, which must
+            # not name a sample artifact.
+            if attempt["samples"] is not None:
+                raise PublisherError("terminal attempt unexpectedly names a sample artifact")
+            _schema("terminal-outcome-v1.schema.json", document)
+            document = contracts.validate_terminal_document(document)
+        _validate_delivery_binding(
+            document, document_path, root / "raw", artifact_by_root,
+            expected_by_id, expected_deliveries, manifest["run"],
+        )
+        # Rebuild the attempt record from the document and require it to
+        # equal the manifest entry exactly.
+        expected_record = _attempt_record(
+            document, document_path, root, selected=attempt["selected"]
+        )
+        if expected_record != attempt:
+            raise PublisherError("bundle attempt record differs from native document")
+        # Only truthy fingerprints are collected.
+        if attempt["runtime_fingerprint_sha256"]:
+            runtime_fingerprints.add(attempt["runtime_fingerprint_sha256"])
+        documents[attempt["attempt_id"]] = document
+    if sorted(runtime_fingerprints) != manifest["runtime_fingerprints"]:
+        raise PublisherError("bundle runtime fingerprint catalog differs from attempts")
+    # Map each covered case to the document of its selected attempt.
+    selected = {
+        selection["case_id"]: documents[selection["selected_attempt_id"]]
+        for selection in manifest["coverage"]["selections"]
+    }
+    return {
+        "id": bundle_id,
+        "root": root,
+        "manifest": manifest,
+        "cases": cases,
+        "documents": documents,
+        "selected": selected,
+    }
+
+
+def _cohort_control(
+    kind: str, series: dict[str, Any], internal: dict[str, Any]
+) -> tuple[dict[str, Any], list[str], list[str], Any]:
+    binary_build = series["build"]
+    source = binary_build["source_sha"]
+    workload = series["workload"]
+    shape = {
+        key: workload[key]
+        for key in ("hidden", "top_k", "experts", "dispatch_dtype", "combine_dtype", "activation_profile")
+    }
+    common = {
+        "model": series["model"], "phase": series["phase"], "shape": shape,
+        "measurement": series["measurement"], "ep_size": series["system"]["ep_size"],
+    }
+    if kind == "library":
+        control = {**common, "system": series["system"], "workload": workload,
+                   "resource_mode": series["resource"]["mode"], "source": source}
+        return control, ["system", "workload", "phase", "measurement", "resource.mode", "source"], ["backend", "resource"], series["backend"]["id"]
+    if kind == "chip":
+        control = {**common, "backend": series["backend"], "source": source,
+                   "workload": workload, "resource_mode": series["resource"]["mode"]}
+        return control, ["backend", "source", "workload", "phase", "measurement", "resource.mode"], ["system", "resource"], series["system"]
+    if kind == "system":
+        control = {**common, "workload": workload, "source": source}
+        varying = [series["system"]["sku"], series["backend"]["id"], series["resource"]["profile"]]
+        return control, ["workload", "phase", "measurement", "source"], ["system", "backend", "resource"], varying
+    if kind == "routing":
+        control = {
+            **common,
+            "backend": series["backend"],
+            "system": series["system"],
+            "resource": series["resource"],
+            "build": _routing_build_control(binary_build),
+        }
+        varying = [
+            workload["routing"], workload["eplb"],
+            binary_build["implementation_contract_sha256"],
+        ]
+        return (
+            control,
+            ["backend", "implementation-static-build", "system", "model-shape", "phase", "measurement", "resource"],
+            ["workload.routing", "workload.eplb", "implementation-config"],
+            varying,
+        )
+    raise PublisherError(f"unknown cohort kind {kind}")
+
+
+def _cohort_ordering(
+    members: Sequence[dict[str, Any]], internals: dict[str, dict[str, Any]], tokens: Sequence[int]
+) -> tuple[bool, int]:
+    run_ids = set.intersection(*(
+        set(internals[member["series_id"]]["run_metrics"]) for member in members
+    ))
+    if len(run_ids) < REQUIRED_ALLOCATIONS:
+        return False, len(run_ids)
+    orders: list[tuple[str, str, int, str, tuple[str, ...]]] = []
+    for run_id in sorted(run_ids):
+        for token in tokens:
+            for measure in ("latency_us", "logical_payload_rate_gbps_at_latency_percentile"):
+                for statistic in ("p50", "p99"):
+                    ordered = tuple(
+                        member["series_id"]
+                        for member in sorted(
+                            members,
+                            key=lambda item: (
+                                internals[item["series_id"]]["run_metrics"][run_id][token][measure][statistic],
+                                item["series_id"],
+                            ),
+                            reverse=measure == "logical_payload_rate_gbps_at_latency_percentile",
+                        )
+                    )
+                    orders.append((measure, statistic, token, run_id, ordered))
+    for token in tokens:
+        for measure in ("latency_us", "logical_payload_rate_gbps_at_latency_percentile"):
+            for statistic in ("p50", "p99"):
+                observed = {
+                    entry[4]
+                    for entry in orders
+                    if entry[0] == measure and entry[1] == statistic and entry[2] == token
+                }
+                if len(observed) != 1:
+                    return False, len(run_ids)
+    return True, len(run_ids)
+
+
+def build_decisions(
+    series: Sequence[dict[str, Any]], internals: dict[str, dict[str, Any]]
+) -> tuple[list[dict[str, Any]], list[dict[str, Any]], list[dict[str, Any]], list[dict[str, Any]]]:
+    """Derive cohorts, rankings, recommendations and sensitivities from series.
+
+    Series are grouped into cohorts per kind (library, chip, system, routing)
+    by their control from ``_cohort_control``. Rankings, recommendations and
+    routing sensitivities are produced only for cohorts whose eligibility is
+    decision-grade.
+
+    Returns:
+        ``(cohorts, rankings, recommendations, sensitivities)``, each sorted by
+        its own ID field.
+
+    Raises:
+        PublisherError: if a member's metric unit differs from the expected one.
+    """
+    cohorts: list[dict[str, Any]] = []
+    for kind in ("library", "chip", "system", "routing"):
+        # Group series by canonicalized control; each entry keeps the series,
+        # its variant value, and the controlled/varying factor labels.
+        groups: dict[bytes, list[tuple[dict[str, Any], Any, list[str], list[str]]]] = {}
+        for item in series:
+            # Library cohorts take only "library" backends; system cohorts
+            # take only "reference" backends.
+            if kind == "library" and item["backend"]["role"] != "library":
+                continue
+            if kind == "system" and item["backend"]["role"] != "reference":
+                continue
+            control, controlled, varying, variant = _cohort_control(kind, item, internals[item["series_id"]])
+            groups.setdefault(_canonical(control), []).append((item, variant, controlled, varying))
+        for entries in groups.values():
+            # A cohort needs at least two series and at least two distinct variants.
+            variants = {_canonical(entry[1]) for entry in entries}
+            if len(entries) < 2 or len(variants) < 2:
+                continue
+            members = sorted((entry[0] for entry in entries), key=lambda item: item["series_id"])
+            token_sets = [set(point["tokens_per_rank"] for point in member["points"]) for member in members]
+            # tokens: token counts every member measured; same_points: all
+            # members measured exactly the same token counts.
+            tokens = sorted(set.intersection(*token_sets))
+            same_points = len({tuple(sorted(values)) for values in token_sets}) == 1
+            ordering, aligned_runs = _cohort_ordering(members, internals, tokens) if tokens else (False, 0)
+            allocations = sorted({value for member in members for value in member["allocation_ids"]})
+            # The cohort carries the largest non-null ratio among its members,
+            # or None when no member reports one.
+            p50_ratio = max(
+                (member["eligibility"]["p50_max_min_ratio"] for member in members
+                 if member["eligibility"]["p50_max_min_ratio"] is not None), default=None
+            )
+            p99_ratio = max(
+                (member["eligibility"]["p99_max_min_ratio"] for member in members
+                 if member["eligibility"]["p99_max_min_ratio"] is not None), default=None
+            )
+            # Inherit member reasons except "unstable-ordering"; the cohort's
+            # own ordering result is passed separately as stable_ordering.
+            extra = {
+                reason for member in members for reason in member["eligibility"]["reasons"]
+                if reason not in {"unstable-ordering"}
+            }
+            if aligned_runs < REQUIRED_ALLOCATIONS:
+                extra.add("incomplete-aligned-repeats")
+            # Routing cohorts need exactly one uniform, non-EPLB member ...
+            if kind == "routing" and sum(
+                member["workload"]["routing"] == "uniform"
+                and not member["workload"]["eplb"]
+                for member in members
+            ) != 1:
+                extra.add("missing-uniform-baseline")
+            # ... and exactly the three (routing, eplb) anchor combinations.
+            if kind == "routing" and {
+                (member["workload"]["routing"], member["workload"]["eplb"])
+                for member in members
+            } != {("uniform", False), ("zipf", False), ("zipf", True)}:
+                extra.add("incomplete-routing-anchors")
+            if kind == "routing" and _routing_implementation_mismatch(members):
+                extra.add("implementation-config-mismatch")
+            # Routing cohorts only need some shared token; other kinds need
+            # identical token coverage across members.
+            if not tokens or (kind != "routing" and not same_points):
+                extra.add("unmatched-token-coverage")
+            eligibility = _eligibility_record(
+                allocations,
+                complete=all(member["eligibility"]["complete"] for member in members)
+                and bool(tokens) and (kind == "routing" or same_points),
+                correct=all(member["eligibility"]["correct"] for member in members),
+                measured=all(member["eligibility"]["measured_roundtrip_p99"] for member in members),
+                stable_ordering=ordering,
+                p50_ratio=p50_ratio,
+                p99_ratio=p99_ratio,
+                extra_reasons=sorted(extra),
+            )
+            member_ids = [member["series_id"] for member in members]
+            # One comparable-experimental member makes the whole cohort
+            # comparable-experimental; otherwise it is official.
+            publication_tier = (
+                "comparable-experimental"
+                if any(member["publication_tier"] == "comparable-experimental" for member in members)
+                else "official"
+            )
+            # Factor labels are taken from the group's first entry.
+            controlled, varying = entries[0][2], entries[0][3]
+            cohort_id = _derived_id("cxcohort-v1-", {
+                "kind": kind, "series_ids": member_ids,
+                "controlled_factors": controlled, "varying_factors": varying,
+            })
+            # The "chip" kind is labelled "Platform" in cohort labels.
+            kind_label = "Platform" if kind == "chip" else kind.title()
+            # Label context is taken from the first member in series_id order.
+            first = members[0]
+            routing_label = first["workload"]["routing"] + (
+                "+EPLB" if first["workload"]["eplb"] else ""
+            )
+            context = {
+                "library": (
+                    f"{first['system']['sku'].upper()} EP{first['system']['ep_size']} / "
+                    f"{first['phase']} / {routing_label}"
+                ),
+                "chip": (
+                    f"{first['backend']['label']} EP{first['system']['ep_size']} / "
+                    f"{first['phase']} / {routing_label}"
+                ),
+                "system": (
+                    f"Reference EP{first['system']['ep_size']} / {first['phase']} / "
+                    f"{routing_label}"
+                ),
+                "routing": (
+                    f"{first['system']['sku'].upper()} / {first['backend']['label']} / "
+                    f"EP{first['system']['ep_size']} / {first['phase']}"
+                ),
+            }[kind]
+            cohorts.append({
+                "cohort_id": cohort_id,
+                "kind": kind,
+                "label": f"{context} / {kind_label} contrast ({len(members)} series)",
+                "description": (
+                    "Publisher-controlled NCCL/RCCL system comparison"
+                    if kind == "system"
+                    else f"Publisher-controlled {kind_label.lower()} comparison"
+                ),
+                "series_ids": member_ids,
+                "controlled_factors": controlled,
+                "varying_factors": varying,
+                "publication_tier": publication_tier,
+                "eligibility": eligibility,
+            })
+    cohorts.sort(key=lambda item: item["cohort_id"])
+    series_by_id = {item["series_id"]: item for item in series}
+    rankings: list[dict[str, Any]] = []
+    recommendations: list[dict[str, Any]] = []
+    sensitivities: list[dict[str, Any]] = []
+    for cohort in cohorts:
+        # Only decision-grade cohorts produce rankings, recommendations or sensitivities.
+        if not cohort["eligibility"]["decision_grade"]:
+            continue
+        members = [series_by_id[series_id] for series_id in cohort["series_ids"]]
+        tokens = sorted(set.intersection(*(
+            {point["tokens_per_rank"] for point in member["points"]} for member in members
+        )))
+        # One ranking per (token, measure, statistic) on the roundtrip operation.
+        for token in tokens:
+            for measure, objective, unit in (
+                ("latency_us", "min", "us"), ("logical_payload_rate_gbps_at_latency_percentile", "max", "GB/s")
+            ):
+                for statistic in ("p50", "p99"):
+                    metric = {
+                        "operation": "roundtrip", "statistic": statistic,
+                        "measure": measure, "objective": objective,
+                        "tokens_per_rank": token, "phase": members[0]["phase"],
+                    }
+                    entries = []
+                    for member in members:
+                        point_id, value, observed_unit = _metric_value(member, metric)
+                        if observed_unit != unit:
+                            raise PublisherError("publisher metric unit differs")
+                        # rank is a placeholder here; it is assigned after sorting.
+                        entries.append({
+                            "rank": 0, "series_id": member["series_id"], "point_id": point_id,
+                            "value": value, "unit": unit,
+                        })
+                    # Best value first: ascending for "min", descending for
+                    # "max" (the series_id tie-break is reversed along with it).
+                    entries.sort(key=lambda item: (item["value"], item["series_id"]), reverse=objective == "max")
+                    for rank, entry in enumerate(entries, 1):
+                        entry["rank"] = rank
+                    ranking_id = _derived_id("cxranking-v1-", {
+                        "cohort_id": cohort["cohort_id"], "metric": metric,
+                    })
+                    metric_label = _metric_label(measure, statistic)
+                    rankings.append({
+                        "ranking_id": ranking_id, "cohort_id": cohort["cohort_id"],
+                        "label": f"{cohort['kind'].title()} {metric_label} T={token}",
+                        "metric": metric, "entries": entries,
+                        "publication_tier": cohort["publication_tier"],
+                        "eligibility": cohort["eligibility"],
+                    })
+                    # Recommendations are only issued for official-tier cohorts.
+                    if cohort["publication_tier"] != "official":
+                        continue
+                    objective_name = (
+                        f"min-{statistic}-latency"
+                        if measure == "latency_us"
+                        else f"max-payload-rate-at-{statistic}-latency"
+                    )
+                    # The recommendation is the rank-1 entry of the ranking.
+                    top = entries[0]
+                    recommendation_id = _derived_id("cxrecommendation-v1-", {
+                        "objective": objective_name, "ranking_id": ranking_id,
+                    })
+                    recommendations.append({
+                        "recommendation_id": recommendation_id,
+                        "cohort_id": cohort["cohort_id"],
+                        "label": f"Best {metric_label} at T={token}",
+                        "objective": objective_name,
+                        "series_id": top["series_id"], "point_id": top["point_id"],
+                        "value": top["value"], "unit": top["unit"],
+                        "rationale": "Top stable measured roundtrip result in a controlled cohort",
+                        "publication_tier": cohort["publication_tier"],
+                        "eligibility": cohort["eligibility"],
+                    })
+        if cohort["kind"] == "routing":
+            # Sensitivities compare every other member against the first
+            # uniform, non-EPLB member, over the same shared tokens as above.
+            baseline = next(
+                (member for member in members
+                 if member["workload"]["routing"] == "uniform" and not member["workload"]["eplb"]),
+                None,
+            )
+            if baseline:
+                for candidate in members:
+                    if candidate is baseline:
+                        continue
+                    for token in tokens:
+                        for measure, objective in (("latency_us", "min"), ("logical_payload_rate_gbps_at_latency_percentile", "max")):
+                            for statistic in ("p50", "p99"):
+                                metric = {
+                                    "operation": "roundtrip", "statistic": statistic,
+                                    "measure": measure, "objective": objective,
+                                    "tokens_per_rank": token, "phase": baseline["phase"],
+                                }
+                                _, base_value, _ = _metric_value(baseline, metric)
+                                _, candidate_value, _ = _metric_value(candidate, metric)
+                                sensitivity_id = _derived_id("cxsensitivity-v1-", {
+                                    "baseline": baseline["series_id"], "candidate": candidate["series_id"],
+                                    "cohort": cohort["cohort_id"], "metric": metric,
+                                })
+                                sensitivities.append({
+                                    "sensitivity_id": sensitivity_id,
+                                    "cohort_id": cohort["cohort_id"],
+                                    "label": (
+                                        f"Routing sensitivity: "
+                                        f"{_metric_label(measure, statistic)} T={token}"
+                                    ),
+                                    "baseline_series_id": baseline["series_id"],
+                                    "candidate_series_id": candidate["series_id"],
+                                    "metric": metric,
+                                    # Relative change versus the baseline.
+                                    # NOTE(review): this divides by base_value, so a zero
+                                    # baseline raises ZeroDivisionError; confirm that
+                                    # _metric_value never yields 0 here.
+                                    "signed_change_ratio": (candidate_value - base_value) / base_value,
+                                    "publication_tier": cohort["publication_tier"],
+                                    "eligibility": cohort["eligibility"],
+                                })
+    rankings.sort(key=lambda item: item["ranking_id"])
+    recommendations.sort(key=lambda item: item["recommendation_id"])
+    sensitivities.sort(key=lambda item: item["sensitivity_id"])
+    return cohorts, rankings, recommendations, sensitivities
+
+
+def _require_runnable_promotion_success(
+    bundles: Sequence[dict[str, Any]], cases: dict[str, dict[str, Any]]
+) -> None:
+    for bundle in bundles:
+        for case_id, case in cases.items():
+            if case["_disposition"] != "runnable":
+                continue
+            status, _ = _outcome(bundle["selected"][case_id])
+            if status != "success":
+                raise PublisherError(
+                    "promotion requires every runnable matrix case to succeed "
+                    "in every selected bundle"
+                )
+            prior_statuses = {
+                _outcome(document)[0]
+                for document in bundle["documents"].values()
+                if document["identity"]["case_id"] == case_id
+            }
+            if prior_statuses != {"success"}:
+                raise PublisherError(
+                    "promotion rejects runnable cases with failed, invalid, or diagnostic retries"
+                )
+
+
+def _expected_chip_cohort_count(series: Sequence[dict[str, Any]]) -> int:
+    groups: dict[bytes, set[bytes]] = {}
+    for item in series:
+        control, variant = _public_cohort_factors("chip", item)
+        groups.setdefault(_canonical(control), set()).add(_canonical(variant))
+    return sum(len(variants) >= 2 for variants in groups.values())
+
+
+def _require_promotion_cohorts(
+    cohorts: Sequence[dict[str, Any]], series: Sequence[dict[str, Any]]
+) -> None:
+    eligible_kinds = {
+        cohort["kind"]
+        for cohort in cohorts
+        if cohort["eligibility"]["decision_grade"]
+    }
+    missing = [kind for kind in REQUIRED_COHORT_KINDS if kind not in eligible_kinds]
+    if missing:
+        raise PublisherError(
+            "promotion lacks decision-grade cohort kinds: " + ", ".join(missing)
+        )
+    for kind, expected in REQUIRED_PROMOTION_COHORT_COUNTS.items():
+        members = [cohort for cohort in cohorts if cohort["kind"] == kind]
+        if len(members) != expected or any(
+            not cohort["eligibility"]["decision_grade"] for cohort in members
+        ):
+            raise PublisherError(
+                f"promotion requires exactly {expected} decision-grade {kind} cohorts"
+            )
+
+    chip_cohorts = [cohort for cohort in cohorts if cohort["kind"] == "chip"]
+    expected_chips = _expected_chip_cohort_count(series)
+    if len(chip_cohorts) != expected_chips or any(
+        not cohort["eligibility"]["decision_grade"] for cohort in chip_cohorts
+    ):
+        raise PublisherError(
+            f"promotion requires all {expected_chips} derived chip cohorts to be decision-grade"
+        )
+
+    by_id = {item["series_id"]: item for item in series}
+    anchors = {("uniform", False), ("zipf", False), ("zipf", True)}
+    for cohort in (
+        item for item in cohorts
+        if item["kind"] == "routing" and item["eligibility"]["decision_grade"]
+    ):
+        observed = {
+            (by_id[series_id]["workload"]["routing"], by_id[series_id]["workload"]["eplb"]):
+            by_id[series_id]
+            for series_id in cohort["series_ids"]
+        }
+        if len(cohort["series_ids"]) != len(anchors) or set(observed) != anchors:
+            raise PublisherError(
+                "promotion routing cohorts require exact uniform, zipf, and zipf+EPLB anchors"
+            )
+        if (
+            observed[("uniform", False)]["build"]["implementation_contract_sha256"]
+            != observed[("zipf", False)]["build"]["implementation_contract_sha256"]
+        ):
+            raise PublisherError(
+                "promotion routing cohorts require identical off-EPLB generated implementation"
+            )
+
+
+def _require_promotion_series(series: Sequence[dict[str, Any]]) -> None:
+    if not series or any(item["status"] != "decision-grade" for item in series):
+        raise PublisherError("promotion has unstable or incomplete required series")
+
+
+def build_dataset(
+    store: Store,
+    bundle_ids: Sequence[str],
+    *,
+    promote: bool,
+) -> dict[str, Any]:
+    """Assemble the public dataset document from a set of stored bundles.
+
+    Args:
+        store: Store whose bundles are loaded via ``load_bundle``.
+        bundle_ids: Non-empty, duplicate-free bundle IDs to combine.
+        promote: When true, the result is marked ``"promoted"`` and the
+            additional promotion requirements below are enforced; otherwise
+            the result is marked ``"diagnostic"``.
+
+    Returns:
+        The dataset dict, after it has passed ``validate_public_dataset``.
+
+    Raises:
+        PublisherError: if the bundle IDs are empty or repeated, the bundles do
+            not share one matrix, or a promotion requirement is not met.
+    """
+    if not bundle_ids or len(bundle_ids) != len(set(bundle_ids)):
+        raise PublisherError("dataset requires unique explicit bundle IDs")
+    loaded = [load_bundle(store, bundle_id) for bundle_id in bundle_ids]
+    # Order bundles by numeric run ID, then run attempt, then bundle ID.
+    loaded.sort(key=lambda bundle: (
+        int(bundle["manifest"]["run"]["run_id"]),
+        bundle["manifest"]["run"]["run_attempt"],
+        bundle["id"],
+    ))
+    # All bundles must reference the same matrix digest and the same case IDs.
+    matrix_ids = {bundle["manifest"]["matrix"]["sha256"] for bundle in loaded}
+    case_sets = [{case["case_id"] for case in bundle["cases"]} for bundle in loaded]
+    if len(matrix_ids) != 1 or len({tuple(sorted(values)) for values in case_sets}) != 1:
+        raise PublisherError("dataset bundles do not share one exact requested matrix")
+    run_ids = [bundle["manifest"]["run"]["run_id"] for bundle in loaded]
+    # Promotion needs exactly REQUIRED_ALLOCATIONS bundles with distinct run IDs.
+    if promote and (
+        len(loaded) != REQUIRED_ALLOCATIONS
+        or len(run_ids) != len(set(run_ids))
+    ):
+        raise PublisherError("promotion requires three independent complete workflow runs")
+    if promote and matrix_ids != {CANONICAL_FULL_V1_MATRIX_SHA256}:
+        raise PublisherError("promotion requires the canonical full-v1 matrix")
+    # Case definitions are taken from the first bundle in sorted order.
+    cases = {case["case_id"]: case for case in loaded[0]["cases"]}
+    if promote:
+        _require_runnable_promotion_success(loaded, cases)
+    all_documents = [
+        document for bundle in loaded for document in bundle["documents"].values()
+    ]
+    # Attempt IDs chosen as the selection for some case in some bundle.
+    selected_ids = {
+        selection["selected_attempt_id"]
+        for bundle in loaded for selection in bundle["manifest"]["coverage"]["selections"]
+    }
+    public_attempts = [
+        _public_attempt(
+            document, selected=document["identity"]["attempt_id"] in selected_ids
+        )
+        for document in all_documents
+    ]
+    _unique([attempt["attempt_id"] for attempt in public_attempts], "dataset attempts")
+    # Per case, the selected document from each bundle, in sorted bundle order.
+    selected_by_case: dict[str, list[dict[str, Any]]] = {
+        case_id: [bundle["selected"][case_id] for bundle in loaded]
+        for case_id in sorted(cases)
+    }
+    coverage: list[dict[str, Any]] = []
+    for case_id, case in sorted(cases.items()):
+        attempts = sorted(
+            (attempt for attempt in public_attempts if attempt["case_id"] == case_id),
+            key=lambda attempt: (
+                int(attempt["run_id"]), attempt["run_attempt"],
+                attempt["attempt_index"], attempt["attempt_id"],
+            ),
+        )
+        # Coverage reports the selection from the last bundle in sorted order.
+        selected = _public_attempt(selected_by_case[case_id][-1], selected=True)
+        coverage.append({
+            "case_id": case_id,
+            "label": f"{case['sku'].upper()} / {case['backend']} / EP{case['ep']} / {case['phase']} / {case['routing']}",
+            "required": True,
+            "sku": _slug(case["sku"]),
+            "backend": _slug(case["backend"]),
+            "phase": case["phase"],
+            "disposition": case["_disposition"],
+            "selected_attempt_id": selected["attempt_id"],
+            "outcome": selected["outcome"],
+            "failure_mode": selected["failure_mode"],
+            # Unsupported cases report the matrix's reason instead of the attempt's.
+            "reason": case["_reason"] if case["_disposition"] == "unsupported" else selected["reason"],
+            "attempt_ids": [attempt["attempt_id"] for attempt in attempts],
+        })
+    # Only selected raw documents with a successful outcome feed the series.
+    by_series: dict[str, list[dict[str, Any]]] = {}
+    for case_documents in selected_by_case.values():
+        for document in case_documents:
+            if (
+                document["format"] == contracts.RAW_FORMAT
+                and document["outcome"]["status"] == "success"
+            ):
+                by_series.setdefault(document["identity"]["series_id"], []).append(document)
+    series: list[dict[str, Any]] = []
+    internals: dict[str, dict[str, Any]] = {}
+    for series_id, documents in sorted(by_series.items()):
+        item, internal = _build_series(series_id, documents, len(loaded))
+        series.append(item)
+        internals[series_id] = internal
+    cohorts, rankings, recommendations, sensitivities = build_decisions(series, internals)
+    allocation_ids = sorted({attempt["allocation_id"] for attempt in public_attempts})
+    status = "promoted" if promote else "diagnostic"
+    dataset = {
+        "format": FORMAT_PUBLIC,
+        "schema_version": 1,
+        "generated_at": _latest_timestamp(
+            [bundle["manifest"]["created_at"] for bundle in loaded]
+        ),
+        "source_bundle_ids": sorted(bundle_ids),
+        "promotion": {
+            "status": status,
+            "reason": None,
+            "matrix_id": next(iter(matrix_ids)),
+            "allocation_ids": allocation_ids,
+            "required_allocations": REQUIRED_ALLOCATIONS,
+            # Both counts are the number of coverage entries.
+            "requested_cases": len(coverage),
+            "terminal_cases": len(coverage),
+            "policy": POLICY,
+        },
+        "coverage": coverage,
+        "attempts": sorted(public_attempts, key=lambda attempt: attempt["attempt_id"]),
+        "series": series,
+        "cohorts": cohorts,
+        "rankings": rankings,
+        "recommendations": recommendations,
+        "sensitivities": sensitivities,
+    }
+    # Series and cohort promotion gates run on the built results, before the
+    # final schema validation.
+    if promote:
+        _require_promotion_series(series)
+        _require_promotion_cohorts(cohorts, series)
+    validate_public_dataset(dataset)
+    return dataset
+
+
+def _quarantine_dataset(reason: str, generated_at: str) -> dict[str, Any]:
+    dataset = {
+        "format": FORMAT_PUBLIC,
+        "schema_version": 1,
+        "generated_at": generated_at,
+        "source_bundle_ids": [],
+        "promotion": {
+            "status": "quarantined",
+            "reason": reason,
+            "matrix_id": None,
+            "allocation_ids": [],
+            "required_allocations": REQUIRED_ALLOCATIONS,
+            "requested_cases": 0,
+            "terminal_cases": 0,
+            "policy": POLICY,
+        },
+        "coverage": [],
+        "attempts": [],
+        "series": [],
+        "cohorts": [],
+        "rankings": [],
+        "recommendations": [],
+        "sensitivities": [],
+    }
+    validate_public_dataset(dataset)
+    return dataset
+
+
+def quarantine_incoming(
+    store: Store, ingest_id: str, reason: str, generated_at: str
+) -> str:
+    if REASON.fullmatch(reason) is None:
+        raise PublisherError("quarantine reason must be a machine code")
+    public_reason = f"{reason}-{ingest_id}"
+    if REASON.fullmatch(public_reason) is None:
+        raise PublisherError("quarantine reason and incoming ID exceed the public reason contract")
+    manifest = {
+        "format": "collectivex.quarantine.v1",
+        "schema_version": 1,
+        "created_at": generated_at,
+        "incoming_id": ingest_id,
+        "reason": reason,
+    }
+    digest = _sha_bytes(_canonical(manifest))
+    with store.staging(store.quarantine, private=True) as stage:
+        _write_json(stage / "quarantine.json", manifest, mode=0o600)
+        store.complete(stage, digest, private=True)
+        store.install(stage, store.quarantine / digest, private=True)
+    if _sha_bytes(_canonical(strict_load(store.quarantine / digest / "quarantine.json"))) != digest:
+        raise PublisherError("existing quarantine object differs")
+    # The incoming digest distinguishes separate rejected deliveries while preserving
+    # byte-identical output when the operator retries the same immutable input.
+    dataset = _quarantine_dataset(public_reason, generated_at)
+    dataset_digest, size = store.install_dataset(dataset)
+    store.update_channel("latest-attempt", dataset_digest, size, generated_at)
+    return digest
+
+
+def _store_from_args(args: argparse.Namespace) -> Store:
+    root = args.store_root or os.environ.get("COLLECTIVEX_STORE_ROOT")
+    if not root:
+        raise PublisherError("COLLECTIVEX_STORE_ROOT or --store-root is required")
+    if not Path(root).is_absolute():
+        raise PublisherError("COLLECTIVEX_STORE_ROOT must be an absolute path")
+    return Store(root)
+
+
+def _run_metadata(args: argparse.Namespace) -> dict[str, Any]:
+    """Validate offline operator assertions about a completed successful GHA run.
+
+    The publisher deliberately performs no network access. The caller must preflight workflow
+    identity and conclusion against GitHub before supplying these values; artifact-internal
+    provenance is then required to match them exactly.
+    """
+    run = {
+        "repository": args.repository,
+        "run_id": args.run_id,
+        "run_attempt": args.run_attempt,
+        "source_sha": args.source_sha,
+    }
+    # Reuse the authoritative private schema constraints before any filesystem mutation.
+    if not re.fullmatch(r"[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+", run["repository"] or ""):
+        raise PublisherError("--repository must be owner/name")
+    if not re.fullmatch(r"[1-9][0-9]*", run["run_id"] or ""):
+        raise PublisherError("--run-id must be a positive decimal string")
+    if type(run["run_attempt"]) is not int or run["run_attempt"] < 1:
+        raise PublisherError("--run-attempt must be positive")
+    if not re.fullmatch(r"[0-9a-f]{40}", run["source_sha"] or ""):
+        raise PublisherError("--source-sha must be a 40-character lowercase Git SHA")
+    return run
+
+
+def _ingest_inputs(
+    args: argparse.Namespace,
+) -> tuple[dict[str, Any], Path, list[Path]]:
+    run = _run_metadata(args)
+    matrix = Path(args.matrix).absolute()
+    if matrix.is_symlink() or not matrix.is_file():
+        raise PublisherError("--matrix must be a regular non-symlink file")
+    artifacts = [Path(value).absolute() for value in args.artifact]
+    if not artifacts:
+        raise PublisherError("at least one --artifact is required")
+    names = [_artifact_name(path) for path in artifacts]
+    if len(names) != len(set(names)):
+        raise PublisherError("--artifact contains duplicate GHA names")
+    for path in artifacts:
+        if path.is_symlink() or not (path.is_dir() or path.is_file()):
+            raise PublisherError("--artifact must be a regular ZIP or real directory")
+    return run, matrix, artifacts
+
+
+def _bundle_ids(values: Sequence[str], *, promote: bool) -> list[str]:
+    bundle_ids = list(values)
+    if (
+        not bundle_ids
+        or len(bundle_ids) != len(set(bundle_ids))
+        or any(HEX64.fullmatch(value) is None for value in bundle_ids)
+    ):
+        raise PublisherError("bundle IDs must be unique SHA-256 digests")
+    if promote and len(bundle_ids) != REQUIRED_ALLOCATIONS:
+        raise PublisherError("promotion requires exactly three explicit bundle IDs")
+    return bundle_ids
+
+
+def ingest_command(args: argparse.Namespace) -> dict[str, Any]:
+    run, matrix, artifacts = _ingest_inputs(args)
+    store = _store_from_args(args)
+    with store.locked():
+        ingest_id, incoming, _ = archive_incoming(
+            store, matrix, artifacts, run
+        )
+        try:
+            bundle_id, _, _ = build_bundle(store, ingest_id, incoming, run)
+            dataset = build_dataset(store, [bundle_id], promote=False)
+            dataset_id, size = store.install_dataset(dataset)
+            store.update_channel(
+                "latest-attempt", dataset_id, size, dataset["generated_at"]
+            )
+            store.verify_channel("latest-attempt")
+            return {
+                "status": "accepted", "incoming_id": ingest_id,
+                "bundle_id": bundle_id, "dataset_sha256": dataset_id,
+                "channel": "latest-attempt",
+            }
+        except (
+            PublisherError, contracts.ContractError, artifact_safety.ArtifactSafetyError,
+            jsonschema.ValidationError,
+        ) as exc:
+            # Invalid delivery bytes provide no trusted timestamp. A fixed sentinel keeps
+            # repeated quarantine of the same immutable incoming object content-idempotent.
+            generated_at = "1970-01-01T00:00:00Z"
+            quarantine_id = quarantine_incoming(
+                store, ingest_id, "artifact-validation-failed", generated_at
+            )
+            raise PublisherError(
+                f"incoming {ingest_id} quarantined as {quarantine_id}: {exc}"
+            ) from exc
+
+
+def promote_command(args: argparse.Namespace) -> dict[str, Any]:
+    bundle_ids = _bundle_ids(args.bundle, promote=True)
+    store = _store_from_args(args)
+    with store.locked():
+        dataset = build_dataset(store, bundle_ids, promote=True)
+        digest, size = store.install_dataset(dataset)
+        store.update_channel("dev-latest", digest, size, dataset["generated_at"])
+        store.verify_channel("dev-latest")
+    return {
+        "status": "promoted", "bundle_ids": bundle_ids,
+        "dataset_sha256": digest, "channel": "dev-latest",
+    }
+
+
+def verify_command(args: argparse.Namespace) -> dict[str, Any]:
+    bundle_ids = _bundle_ids(args.bundle, promote=False) if args.bundle else []
+    channels = args.channel or ["latest-attempt"]
+    if any(channel not in {"latest-attempt", "dev-latest"} for channel in channels):
+        raise PublisherError("unknown channel")
+    store = _store_from_args(args)
+    if args.channel is None and (store.channels / "dev-latest.json").is_file():
+        channels.append("dev-latest")
+    with store.locked():
+        pointers = {channel: store.verify_channel(channel) for channel in channels}
+        bundles = [load_bundle(store, bundle_id)["id"] for bundle_id in bundle_ids]
+    return {"status": "verified", "channels": pointers, "bundle_ids": bundles}
+
+
+def _parser() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser(description="CollectiveX isolated filesystem publisher")
+    parser.add_argument("--store-root", help="defaults to COLLECTIVEX_STORE_ROOT")
+    subparsers = parser.add_subparsers(dest="command", required=True)
+    ingest = subparsers.add_parser("ingest", help="archive and validate one complete GHA run")
+    ingest.add_argument("--matrix", required=True)
+    ingest.add_argument("--artifact", action="append", required=True)
+    ingest.add_argument("--repository", required=True)
+    ingest.add_argument("--run-id", required=True)
+    ingest.add_argument("--run-attempt", required=True, type=int)
+    ingest.add_argument("--source-sha", required=True)
+    promote = subparsers.add_parser("promote", help="publish explicit independent bundles")
+    promote.add_argument("--bundle", action="append", required=True)
+    verify = subparsers.add_parser("verify", help="verify immutable targets and pointers")
+    verify.add_argument("--channel", action="append", choices=["latest-attempt", "dev-latest"])
+    verify.add_argument("--bundle", action="append", default=[])
+    return parser
+
+
+def main() -> int:
+    args = _parser().parse_args()
+    try:
+        if args.command == "ingest":
+            result = ingest_command(args)
+        elif args.command == "promote":
+            result = promote_command(args)
+        elif args.command == "verify":
+            result = verify_command(args)
+        else:
+            raise PublisherError(f"unknown command {args.command!r}")
+    except (
+        PublisherError, contracts.ContractError, artifact_safety.ArtifactSafetyError,
+        jsonschema.ValidationError, OSError,
+    ) as exc:
+        print(json.dumps({"status": "error", "error": str(exc)}), file=sys.stderr)
+        return 2
+    print(json.dumps(result, sort_keys=True))
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/experimental/CollectiveX/requirements.txt b/experimental/CollectiveX/requirements.txt
new file mode 100644
index 0000000000..f68f97d83d
--- /dev/null
+++ b/experimental/CollectiveX/requirements.txt
@@ -0,0 +1,8 @@
+# Host-side matrix generation. GPU libraries are supplied by benchmark images.
+PyYAML==6.0.2
+
+# Canonical workload serialization.
+numpy>=1.26,<3
+
+# Host-only strict artifact publisher schemas (never imported by GPU execution).
+jsonschema==4.25.1
diff --git a/experimental/CollectiveX/runtime/common.sh b/experimental/CollectiveX/runtime/common.sh
new file mode 100644
index 0000000000..3720afcf07
--- /dev/null
+++ b/experimental/CollectiveX/runtime/common.sh
@@ -0,0 +1,1686 @@
+# shellcheck shell=bash
+# CollectiveX — shared launcher helpers (sourced, not executed).
+#
+# Cluster-generic scaffolding only (Slurm/container/build/staging); no
+# model-serving. Logging goes to stderr so functions can `echo` a single
+# result on stdout.
+
+_CX_COMMON_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+CX_SQUASH_FORMAT_VERSION="repro-v1"
+CX_SQUASH_SOURCE_DATE_EPOCH=1
+CX_DEEPEP_V2_COMMIT="fa8a9b16898204afd347c663b89e65ef87dc6ce6" # pragma: allowlist secret
+CX_DEEPEP_V2_TREE="29809e75c5874e6609dac4804e7b651d5226959f" # pragma: allowlist secret
+CX_DEEPEP_V2_FMT_COMMIT="a4c7e17133ee9cb6a2f45545f6e974dd3c393efa" # pragma: allowlist secret
+CX_DEEPEP_HYBRID_COMMIT="e0a5b1d9848ab3e7b4a67842bf06f067bfac67f8" # pragma: allowlist secret
+CX_DEEPEP_HYBRID_TREE="d77aeab7f1bb52b615666fe178d26ced41fae08e" # pragma: allowlist secret
+unset COLLECTIVEX_OPERATOR_CONFIG_LOADED COLLECTIVEX_EPHEMERAL_CONFIG_PATH
+
+cx_log() { printf '[collectivex] %s\n' "$*" >&2; }
+cx_die() { printf '[collectivex] FATAL: %s\n' "$*" >&2; exit 1; }
+
+# Public failure telemetry is a closed vocabulary. Raw scheduler, container,
+# host, and filesystem diagnostics stay in the mode-0600 private logs.
+cx_set_failure_stage() {
+  local stage="$1"
+  case "$stage" in
+    setup|repository-stage|registry-verification|scheduler-allocation|container-import) ;;
+    container-hash|container-launch|backend-setup|execution|artifact-collection) ;;
+    *) cx_die "invalid launcher failure stage" ;;
+  esac
+  export CX_FAILSAFE_MODE="$stage"
+}
+
+cx_fail_stage() {
+  local stage="$1" log_path="${2:-}" diagnostic="unknown"
+  cx_set_failure_stage "$stage"
+  if [ -n "$log_path" ] && [ -f "$log_path" ]; then
+    if grep -aEqi 'no space left|disk quota|quota exceeded' "$log_path"; then
+      diagnostic="storage-capacity"
+    elif grep -aEqi 'permission denied|operation not permitted|read-only file system|source mount (creation|ownership validation|permission inspection|permission normalization|permission validation) failed' "$log_path"; then
+      diagnostic="storage-permission"
+    elif grep -aEqi 'outside one realized LSA domain|lsa(Size| team| domain).*(mismatch|invalid|expected)|ranks.*not in (one|the same) nvlink.domain' "$log_path"; then
+      diagnostic="accelerator-topology"
+    elif grep -aEqi 'cuda driver version is insufficient|call requires newer driver|cudaErrorCallRequiresNewerDriver|CUDA_ERROR_SYSTEM_DRIVER_MISMATCH|unsupported toolchain' "$log_path"; then
+      diagnostic="accelerator-driver"
+    elif grep -aEqi 'ncclDevCommCreate|ncclCommWindowRegister|ncclGetLsa(Device)?Pointer' "$log_path"; then
+      diagnostic="nccl-device-api"
+    elif grep -aEqi 'NVCC (PTX )?compilation failed|cuobjdump failed|invalid device (kernel )?image|no kernel image is available' "$log_path"; then
+      diagnostic="jit-toolchain"
+    elif grep -aEqi 'cuda out of memory|CUDA_ERROR_OUT_OF_MEMORY|out of memory.*cuda' "$log_path"; then
+      diagnostic="accelerator-memory"
+    elif grep -aEqi 'does not match its pinned image contract|requires the exact pinned|version mismatch' "$log_path"; then
+      diagnostic="backend-version"
+    elif grep -aEqi 'nvshmem is unavailable|build-tool installation failed' "$log_path"; then
+      diagnostic="backend-dependency"
+    elif grep -aEqi 'revision fetch failed|submodule fetch failed|package installation failed|staged source is invalid|source (pin resolution|seed validation|seed copy|checkout creation|publication validation|existing source validation) failed' "$log_path"; then
+      diagnostic="backend-source"
+    elif grep -aEqi 'failed to mount|squashfs|enroot|pyxis|mount.*invalid argument|invalid argument.*mount' "$log_path"; then
+      diagnostic="container-runtime"
+    elif grep -aEqi 'backend preparation failed|build (failed|is incomplete)|cache (mount identity )?validation failed|import failed' "$log_path"; then
+      diagnostic="backend-build"
+    elif grep -aEqi 'command not found|not found on this runner|git lookup failed' "$log_path"; then
+      diagnostic="missing-runtime"
+    elif grep -aEqi 'too many requests|rate.?limit' "$log_path"; then
+      diagnostic="registry-rate-limit"
+    elif grep -aEqi 'timed out|operation timeout|wait timeout after|watchdog.*timeout|timeout: sending signal|connection reset|could not resolve|TLS|certificate' "$log_path"; then
+      diagnostic="network-or-timeout"
+    elif grep -aEqi 'salloc:|srun:.*(unable to create step|step creation|invalid partition|invalid account)|unable to create step|job allocation' "$log_path"; then
+      diagnostic="scheduler"
+    elif grep -aEqi 'SHARD done: [0-9]+/[0-9]+ case\(s\) failed|WARN: .* run failed rc=|completed with invalid semantic evidence' "$log_path"; then
+      diagnostic="benchmark-case-failure"
+    elif [ -s "$log_path" ]; then
+      diagnostic="unclassified"
+    else
+      diagnostic="empty-log"
+    fi
+  fi
+  cx_log "ERROR: failure-class=$stage diagnostic=$diagnostic"
+  return 1
+}
+
+# Runner-local deployment settings are strict JSON kept outside the checkout.
+# Only the selected runner's allowlisted values are exported; the document is
+# never sourced or evaluated as shell.
+cx_load_operator_config() {
+  [ -n "${COLLECTIVEX_OPERATOR_CONFIG_LOADED:-}" ] \
+    && [ "$COLLECTIVEX_OPERATOR_CONFIG_LOADED" = "$$" ] && return 0
+  local config_path generated=0 parsed_path config_log key value
+  unset CX_PARTITION CX_ACCOUNT CX_SQUASH_DIR CX_STAGE_DIR CX_ENROOT_CACHE_PATH
+  unset ENROOT_CACHE_PATH
+  unset CX_EXCLUDE_NODES CX_NODELIST CX_LOCK_DIR CX_MASTER_PORT
+  config_path="${COLLECTIVEX_OPERATOR_CONFIG:-${XDG_CONFIG_HOME:-${HOME}/.config}/inferencex/collectivex.json}"
+  if [ -n "${COLLECTIVEX_OPERATOR_CONFIG_CONTENT:-}" ]; then
+    umask 077
+    if [[ "${CX_JOB_ROOT:-}" =~ ^/tmp/inferencex-collectivex-[0-9]+-[0-9]+-[A-Za-z0-9._-]+$ ]] \
+        && [ -d "$CX_JOB_ROOT" ] && [ ! -L "$CX_JOB_ROOT" ] \
+        && [ "$(stat -c '%u:%a' "$CX_JOB_ROOT" 2>/dev/null)" = "$(id -u):700" ]; then
+      config_path="$CX_JOB_ROOT/operator-config.json"
+      (set -C; : > "$config_path") 2>/dev/null \
+        || cx_die "cannot create ephemeral runner configuration"
+    else
+      config_path="$(mktemp /tmp/inferencex-collectivex-config.XXXXXX)" \
+        || cx_die "cannot create ephemeral runner configuration"
+    fi
+    COLLECTIVEX_EPHEMERAL_CONFIG_PATH="$config_path"
+    generated=1
+    if ! printf '%s' "$COLLECTIVEX_OPERATOR_CONFIG_CONTENT" > "$config_path"; then
+      unset COLLECTIVEX_OPERATOR_CONFIG_CONTENT
+      rm -f -- "$config_path"
+      unset COLLECTIVEX_EPHEMERAL_CONFIG_PATH
+      cx_die "cannot materialize runner configuration"
+    fi
+  elif [ "${COLLECTIVEX_OPERATOR_CONFIG_REQUIRED:-0}" = 1 ]; then
+    unset COLLECTIVEX_OPERATOR_CONFIG_CONTENT
+    cx_die "runner configuration is unavailable"
+  fi
+  unset COLLECTIVEX_OPERATOR_CONFIG_CONTENT COLLECTIVEX_OPERATOR_CONFIG_REQUIRED
+  if [ ! -e "$config_path" ]; then
+    COLLECTIVEX_OPERATOR_CONFIG_LOADED="$$"
+    return 0
+  fi
+  umask 077
+  parsed_path="$(mktemp /tmp/inferencex-collectivex-parsed.XXXXXX)" || {
+    [ "$generated" = 0 ] || rm -f -- "$config_path"
+    cx_die "cannot parse runner configuration"
+  }
+  config_log="$(cx_private_log_path operator-config)"
+  if ! python3 - "$config_path" "${CX_RUNNER:-${CX_SHARD_SKU:-${CX_PUBLIC_RUNNER:-}}}" \
+      > "$parsed_path" 2> "$config_log" <<'PY'
+import json
+import os
+import posixpath
+import re
+import stat
+import sys
+
+RUNNERS = {
+    "h100-dgxc", "h200-dgxc", "b200-dgxc", "b300",
+    "gb200", "gb300", "mi325x", "mi355x",
+}
+FIELDS = {
+    "partition": "CX_PARTITION",
+    "account": "CX_ACCOUNT",
+    "squash_dir": "CX_SQUASH_DIR",
+    "stage_dir": "CX_STAGE_DIR",
+    "enroot_cache_path": "CX_ENROOT_CACHE_PATH",
+    "exclude_nodes": "CX_EXCLUDE_NODES",
+    "nodelist": "CX_NODELIST",
+    "lock_dir": "CX_LOCK_DIR",
+}
+REQUIRED = {
+    "h100-dgxc": {"partition", "account", "squash_dir"},
+    "h200-dgxc": {"partition", "squash_dir"},
+    "b200-dgxc": {"partition", "account", "squash_dir"},
+    "b300": {"partition", "account", "squash_dir", "stage_dir"},
+    "gb200": {"partition", "account", "storage_roots"},
+    "gb300": {"partition", "account", "squash_dir", "stage_dir", "enroot_cache_path"},
+    "mi325x": {"partition", "squash_dir"},
+    "mi355x": {"partition", "squash_dir"},
+}
+ALLOWED = {
+    "h100-dgxc": REQUIRED["h100-dgxc"] | {"exclude_nodes", "stage_dir"},
+    "h200-dgxc": REQUIRED["h200-dgxc"] | {"account", "exclude_nodes", "stage_dir"},
+    "b200-dgxc": REQUIRED["b200-dgxc"] | {"exclude_nodes", "stage_dir"},
+    "b300": REQUIRED["b300"] | {"exclude_nodes"},
+    "gb200": REQUIRED["gb200"],
+    "gb300": REQUIRED["gb300"],
+    "mi325x": REQUIRED["mi325x"] | {"exclude_nodes", "nodelist", "stage_dir", "lock_dir"},
+    "mi355x": REQUIRED["mi355x"] | {"exclude_nodes", "nodelist", "stage_dir", "lock_dir"},
+}
+TOKEN = re.compile(r"^[A-Za-z0-9_.\[\],-]+$")
+PATH = re.compile(r"^/[A-Za-z0-9._/+\-]+$")
+IPV4 = re.compile(r"(?<!\d)(?:\d{1,3}\.){3}\d{1,3}(?!\d)")
+
+def pairs(items):
+    result = {}
+    for key, value in items:
+        if key in result:
+            raise ValueError
+        result[key] = value
+    return result
+
+def valid_path(value):
+    return (
+        isinstance(value, str) and len(value) <= 1024 and PATH.fullmatch(value)
+        and posixpath.normpath(value) == value and not IPV4.search(value)
+    )
+
+try:
+    path, runner = sys.argv[1:]
+    if runner not in RUNNERS:
+        raise ValueError
+    metadata = os.lstat(path)
+    if (
+        not stat.S_ISREG(metadata.st_mode) or metadata.st_uid != os.getuid()
+        or stat.S_IMODE(metadata.st_mode) != 0o600 or metadata.st_size > 65536
+    ):
+        raise ValueError
+    flags = os.O_RDONLY | getattr(os, "O_NOFOLLOW", 0)
+    descriptor = os.open(path, flags)
+    try:
+        opened = os.fstat(descriptor)
+        if (opened.st_dev, opened.st_ino) != (metadata.st_dev, metadata.st_ino):
+            raise ValueError
+        payload = b""
+        while len(payload) <= 65536:
+            chunk = os.read(descriptor, 65537 - len(payload))
+            if not chunk:
+                break
+            payload += chunk
+        document = json.loads(
+            payload.decode("utf-8"),
+            object_pairs_hook=pairs,
+            parse_constant=lambda _: (_ for _ in ()).throw(ValueError()),
+        )
+    finally:
+        os.close(descriptor)
+    if (
+        set(document) != {"schema_version", "runners"}
+        or type(document["schema_version"]) is not int
+        or document["schema_version"] != 1
+    ):
+        raise ValueError
+    runners = document["runners"]
+    if (
+        not isinstance(runners, dict) or not runners or set(runners) - RUNNERS
+        or runner not in runners
+    ):
+        raise ValueError
+    selected = None
+    for name, config in runners.items():
+        if not isinstance(config, dict) or not REQUIRED[name].issubset(config):
+            raise ValueError
+        if set(config) - ALLOWED[name]:
+            raise ValueError
+        for field, value in config.items():
+            if field == "storage_roots":
+                if (
+                    not isinstance(value, list) or not 1 <= len(value) <= 16
+                    or len(value) != len(set(value)) or not all(valid_path(item) for item in value)
+                ):
+                    raise ValueError
+            elif field.endswith(("_dir", "_path")):
+                if not valid_path(value):
+                    raise ValueError
+            elif (
+                not isinstance(value, str) or not value or len(value) > 512
+                or not TOKEN.fullmatch(value) or IPV4.search(value)
+            ):
+                raise ValueError
+        if name == runner:
+            selected = dict(config)
+    if selected is None:
+        raise ValueError
+    roots = selected.pop("storage_roots", None)
+    if roots is not None:
+        for root in roots:
+            squash = posixpath.join(root, "collectivex", "containers")
+            stage = posixpath.join(root, "collectivex", "stage")
+            probes = []
+            try:
+                for directory in (squash, stage):
+                    os.makedirs(directory, mode=0o700, exist_ok=True)
+                    probe = posixpath.join(directory, f".write-probe-{os.getpid()}")
+                    fd = os.open(probe, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o600)
+                    os.close(fd)
+                    probes.append(probe)
+                selected.update(squash_dir=squash, stage_dir=stage)
+                break
+            except OSError:
+                pass
+            finally:
+                for probe in probes:
+                    try:
+                        os.unlink(probe)
+                    except OSError:
+                        pass
+        else:
+            raise ValueError
+    for field, value in selected.items():
+        key = FIELDS[field]
+        sys.stdout.buffer.write(key.encode() + b"\0" + value.encode() + b"\0")
+except (KeyError, OSError, TypeError, UnicodeError, ValueError):
+    raise SystemExit(1)
+PY
+  then
+    rm -f -- "$parsed_path"
+    [ "$generated" = 0 ] || rm -f -- "$config_path"
+    unset COLLECTIVEX_EPHEMERAL_CONFIG_PATH
+    unset COLLECTIVEX_OPERATOR_CONFIG COLLECTIVEX_OPERATOR_CONFIG_EPHEMERAL
+    cx_die "runner-local configuration failed"
+  fi
+  while IFS= read -r -d '' key && IFS= read -r -d '' value; do
+    printf -v "$key" '%s' "$value"
+    export "${key?}"
+  done < "$parsed_path"
+  rm -f -- "$parsed_path"
+  if [ "$generated" = 1 ] || [ "${COLLECTIVEX_OPERATOR_CONFIG_EPHEMERAL:-0}" = 1 ]; then
+    rm -f -- "$config_path" || cx_die "cannot remove ephemeral runner configuration"
+  fi
+  unset COLLECTIVEX_EPHEMERAL_CONFIG_PATH
+  unset COLLECTIVEX_OPERATOR_CONFIG COLLECTIVEX_OPERATOR_CONFIG_EPHEMERAL
+  COLLECTIVEX_OPERATOR_CONFIG_LOADED="$$"
+}
+
+# Create an empty private log file and print its path on stdout.
+#   $1  label; the file is named <label>.log
+# The file lives in /tmp/inferencex-collectivex-<uid>/<tag>/, where <tag> is
+# COLLECTIVEX_EXECUTION_ID or "manual_<shell pid>". Tag and label must start
+# with an alphanumeric and contain only [A-Za-z0-9._-]. Both directories must
+# be owned by the caller with mode 0700. The log is created exclusively with
+# mode 0600, so asking for the same label twice under one tag fails. As a side
+# effect, sibling directories under the root whose mtime is older than 24 hours
+# are removed. Any failure is fatal (cx_die).
+cx_private_log_path() {
+  local label="$1" tag="${COLLECTIVEX_EXECUTION_ID:-manual_$$}" path
+  # The helper's stderr is discarded; only its exit status and stdout are used.
+  path="$(python3 - "$tag" "$label" <<'PY' 2>/dev/null
+import os
+import re
+import shutil
+import stat
+import sys
+import time
+
+tag, label = sys.argv[1:]
+if not all(re.fullmatch(r"[A-Za-z0-9][A-Za-z0-9._-]*", value) for value in (tag, label)):
+    raise SystemExit(1)
+root = f"/tmp/inferencex-collectivex-{os.getuid()}"
+old_umask = os.umask(0o077)
+flags = os.O_RDONLY | os.O_DIRECTORY | getattr(os, "O_NOFOLLOW", 0)
+try:
+    try:
+        os.mkdir(root, 0o700)
+    except FileExistsError:
+        pass
+    root_fd = os.open(root, flags)
+    try:
+        metadata = os.fstat(root_fd)
+        if metadata.st_uid != os.getuid() or stat.S_IMODE(metadata.st_mode) != 0o700:
+            raise OSError("unsafe root")
+        cutoff = time.time() - 86400
+        for entry in os.scandir(root):
+            try:
+                if (
+                    entry.name != tag and entry.is_dir(follow_symlinks=False)
+                    and entry.stat(follow_symlinks=False).st_mtime < cutoff
+                ):
+                    shutil.rmtree(entry.path)
+            except OSError:
+                pass
+        try:
+            os.mkdir(tag, 0o700, dir_fd=root_fd)
+        except FileExistsError:
+            pass
+        directory_fd = os.open(tag, flags, dir_fd=root_fd)
+        try:
+            metadata = os.fstat(directory_fd)
+            if metadata.st_uid != os.getuid() or stat.S_IMODE(metadata.st_mode) != 0o700:
+                raise OSError("unsafe directory")
+            log_flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL | getattr(os, "O_NOFOLLOW", 0)
+            log_fd = os.open(f"{label}.log", log_flags, 0o600, dir_fd=directory_fd)
+            os.close(log_fd)
+        finally:
+            os.close(directory_fd)
+    finally:
+        os.close(root_fd)
+finally:
+    os.umask(old_umask)
+print(f"{root}/{tag}/{label}.log", end="")
+PY
+)" || cx_die "cannot create private runtime log"
+  printf '%s' "$path"
+}
+
+# Manual successes delete diagnostics immediately. Canonical workflow logs survive
+# until artifact upload succeeds; failed logs remain private for debugging, and a
+# later run prunes abandoned directories older than 24 hours.
+# Remove this execution's private log directory, but only after a success.
+#   $1  exit status of the run; any value other than 0 keeps the logs.
+# The directory is /tmp/inferencex-collectivex-<uid>/<tag>, with <tag> taken
+# from COLLECTIVEX_EXECUTION_ID or "manual_<shell pid>". Removal is best
+# effort: the helper's output is discarded and its failure is ignored.
+cx_cleanup_private_logs() {
+  local rc="$1" tag="${COLLECTIVEX_EXECUTION_ID:-manual_$$}"
+  [ "$rc" = 0 ] || return 0
+  # The helper refuses to act unless the root is a caller-owned 0700 directory
+  # and the target is a real directory rather than a symlink.
+  python3 - "$tag" <<'PY' >/dev/null 2>&1 || true
+import os
+import re
+import shutil
+import stat
+import sys
+
+tag = sys.argv[1]
+if not re.fullmatch(r"[A-Za-z0-9][A-Za-z0-9._-]*", tag):
+    raise SystemExit(1)
+root = f"/tmp/inferencex-collectivex-{os.getuid()}"
+flags = os.O_RDONLY | os.O_DIRECTORY | getattr(os, "O_NOFOLLOW", 0)
+root_fd = os.open(root, flags)
+try:
+    metadata = os.fstat(root_fd)
+    if metadata.st_uid != os.getuid() or stat.S_IMODE(metadata.st_mode) != 0o700:
+        raise SystemExit(1)
+finally:
+    os.close(root_fd)
+path = os.path.join(root, tag)
+if os.path.isdir(path) and not os.path.islink(path):
+    shutil.rmtree(path)
+PY
+}
+
+# Explicit Slurm export boundary. Operator config, runner credentials, HOME,
+# workspace paths, and unrelated service secrets never enter the container.
+cx_container_exports() {
+  printf '%s' 'COLLECTIVEX_SOURCE_SHA,COLLECTIVEX_ARTIFACT_NAME,COLLECTIVEX_EXECUTION_ID,COLLECTIVEX_CONTROL_SHA256,COLLECTIVEX_IMAGE,COLLECTIVEX_IMAGE_DIGEST,COLLECTIVEX_IMAGE_DIGEST_VERIFIED,COLLECTIVEX_SQUASH_SHA256,GITHUB_REF_NAME,GITHUB_REF,GITHUB_REPOSITORY,GITHUB_JOB,GITHUB_RUN_ID,GITHUB_RUN_ATTEMPT,GITHUB_SHA,CX_RUNNER,CX_BENCH,CX_NODES,CX_GPUS_PER_NODE,CX_SCALE_UP_DOMAIN,CX_SHARD_FILE,CX_SHARD_SKU,CX_NGPUS,CX_TS,CX_TOPO,CX_TRANSPORT,CX_PHASE,CX_ROUTING,CX_EPLB,CX_CASE_ID,CX_SUITE,CX_WORKLOAD_NAME,CX_REQUIRED_PUBLICATION,CX_HIDDEN,CX_TOPK,CX_EXPERTS,CX_TOKENS_LADDER,CX_CANONICAL,CX_ITERS,CX_TRIALS,CX_WARMUP,CX_SAMPLES_PER_POINT,CX_WARMUP_SEMANTICS,CX_SEED,CX_RUN_TIMEOUT,CX_NCCL_HOME,CX_ALLOW_MNNVL,CX_ATTEMPT_ID,CX_RUNTIME_MARKER,CX_MORI_KERNEL_TYPE,CX_WORKLOAD_DIR,CX_BACKEND_CACHE_ROOT,CX_BACKEND_CACHE_SENTINEL_SHA256,CX_BACKEND_SOURCE_ROOT,NCCL_CUMEM_ENABLE,NCCL_MNNVL_ENABLE,MC_FORCE_MNNVL,MORI_DISABLE_AUTO_XGMI,MORI_ENABLE_SDMA,MORI_APP_LOG_LEVEL,MORI_SHMEM_LOG_LEVEL,MORI_IO_LOG_LEVEL'
+  printf '%s' ',MORI_COMMIT'
+}
+
+# Host-side utility steps need only the basic login paths. They never receive
+# the complete Actions or runner environment.
+cx_host_exports() {
+  printf '%s' 'HOME,PATH,USER,XDG_CACHE_HOME,ENROOT_CACHE_PATH'
+}
+
+cx_prepare_runtime_marker() {
+  local mount_src="$1" tag="${COLLECTIVEX_EXECUTION_ID:-${CX_TS:-}}" marker
+  [[ "$tag" =~ ^[A-Za-z0-9][A-Za-z0-9._-]*$ ]] \
+    || cx_die "cannot create runtime stage marker"
+  marker=".shards/runtime-stage-${tag}.txt"
+  mkdir -p "$mount_src/experimental/CollectiveX/.shards" >/dev/null 2>&1 \
+    || cx_die "cannot create runtime stage marker"
+  rm -f -- "$mount_src/experimental/CollectiveX/$marker" >/dev/null 2>&1 \
+    || cx_die "cannot reset runtime stage marker"
+  export CX_RUNTIME_MARKER="$marker"
+}
+
+cx_write_runtime_stage() {
+  local stage="$1" marker="${CX_RUNTIME_MARKER:-}"
+  [ -n "$marker" ] || return 0
+  [[ "$marker" =~ ^\.shards/runtime-stage-[A-Za-z0-9][A-Za-z0-9._-]*\.txt$ ]] \
+    || return 1
+  case "$stage" in backend-setup|execution) ;; *) return 1 ;; esac
+  printf '%s\n' "$stage" > "$marker"
+}
+
+cx_adopt_runtime_stage() {
+  local mount_src="$1" marker="${CX_RUNTIME_MARKER:-}" stage=""
+  [ -n "$marker" ] || return 0
+  if [[ "$marker" =~ ^\.shards/runtime-stage-[A-Za-z0-9][A-Za-z0-9._-]*\.txt$ ]] \
+      && [ -f "$mount_src/experimental/CollectiveX/$marker" ]; then
+    IFS= read -r stage < "$mount_src/experimental/CollectiveX/$marker" || true
+    rm -f -- "$mount_src/experimental/CollectiveX/$marker" >/dev/null 2>&1 || true
+    case "$stage" in
+      backend-setup|execution) cx_set_failure_stage "$stage" ;;
+    esac
+  fi
+}
+
+# Abort via cx_die unless every named variable holds a non-empty value.
+#   $@  names of the required variables
+# All missing names are reported together in a single message.
+cx_require_vars() {
+  local -a absent=()
+  local candidate
+  for candidate; do
+    if [ -z "${!candidate:-}" ]; then
+      absent+=("$candidate")
+    fi
+  done
+  if [ "${#absent[@]}" -ne 0 ]; then
+    cx_die "missing runner-local configuration: ${absent[*]} (set them in COLLECTIVEX_OPERATOR_CONFIG)"
+  fi
+}
+
+cx_require_single_node() {
+  [ "${CX_NODES:-1}" = "1" ] || cx_die "$1 supports one-node EP only"
+}
+
+# A set shard path is an execution contract, never a hint. Validate it before
+# staging/allocation and again in-container so a missing or stale control file
+# cannot silently fall back to a manual single-case run.
+cx_validate_shard_control() {
+  local cx_root="$1" shard="${CX_SHARD_FILE:-}" path expected_sku control_sha256
+  [ -n "$shard" ] || return 0
+  expected_sku="${CX_SHARD_SKU:-}"
+  [ -n "$expected_sku" ] || cx_die "CX_SHARD_SKU is required with CX_SHARD_FILE"
+  [ -n "${CX_BENCH:-}" ] || cx_die "CX_BENCH is required with CX_SHARD_FILE"
+  [[ "${CX_NODES:-}" =~ ^[1-9][0-9]*$ ]] \
+    || cx_die "positive CX_NODES is required with CX_SHARD_FILE"
+  path="$shard"
+  [ -f "$path" ] || path="${cx_root%/}/$shard"
+  [ -f "$path" ] || cx_die "shard control does not exist"
+  [ -s "$path" ] || cx_die "shard control is empty"
+  python3 "${cx_root%/}/sweep_matrix.py" \
+    --validate-control "$path" --expect-sku "$expected_sku" \
+    --expect-backend "$CX_BENCH" --expect-nodes "$CX_NODES" >/dev/null 2>&1 \
+    || cx_die "invalid shard control"
+  control_sha256="$(sha256sum "$path" | awk '{print $1}')"
+  [[ "$control_sha256" =~ ^[0-9a-f]{64}$ ]] \
+    || cx_die "cannot hash shard control"
+  export COLLECTIVEX_CONTROL_SHA256="$control_sha256"
+}
+
+cx_apply_timing_profile() {
+  [ -n "${CX_TIMING:-}" ] || return 0
+  local iters trials warmup extra
+  IFS=: read -r iters trials warmup extra <<< "$CX_TIMING"
+  [[ "$iters" =~ ^[1-9][0-9]*$ && "$trials" =~ ^[1-9][0-9]*$ \
+    && "$warmup" =~ ^[1-9][0-9]*$ && -z "$extra" ]] \
+    || cx_die "CX_TIMING must be positive iters:trials:warmup"
+  export CX_ITERS="$iters" CX_TRIALS="$trials" CX_WARMUP="$warmup"
+}
+
+# Allocate via salloc's stable grant message and assign JOB_ID in this shell.
+# Raw scheduler output remains in the bounded private execution log.
+#   $@  salloc arguments (--no-shell is appended)
+# Sets CX_ALLOCATION_REQUESTED=1 before invoking salloc. Returns 1, after
+# recording a scheduler-allocation failure, when salloc fails or no job ID can
+# be parsed from its output. JOB_ID is still assigned when a failing salloc
+# reported a grant.
+cx_salloc_jobid() {
+  local log job_id salloc_rc=0
+  log="$(cx_private_log_path scheduler-allocation)"
+  CX_ALLOCATION_REQUESTED=1
+  # salloc has no portable --parsable option. Parse the stable grant message
+  # used by the production launchers, while also accepting a bare ID from
+  # site wrappers.
+  salloc "$@" --no-shell > "$log" 2>&1 || salloc_rc=$?
+  # Both expressions capture digits only, so a non-empty result is numeric.
+  job_id="$(sed -nE \
+    -e 's/^([0-9]+)(;[^[:space:]]+)?$/\1/p' \
+    -e 's/.*Granted job allocation ([0-9]+).*/\1/p' \
+    "$log" | head -n1)"
+  [ -z "$job_id" ] || JOB_ID="$job_id"
+  # Judge success on the ID parsed by this call. Testing the global JOB_ID would
+  # accept a stale inherited value and fails under `set -u` when it is unset.
+  if [ "$salloc_rc" != 0 ] || [ -z "$job_id" ]; then
+    cx_fail_stage scheduler-allocation "$log"
+    return 1
+  fi
+}
+
+cx_cancel_job() {
+  local job_id="$1" active attempt
+  [[ "$job_id" =~ ^[0-9]+$ ]] || return 1
+  scancel "$job_id" >/dev/null 2>&1 || true
+  for ((attempt = 0; attempt < 60; attempt++)); do
+    if ! active="$(squeue -h -j "$job_id" -o %A 2>/dev/null)"; then
+      sleep 2
+      continue
+    fi
+    [ -n "$active" ] || return 0
+    sleep 2
+  done
+  cx_log "ERROR: scheduled allocation did not terminate during cleanup"
+  return 1
+}
+
+cx_write_cleanup_guard() {
+  local state="$1" root="${CX_JOB_ROOT:-}" safe unsafe
+  [[ "$root" =~ ^/tmp/inferencex-collectivex-[0-9]+-[0-9]+-[A-Za-z0-9._-]+$ ]] \
+    && [ -d "$root" ] && [ ! -L "$root" ] \
+    && [ "$(stat -c '%u:%a' "$root" 2>/dev/null)" = "$(id -u):700" ] || return 0
+  safe="$root/cleanup-safe"
+  unsafe="$root/cleanup-unsafe"
+  umask 077
+  case "$state" in
+    safe) : > "$safe" && rm -f -- "$unsafe" ;;
+    unsafe) rm -f -- "$safe" && : > "$unsafe" ;;
+    *) return 1 ;;
+  esac
+}
+
+# Single multi-arch container for ALL NVIDIA SKUs: tag `v0.5.11-cu130` is an OCI
+# image index covering linux/amd64 (B200) + linux/arm64 (GB200); enroot import
+# pulls the matching arch. (cu130 = CUDA 13, system nccl.h in /usr/include, torch 2.9.x.)
+# Import remains tag-based because Enroot cannot reliably import a digest-qualified
+# Docker Hub reference non-interactively. The registry digest is resolved and checked
+# immediately before import, then recorded as verified provenance.
+# Pinned digest for CX_IMAGE_MULTIARCH, as returned by cx_default_image_digest.
+CX_IMAGE_MULTIARCH_DIGEST="sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975"
+# (v0.5.12-cu130 was rejected: its 62 layers overflow enroot's overlay-based
+# squash creation on these nodes — "failed to mount overlay ... Invalid argument".
+# v0.5.11-cu130 imports cleanly.)
+# Runtime setup verifies the image-bundled DeepEP build for the detected GPU target.
+CX_IMAGE_MULTIARCH="lmsysorg/sglang:v0.5.11-cu130"
+
+# AMD (ROCm/CDNA): separate single-arch images bundle MoRI.
+# Each image tag is followed by its pinned registry digest and by the MoRI commit
+# that cx_lock_canonical_gha_env exports as MORI_COMMIT for that runner
+# (mi355x first, then mi325x).
+CX_IMAGE_AMD_MORI="rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2"
+CX_IMAGE_AMD_MORI_DIGEST="sha256:24c3b30d64475937abbb6498e3b29528649adcb836dde7a468979f767809b0e8"
+CX_MORI_COMMIT_MI355="99bc0a3a6e7a70aacc6372cd9a4275ccfb4de567" # pragma: allowlist secret
+CX_IMAGE_AMD_MORI_MI325="rocm/sgl-dev:sglang-0.5.14-rocm720-mi35x-mori-0701"
+CX_IMAGE_AMD_MORI_MI325_DIGEST="sha256:ea42375343c2ef8f73b3bdb9e1b7b435556e3ca92aba5e3f74ada29ba217fabc"
+CX_MORI_COMMIT_MI325="bf99bdf18fc69887a346913ca01c315c2aa9bd4c" # pragma: allowlist secret
+cx_default_image() {
+  case "$1" in
+    mi325x*) echo "$CX_IMAGE_AMD_MORI_MI325" ;;
+    mi355x*) echo "$CX_IMAGE_AMD_MORI" ;;
+    b200*|gb200*|b300*|gb300*|h100*|h200*) echo "$CX_IMAGE_MULTIARCH" ;;
+    *) cx_die "no default image for runner prefix: $1" ;;
+  esac
+}
+
+# Resolve the manifest digest that Docker Hub currently serves for a tagged image
+# and print it as sha256:<64 hex> without a trailing newline.
+#   $1  image reference: [docker.io/|registry-1.docker.io/]repository[:tag]
+# A missing tag defaults to "latest" and a single-component repository gains the
+# "library/" prefix. Calls cx_die for digest-qualified references, other
+# registries, malformed references, and token or manifest lookup failures.
+cx_resolve_registry_digest() {
+  local image="$1" repository reference token digest registry
+  if [[ "$image" == *@* ]]; then
+    cx_die "digest-qualified image overrides are unsupported; configure a tag and pinned digest"
+  fi
+  # A first path component containing '.' or ':' (or equal to localhost) names a
+  # registry host; only the Docker Hub hosts are accepted, and then stripped.
+  registry="${image%%/*}"
+  if [[ "$image" == */* && ( "$registry" == *.* || "$registry" == *:* || "$registry" = localhost ) ]]; then
+    case "$registry" in
+      docker.io|registry-1.docker.io) image="${image#*/}" ;;
+      *) cx_die "only Docker Hub images are supported by the registry verifier" ;;
+    esac
+  fi
+  # Split repository and tag at the last ':'; without one the tag is "latest".
+  repository="${image%:*}"
+  reference="${image##*:}"
+  [ "$repository" != "$image" ] || { repository="$image"; reference=latest; }
+  [ -n "$repository" ] && [ -n "$reference" ] \
+    || cx_die "configured image reference is malformed"
+  [[ "$repository" == */* ]] || repository="library/$repository"
+  # Request a pull-scoped bearer token from the Docker auth service.
+  token="$(curl -fsSLG --connect-timeout 10 --max-time 30 --retry 2 \
+    --retry-delay 1 --retry-all-errors 'https://auth.docker.io/token' \
+    --data-urlencode 'service=registry.docker.io' \
+    --data-urlencode "scope=repository:${repository}:pull" \
+    | python3 -c 'import json,sys; print(json.load(sys.stdin)["token"])')" \
+    || cx_die "cannot authenticate to the image registry"
+  # HEAD the manifest and read the Docker-Content-Digest response header.
+  digest="$(curl -fsSI --connect-timeout 10 --max-time 30 --retry 2 \
+    --retry-delay 1 --retry-all-errors \
+    -H "Authorization: Bearer $token" \
+    -H 'Accept: application/vnd.oci.image.index.v1+json, application/vnd.oci.image.manifest.v1+json, application/vnd.docker.distribution.manifest.list.v2+json, application/vnd.docker.distribution.manifest.v2+json' \
+    "https://registry-1.docker.io/v2/${repository}/manifests/${reference}" \
+    | tr -d '\r' | awk 'tolower($1)=="docker-content-digest:" {print $2; exit}')" \
+    || cx_die "cannot resolve the configured image digest"
+  [[ "$digest" =~ ^sha256:[0-9a-f]{64}$ ]] \
+    || cx_die "registry returned an invalid image digest"
+  printf '%s' "$digest"
+}
+
+# Verify that the tag of <image> still resolves to its pinned digest, then export
+# the verified provenance.
+#   $1  image reference in tag form
+# The expected digest is CX_IMAGE_DIGEST when set, otherwise the built-in pin
+# from cx_default_image_digest. On success exports COLLECTIVEX_IMAGE,
+# COLLECTIVEX_IMAGE_DIGEST and COLLECTIVEX_IMAGE_DIGEST_VERIFIED=1. Aborts via
+# cx_die when no valid pin exists, the registry lookup fails, or the digests
+# differ.
+cx_verify_registry_image() {
+  local image="$1" expected actual
+  expected="${CX_IMAGE_DIGEST:-$(cx_default_image_digest "$image")}"
+  [[ "$expected" =~ ^sha256:[0-9a-f]{64}$ ]] \
+    || cx_die "a pinned digest is required for the configured image"
+  # The resolver runs in a command substitution, so a failure inside it only
+  # ends that subshell. Handle it here rather than letting an empty result be
+  # reported as a tag that drifted from its pin.
+  actual="$(cx_resolve_registry_digest "$image")" \
+    || cx_die "cannot resolve the configured image digest"
+  [ "$actual" = "$expected" ] \
+    || cx_die "configured image tag no longer matches its pinned digest"
+  export COLLECTIVEX_IMAGE="$image" COLLECTIVEX_IMAGE_DIGEST="$actual"
+  export COLLECTIVEX_IMAGE_DIGEST_VERIFIED=1
+}
+
+cx_default_image_digest() {
+  case "$1" in
+    "$CX_IMAGE_MULTIARCH") printf '%s' "$CX_IMAGE_MULTIARCH_DIGEST" ;;
+    "$CX_IMAGE_AMD_MORI") printf '%s' "$CX_IMAGE_AMD_MORI_DIGEST" ;;
+    "$CX_IMAGE_AMD_MORI_MI325") printf '%s' "$CX_IMAGE_AMD_MORI_MI325_DIGEST" ;;
+  esac
+}
+
+# Canonical workflow runs must not inherit benchmark controls from a persistent
+# self-hosted runner service. Manual/SSH diagnostics retain their explicit
+# overrides by leaving COLLECTIVEX_CANONICAL_GHA unset.
+cx_gha_workspace_stage_root() {
+  local workspace="${GITHUB_WORKSPACE:-}"
+  python3 - "$workspace" <<'PY'
+import os
+import stat
+import sys
+
+workspace = sys.argv[1]
+try:
+    if (
+        not os.path.isabs(workspace)
+        or os.path.realpath(workspace) != workspace
+        or not os.path.isdir(workspace)
+    ):
+        raise OSError
+    metadata = os.stat(workspace, follow_symlinks=False)
+    # GitHub runner workspaces are runner-owned but commonly writable by the
+    # trusted runner-service group. Keep the child mode 0700 and reject world write.
+    if metadata.st_uid != os.getuid() or stat.S_IMODE(metadata.st_mode) & stat.S_IWOTH:
+        raise OSError
+except OSError:
+    raise SystemExit(1)
+print(workspace, end="")
+PY
+}
+
+# Create a per-UID cache under validated cluster-local storage. Only the fixed
+# /cx-cache mount enters the container; the operator host path does not.
+#   $1  absolute path of an existing parent directory on the host
+# The embedded Python first creates and removes a probe directory to learn the
+# owner UID that new entries receive and to confirm that mode 0700 is honoured.
+# It then opens or creates .collectivex-backend-cache-v<generation>-<uid>, trying
+# generation 3 and falling back to 4 when 3 cannot be validated. Each cache holds
+# a 32-byte random sentinel that is published with a hard link, so an existing
+# sentinel is never replaced.
+# On success exports CX_PREPARED_BACKEND_CACHE (the cache path) and
+# CX_BACKEND_CACHE_SENTINEL_SHA256 (hex SHA-256 of the sentinel); returns 1 on
+# any failure.
+cx_prepare_backend_cache() {
+  local stage_parent="$1" cache info sentinel_sha256
+  unset CX_PREPARED_BACKEND_CACHE CX_BACKEND_CACHE_SENTINEL_SHA256
+  # The helper prints "<sentinel sha256> <cache path>" on success.
+  info="$(python3 - "$stage_parent" <<'PY'
+import hashlib
+import os
+import secrets
+import stat
+import sys
+
+configured_parent = sys.argv[1]
+try:
+    if (
+        not os.path.isabs(configured_parent)
+        or "\n" in configured_parent
+        or "\r" in configured_parent
+    ):
+        raise OSError
+    parent = os.path.realpath(configured_parent)
+    if not os.path.isdir(parent):
+        raise OSError
+    flags = os.O_RDONLY | os.O_DIRECTORY | getattr(os, "O_NOFOLLOW", 0)
+    parent_fd = os.open(parent, flags)
+    try:
+        probe_name = f".collectivex-owner-probe-{os.getpid()}-{secrets.token_hex(8)}"
+        os.mkdir(probe_name, 0o700, dir_fd=parent_fd)
+        try:
+            probe_fd = os.open(probe_name, flags, dir_fd=parent_fd)
+            try:
+                probe = os.fstat(probe_fd)
+                if stat.S_IMODE(probe.st_mode) & 0o777 != 0o700:
+                    raise OSError
+                realized_owner = probe.st_uid
+            finally:
+                os.close(probe_fd)
+        finally:
+            os.rmdir(probe_name, dir_fd=parent_fd)
+        for generation in (3, 4):
+            name = f".collectivex-backend-cache-v{generation}-{os.getuid()}"
+            try:
+                os.mkdir(name, 0o700, dir_fd=parent_fd)
+            except FileExistsError:
+                pass
+            try:
+                cache_fd = os.open(name, flags, dir_fd=parent_fd)
+                try:
+                    metadata = os.fstat(cache_fd)
+                    if (
+                        metadata.st_uid != realized_owner
+                        or stat.S_IMODE(metadata.st_mode) & 0o777 != 0o700
+                    ):
+                        raise OSError
+                    sentinel_name = ".collectivex-mount-sentinel-v1"
+                    temporary_name = (
+                        f"{sentinel_name}.tmp.{os.getpid()}.{secrets.token_hex(8)}"
+                    )
+                    create_flags = (
+                        os.O_WRONLY | os.O_CREAT | os.O_EXCL
+                        | getattr(os, "O_NOFOLLOW", 0)
+                    )
+                    payload = secrets.token_bytes(32)
+                    temporary_fd = os.open(
+                        temporary_name, create_flags, 0o600, dir_fd=cache_fd
+                    )
+                    try:
+                        try:
+                            view = memoryview(payload)
+                            try:
+                                while view:
+                                    written = os.write(temporary_fd, view)
+                                    if written <= 0:
+                                        raise OSError
+                                    view = view[written:]
+                                os.fsync(temporary_fd)
+                            finally:
+                                view.release()
+                        finally:
+                            os.close(temporary_fd)
+                        try:
+                            os.link(
+                                temporary_name,
+                                sentinel_name,
+                                src_dir_fd=cache_fd,
+                                dst_dir_fd=cache_fd,
+                                follow_symlinks=False,
+                            )
+                        except FileExistsError:
+                            pass
+                    finally:
+                        try:
+                            os.unlink(temporary_name, dir_fd=cache_fd)
+                        except FileNotFoundError:
+                            pass
+                    sentinel_fd = os.open(
+                        sentinel_name,
+                        os.O_RDONLY | getattr(os, "O_NOFOLLOW", 0),
+                        dir_fd=cache_fd,
+                    )
+                    try:
+                        sentinel = os.fstat(sentinel_fd)
+                        payload = os.read(sentinel_fd, 33)
+                        if (
+                            not stat.S_ISREG(sentinel.st_mode)
+                            or sentinel.st_uid != realized_owner
+                            or stat.S_IMODE(sentinel.st_mode) & 0o777 != 0o600
+                            or sentinel.st_size != 32
+                            or len(payload) != 32
+                        ):
+                            raise OSError
+                        sentinel_sha256 = hashlib.sha256(payload).hexdigest()
+                    finally:
+                        os.close(sentinel_fd)
+                finally:
+                    os.close(cache_fd)
+            except OSError:
+                if generation == 3:
+                    continue
+                raise
+            break
+    finally:
+        os.close(parent_fd)
+except OSError:
+    raise SystemExit(1)
+print(sentinel_sha256, os.path.join(parent, name), end="")
+PY
+)" || return 1
+  # Split at the first space only, so the cache path may itself contain spaces.
+  sentinel_sha256="${info%% *}"
+  cache="${info#* }"
+  # cache == info means no space was present, i.e. the output was malformed.
+  [ "$cache" != "$info" ] && [[ "$sentinel_sha256" =~ ^[0-9a-f]{64}$ ]] \
+    && [[ "$cache" = /* ]] || return 1
+  export CX_PREPARED_BACKEND_CACHE="$cache"
+  export CX_BACKEND_CACHE_SENTINEL_SHA256="$sentinel_sha256"
+}
+
+# Check the backend cache at CX_BACKEND_CACHE_ROOT against the sentinel hash in
+# CX_BACKEND_CACHE_SENTINEL_SHA256.
+# Succeeds only when the root is an absolute, canonical, mode-0700 directory
+# whose .collectivex-mount-sentinel-v1 is a 32-byte regular file with mode 0600,
+# owned by the same UID as the directory, and hashing to the expected SHA-256.
+# Exits non-zero otherwise (including when either variable is unset).
+cx_verify_backend_cache_mount() {
+  python3 - "${CX_BACKEND_CACHE_ROOT:-}" \
+    "${CX_BACKEND_CACHE_SENTINEL_SHA256:-}" <<'PY'
+import hashlib
+import os
+import re
+import stat
+import sys
+
+root, expected = sys.argv[1:]
+try:
+    if (
+        not os.path.isabs(root)
+        or os.path.realpath(root) != root
+        or re.fullmatch(r"[0-9a-f]{64}", expected) is None
+    ):
+        raise OSError
+    flags = os.O_RDONLY | os.O_DIRECTORY | getattr(os, "O_NOFOLLOW", 0)
+    root_fd = os.open(root, flags)
+    try:
+        root_item = os.fstat(root_fd)
+        if (
+            not stat.S_ISDIR(root_item.st_mode)
+            or stat.S_IMODE(root_item.st_mode) & 0o777 != 0o700
+        ):
+            raise OSError
+        sentinel_fd = os.open(
+            ".collectivex-mount-sentinel-v1",
+            os.O_RDONLY | getattr(os, "O_NOFOLLOW", 0),
+            dir_fd=root_fd,
+        )
+        try:
+            sentinel = os.fstat(sentinel_fd)
+            payload = os.read(sentinel_fd, 33)
+            if (
+                not stat.S_ISREG(sentinel.st_mode)
+                or sentinel.st_uid != root_item.st_uid
+                or stat.S_IMODE(sentinel.st_mode) & 0o777 != 0o600
+                or sentinel.st_size != 32
+                or len(payload) != 32
+                or hashlib.sha256(payload).hexdigest() != expected
+            ):
+                raise OSError
+        finally:
+            os.close(sentinel_fd)
+    finally:
+        os.close(root_fd)
+except OSError:
+    raise SystemExit(1)
+PY
+}
+
+cx_git() {
+  GIT_CONFIG_NOSYSTEM=1 GIT_CONFIG_GLOBAL=/dev/null GIT_TERMINAL_PROMPT=0 \
+    git -c credential.helper= "$@"
+}
+
+# Run cx_git inside <directory>, with that directory's canonical path passed as
+# both safe.directory and -C.
+#   $1  absolute path of an existing, non-symlink directory
+#   $@  remaining arguments are handed to git
+# Returns 1 without running git when the path is relative, missing, a symlink,
+# or contains '*', a newline or a carriage return.
+cx_git_in_tree() {
+  local tree="$1" resolved
+  shift
+  case "$tree" in
+    /*) ;;
+    *) return 1 ;;
+  esac
+  if [ ! -d "$tree" ] || [ -L "$tree" ]; then
+    return 1
+  fi
+  case "$tree" in
+    *'*'*|*$'\n'*|*$'\r'*) return 1 ;;
+  esac
+  resolved="$(cd -P -- "$tree" && pwd -P)" || return 1
+  cx_git -c "safe.directory=$resolved" -C "$resolved" "$@"
+}
+
+cx_fetch_revision() {
+  local repository="$1" revision="$2" destination="$3" attempt
+  for attempt in 1 2 3; do
+    rm -rf -- "$destination"
+    if cx_git init -q "$destination" \
+        && cx_git_in_tree "$destination" remote add origin "$repository" \
+        && cx_git_in_tree "$destination" fetch -q --no-tags --depth 1 origin "$revision" \
+        && cx_git_in_tree "$destination" -c advice.detachedHead=false \
+          checkout -q --detach FETCH_HEAD \
+        && [ "$(cx_git_in_tree "$destination" rev-parse HEAD)" = "$revision" ]; then
+      return 0
+    fi
+    [ "$attempt" = 3 ] || sleep $((attempt * 5))
+  done
+  return 1
+}
+
+cx_backend_source_pin() {
+  case "$1" in
+    deepep-v2)
+      printf '%s|%s|%s' \
+        "$CX_DEEPEP_V2_COMMIT" "$CX_DEEPEP_V2_TREE" "$CX_DEEPEP_V2_FMT_COMMIT"
+      ;;
+    deepep-hybrid)
+      printf '%s|%s|' "$CX_DEEPEP_HYBRID_COMMIT" "$CX_DEEPEP_HYBRID_TREE"
+      ;;
+    *) return 1 ;;
+  esac
+}
+
+# Print the checkout path for a backend's pinned source:
+# <root>/<backend>-<commit>, without a trailing newline.
+#   $1  directory that holds the source checkouts
+#   $2  backend name understood by cx_backend_source_pin
+# Returns 1 when the backend has no pin.
+cx_backend_source_path() {
+  local base="$1" backend="$2" pinned
+  pinned="$(cx_backend_source_pin "$backend")" || return 1
+  # The commit is the first '|'-separated field of the pin.
+  printf '%s/%s-%s' "$base" "$backend" "${pinned%%|*}"
+}
+
+# Succeed only when <source> is a pristine checkout of the backend's pin.
+#   $1  backend name understood by cx_backend_source_pin
+#   $2  checkout directory (must exist and must not be a symlink)
+# Requires HEAD and HEAD^{tree} to equal the pinned commit and tree, an empty
+# `git status --porcelain` (untracked files and submodules included), and no
+# ignored files. When the pin names an fmt commit, third-party/fmt must be at
+# that commit as well.
+cx_backend_source_is_valid() {
+  local backend="$1" checkout="$2" pinned want_commit want_tree want_fmt dirty stray
+  pinned="$(cx_backend_source_pin "$backend")" || return 1
+  IFS='|' read -r want_commit want_tree want_fmt <<< "$pinned"
+  if [ ! -d "$checkout" ] || [ -L "$checkout" ]; then
+    return 1
+  fi
+  if [ "$(cx_git_in_tree "$checkout" rev-parse HEAD 2>/dev/null)" != "$want_commit" ]; then
+    return 1
+  fi
+  if [ "$(cx_git_in_tree "$checkout" rev-parse 'HEAD^{tree}' 2>/dev/null)" != "$want_tree" ]; then
+    return 1
+  fi
+  dirty="$(cx_git_in_tree "$checkout" status --porcelain --untracked-files=all \
+    --ignore-submodules=none 2>/dev/null)" || return 1
+  if [ -n "$dirty" ]; then
+    return 1
+  fi
+  stray="$(cx_git_in_tree "$checkout" ls-files --others --ignored --exclude-standard \
+    2>/dev/null)" || return 1
+  if [ -n "$stray" ]; then
+    return 1
+  fi
+  if [ -z "$want_fmt" ]; then
+    return 0
+  fi
+  [ "$(cx_git_in_tree "$checkout/third-party/fmt" rev-parse HEAD 2>/dev/null)" = "$want_fmt" ]
+}
+
+# Print one combined SHA-256 (hex, no trailing newline) over two files located
+# by glob under a root directory.
+#   $1  directory to search; must be a real directory, not a symlink
+#   $2  glob pattern relative to $1 that must match exactly one entry
+#   $3  second glob pattern with the same requirement
+# For each match, in argument order, the digest absorbs the file name, the file
+# size and the SHA-256 of the contents. Exits 1 when a pattern does not match
+# exactly one regular, non-symlink file, or on an I/O or encoding error.
+cx_extension_pair_sha256() {
+  python3 - "$1" "$2" "$3" <<'PY'
+import hashlib
+import os
+from pathlib import Path
+import stat
+import sys
+
+root = Path(sys.argv[1])
+digest = hashlib.sha256()
+try:
+    if root.is_symlink() or not root.is_dir():
+        raise OSError
+    for pattern in sys.argv[2:]:
+        matches = list(root.glob(pattern))
+        if len(matches) != 1 or matches[0].is_symlink():
+            raise OSError
+        path = matches[0]
+        descriptor = os.open(path, os.O_RDONLY | getattr(os, "O_NOFOLLOW", 0))
+        try:
+            metadata = os.fstat(descriptor)
+            if not stat.S_ISREG(metadata.st_mode):
+                raise OSError
+            file_digest = hashlib.sha256()
+            with os.fdopen(descriptor, "rb", closefd=False) as stream:
+                for chunk in iter(lambda: stream.read(1024 * 1024), b""):
+                    file_digest.update(chunk)
+            digest.update(path.name.encode("utf-8") + b"\0")
+            digest.update(str(metadata.st_size).encode("ascii") + b"\0")
+            digest.update(file_digest.digest())
+        finally:
+            os.close(descriptor)
+except (OSError, UnicodeError):
+    raise SystemExit(1)
+print(digest.hexdigest(), end="")
+PY
+}
+
+# Acquire source before compute allocation, preferring the verified same-run GHA seed.
+#   $1  staging root; checkouts live in experimental/CollectiveX/.cx_sources below it
+#   $2  backend name understood by cx_backend_source_pin
+# Resolution order:
+#   1. an existing entry at the pinned path is validated and that verdict returned;
+#   2. with CX_BACKEND_SOURCE_SEED_ROOT set, the seed checkout is validated,
+#      copied, re-validated and moved into place;
+#   3. canonical GHA runs (COLLECTIVEX_CANONICAL_GHA=1) fail here rather than fetch;
+#   4. otherwise the pinned revision is fetched from the DeepEP repository.
+# CX_BACKEND_SOURCE_STEP is updated before each phase so that the caller can
+# report which step failed. Returns non-zero on any failure.
+_cx_prepare_backend_source() {
+  local mount_src="$1" backend="$2" root source temporary revision tree fmt pin
+  local root_mode stage_mode root_owner stage_owner
+  local seed_root="${CX_BACKEND_SOURCE_SEED_ROOT:-}" seed seed_mode
+  root="$mount_src/experimental/CollectiveX/.cx_sources"
+  CX_BACKEND_SOURCE_STEP="source mount creation"
+  if [ ! -e "$root" ] && [ ! -L "$root" ]; then
+    mkdir -m 700 -- "$root" || return 1
+  fi
+  CX_BACKEND_SOURCE_STEP="source mount ownership validation"
+  [ -d "$mount_src" ] && [ ! -L "$mount_src" ] \
+    && [ -d "$root" ] && [ ! -L "$root" ] || return 1
+  # The source root must belong to the same UID as the staging root.
+  stage_owner="$(stat -c '%u' "$mount_src" 2>/dev/null)" || return 1
+  root_owner="$(stat -c '%u' "$root" 2>/dev/null)" || return 1
+  [ "$root_owner" = "$stage_owner" ] || return 1
+  stage_mode="$(stat -c '%a' "$mount_src" 2>/dev/null)" || return 1
+  case "$stage_mode" in 700|[1-7]700) ;; *) return 1 ;; esac
+  # Shared stage parents may retain harmless special bits despite mkdir -m.
+  CX_BACKEND_SOURCE_STEP="source mount permission inspection"
+  root_mode="$(stat -c '%a' "$root" 2>/dev/null)" || return 1
+  case "$root_mode" in
+    700|[1-7]700) ;;
+    *)
+      CX_BACKEND_SOURCE_STEP="source mount permission normalization"
+      chmod 700 "$root" || return 1
+      CX_BACKEND_SOURCE_STEP="source mount permission validation"
+      root_mode="$(stat -c '%a' "$root" 2>/dev/null)" || return 1
+      case "$root_mode" in 700|[1-7]700) ;; *) return 1 ;; esac
+      ;;
+  esac
+  CX_BACKEND_SOURCE_STEP="git lookup"
+  command -v git >/dev/null || return 1
+  CX_BACKEND_SOURCE_STEP="source pin resolution"
+  source="$(cx_backend_source_path "$root" "$backend")" || return 1
+  if [ -e "$source" ] || [ -L "$source" ]; then
+    CX_BACKEND_SOURCE_STEP="existing source validation"
+    cx_backend_source_is_valid "$backend" "$source"
+    # Bare return: propagate the validation status of the existing entry.
+    return
+  fi
+  if [ -n "$seed_root" ]; then
+    CX_BACKEND_SOURCE_STEP="source seed validation"
+    [[ "$seed_root" = /* ]] && [ -d "$seed_root" ] && [ ! -L "$seed_root" ] \
+      || return 1
+    seed_mode="$(stat -c '%a' "$seed_root" 2>/dev/null)" || return 1
+    case "$seed_mode" in 700|[1-7]700) ;; *) return 1 ;; esac
+    seed="$(cx_backend_source_path "$seed_root" "$backend")" || return 1
+    cx_backend_source_is_valid "$backend" "$seed" || return 1
+    CX_BACKEND_SOURCE_STEP="source seed copy"
+    # Copy into a temporary sibling and validate it before moving it into place.
+    temporary="$(mktemp -d "$root/.${backend}.XXXXXX")" || return 1
+    if ! cp -R -- "$seed/." "$temporary/" \
+        || ! cx_backend_source_is_valid "$backend" "$temporary" \
+        || ! mv -- "$temporary" "$source"; then
+      rm -rf -- "$temporary"
+      return 1
+    fi
+    return
+  fi
+  # Canonical runs reach this point only when no seed root was configured.
+  if [ "${COLLECTIVEX_CANONICAL_GHA:-0}" = 1 ]; then
+    CX_BACKEND_SOURCE_STEP="source seed validation"
+    return 1
+  fi
+  CX_BACKEND_SOURCE_STEP="source checkout creation"
+  temporary="$(mktemp -d "$root/.${backend}.XXXXXX")" || return 1
+  CX_BACKEND_SOURCE_STEP="source pin resolution"
+  pin="$(cx_backend_source_pin "$backend")" || {
+    rm -rf -- "$temporary"
+    return 1
+  }
+  IFS='|' read -r revision tree fmt <<< "$pin"
+  CX_BACKEND_SOURCE_STEP="revision fetch"
+  if ! cx_fetch_revision \
+      https://github.com/deepseek-ai/DeepEP "$revision" "$temporary"; then
+    rm -rf -- "$temporary"
+    return 1
+  fi
+  CX_BACKEND_SOURCE_STEP="submodule fetch"
+  # The fmt submodule is initialized only when the pin names an fmt commit.
+  if [ -n "$fmt" ] && ! cx_git_in_tree "$temporary" \
+      -c "safe.directory=$temporary/third-party/fmt" \
+      submodule update -q --init --depth 1 third-party/fmt; then
+    rm -rf -- "$temporary"
+    return 1
+  fi
+  CX_BACKEND_SOURCE_STEP="source publication validation"
+  if ! cx_backend_source_is_valid "$backend" "$temporary" \
+      || ! mv -- "$temporary" "$source"; then
+    rm -rf -- "$temporary"
+    return 1
+  fi
+}
+
+cx_prepare_backend_source() {
+  local log backend="$2" CX_BACKEND_SOURCE_STEP="initialization"
+  log="$(cx_private_log_path "backend-source-$backend")" || return 1
+  if _cx_prepare_backend_source "$@" > "$log" 2>&1; then
+    return 0
+  fi
+  printf '%s failed\n' "$CX_BACKEND_SOURCE_STEP" >> "$log"
+  cx_log "ERROR: backend-source-step=${CX_BACKEND_SOURCE_STEP// /-}"
+  cx_fail_stage backend-setup "$log"
+}
+
+# Copy the validated pinned source for a backend to <destination>, replacing
+# whatever is there.
+#   $1  backend name
+#   $2  destination path; its parent must be an existing non-symlink directory
+# Requires CX_BACKEND_SOURCE_ROOT. The copy is built and validated in a temporary
+# sibling directory, moved into place, and validated once more. Returns 1 on any
+# failure; a destination that fails the final validation is removed.
+cx_materialize_backend_source() {
+  local backend="$1" target="$2" pinned_copy target_parent scratch
+  if [ -z "${CX_BACKEND_SOURCE_ROOT:-}" ]; then
+    return 1
+  fi
+  pinned_copy="$(cx_backend_source_path "$CX_BACKEND_SOURCE_ROOT" "$backend")" || return 1
+  cx_backend_source_is_valid "$backend" "$pinned_copy" || return 1
+  target_parent="${target%/*}"
+  if [ "$target_parent" = "$target" ] || [ ! -d "$target_parent" ] \
+      || [ -L "$target_parent" ]; then
+    return 1
+  fi
+  scratch="$(mktemp -d "$target_parent/.collectivex-source.XXXXXX")" || return 1
+  # Nothing touches the destination until the scratch copy has been validated.
+  if cp -R -- "$pinned_copy/." "$scratch/" \
+      && cx_backend_source_is_valid "$backend" "$scratch" \
+      && rm -rf -- "$target" \
+      && mv -- "$scratch" "$target"; then
+    :
+  else
+    rm -rf -- "$scratch"
+    return 1
+  fi
+  if cx_backend_source_is_valid "$backend" "$target"; then
+    return 0
+  fi
+  rm -rf -- "$target"
+  return 1
+}
+
+# Pin the benchmark controls for canonical GitHub Actions runs.
+#   $1  runner name; must equal CX_SHARD_SKU
+# A no-op unless COLLECTIVEX_CANONICAL_GHA=1. Otherwise it requires GitHub
+# Actions, a matched shard and a complete workflow identity, clears inherited
+# tuning variables, and re-derives image, digest, placement, seed and timeout
+# from the runner name. Any mismatch aborts via cx_die.
+cx_lock_canonical_gha_env() {
+  local runner="$1" expected_nodes expected_gpn expected_world trusted_lock_dir=""
+  [ "${COLLECTIVEX_CANONICAL_GHA:-0}" = 1 ] || return 0
+  [ "${GITHUB_ACTIONS:-}" = true ] \
+    || cx_die "canonical CollectiveX execution requires GitHub Actions"
+  [ -n "${CX_SHARD_FILE:-}" ] && [ "${CX_SHARD_SKU:-}" = "$runner" ] \
+    || cx_die "canonical CollectiveX execution requires a matched shard"
+  [[ "${GITHUB_RUN_ID:-}" =~ ^[1-9][0-9]*$ \
+    && "${GITHUB_RUN_ATTEMPT:-}" =~ ^[1-9][0-9]*$ \
+    && "${COLLECTIVEX_SOURCE_SHA:-}" =~ ^[0-9a-f]{40,64}$ ]] \
+    || cx_die "canonical CollectiveX workflow identity is incomplete"
+
+  # cx_load_operator_config clears inherited values before setting this process marker.
+  # Preserve only its validated AMD lock path; direct runner-service values stay untrusted.
+  [ "${COLLECTIVEX_OPERATOR_CONFIG_LOADED:-}" != "$$" ] \
+    || trusted_lock_dir="${CX_LOCK_DIR:-}"
+  unset CX_NCCL_HOME CX_MASTER_PORT CX_MORI_KERNEL_TYPE CX_LOCK_DIR
+  unset MORI_COMMIT MORI_DISABLE_AUTO_XGMI MORI_ENABLE_SDMA
+  unset MORI_APP_LOG_LEVEL MORI_SHMEM_LOG_LEVEL MORI_IO_LOG_LEVEL
+  unset NCCL_CUMEM_ENABLE NCCL_MNNVL_ENABLE MC_FORCE_MNNVL
+  unset CX_BACKEND_CACHE_ROOT CX_BACKEND_CACHE_SENTINEL_SHA256
+  unset CX_PREPARED_BACKEND_CACHE CX_BACKEND_SOURCE_ROOT
+
+  [ -n "${CX_SQUASH_DIR:-}" ] \
+    || cx_die "canonical CollectiveX execution requires shared container storage"
+
+  # Per-runner pins: expected placement, image and digest, backend settings.
+  case "$runner" in
+    h100-dgxc|h200-dgxc|b200-dgxc|b300)
+      expected_nodes=1; expected_gpn=8
+      CX_IMAGE="$CX_IMAGE_MULTIARCH"
+      CX_IMAGE_DIGEST="$CX_IMAGE_MULTIARCH_DIGEST"
+      CX_NCCL_HOME=/usr
+      ;;
+    gb200|gb300)
+      expected_nodes="${CX_NODES:-}"; expected_gpn=4
+      [ "$expected_nodes" = 1 ] || [ "$expected_nodes" = 2 ] \
+        || cx_die "canonical GB execution requires one or two trays"
+      CX_IMAGE="$CX_IMAGE_MULTIARCH"
+      CX_IMAGE_DIGEST="$CX_IMAGE_MULTIARCH_DIGEST"
+      CX_NCCL_HOME=/usr
+      CX_MASTER_PORT=29551
+      ;;
+    mi325x)
+      expected_nodes=1; expected_gpn=8
+      CX_STAGE_DIR="$(cx_gha_workspace_stage_root)" \
+        || cx_die "canonical AMD staging workspace is unsafe"
+      CX_IMAGE="$CX_IMAGE_AMD_MORI_MI325"
+      CX_IMAGE_DIGEST="$CX_IMAGE_AMD_MORI_MI325_DIGEST"
+      CX_MORI_KERNEL_TYPE=asyncll
+      MORI_COMMIT="$CX_MORI_COMMIT_MI325"
+      MORI_DISABLE_AUTO_XGMI=0
+      MORI_ENABLE_SDMA=1
+      MORI_APP_LOG_LEVEL=info
+      MORI_SHMEM_LOG_LEVEL=info
+      MORI_IO_LOG_LEVEL=info
+      ;;
+    mi355x)
+      expected_nodes=1; expected_gpn=8
+      CX_STAGE_DIR="$(cx_gha_workspace_stage_root)" \
+        || cx_die "canonical AMD staging workspace is unsafe"
+      CX_IMAGE="$CX_IMAGE_AMD_MORI"
+      CX_IMAGE_DIGEST="$CX_IMAGE_AMD_MORI_DIGEST"
+      CX_MORI_KERNEL_TYPE=intranode
+      MORI_COMMIT="$CX_MORI_COMMIT_MI355"
+      ;;
+    *) cx_die "canonical CollectiveX runner is not registered" ;;
+  esac
+  # Restore the lock directory only for AMD runners, and only a non-empty value.
+  case "$runner:$trusted_lock_dir" in
+    mi325x:?*|mi355x:?*) export CX_LOCK_DIR="$trusted_lock_dir" ;;
+  esac
+  # Runners that did not set a staging directory above default to the squash store.
+  CX_STAGE_DIR="${CX_STAGE_DIR:-$CX_SQUASH_DIR/.stage}"
+  export CX_STAGE_DIR
+  [ "${CX_NODES:-}" = "$expected_nodes" ] \
+    && [ "${CX_GPUS_PER_NODE:-}" = "$expected_gpn" ] \
+    || cx_die "canonical CollectiveX placement differs from the shard"
+  expected_world=$((expected_nodes * expected_gpn))
+  CX_NGPUS="$expected_world"
+  CX_SEED=67
+  case "$runner" in mi325x|mi355x) CX_RUN_TIMEOUT=1800 ;; *) CX_RUN_TIMEOUT=900 ;; esac
+  unset CX_PUBLIC_RUNNER CX_GB_PRODUCT CX_DRYRUN CX_TIMING CX_ALLOW_MNNVL
+  unset CX_ENROOT_LOCAL_IMPORT COLLECTIVEX_IMAGE COLLECTIVEX_IMAGE_DIGEST
+  unset COLLECTIVEX_IMAGE_DIGEST_VERIFIED COLLECTIVEX_SQUASH_SHA256
+  export CX_IMAGE CX_IMAGE_DIGEST CX_NGPUS CX_SEED CX_RUN_TIMEOUT
+  # Export only the backend variables that were assigned for this runner above.
+  case "$runner" in
+    h100-dgxc|h200-dgxc|b200-dgxc|b300) export CX_NCCL_HOME ;;
+    gb200|gb300) export CX_NCCL_HOME CX_MASTER_PORT ;;
+    mi325x)
+      export CX_MORI_KERNEL_TYPE MORI_COMMIT MORI_DISABLE_AUTO_XGMI MORI_ENABLE_SDMA
+      export MORI_APP_LOG_LEVEL MORI_SHMEM_LOG_LEVEL MORI_IO_LOG_LEVEL
+      ;;
+    mi355x) export CX_MORI_KERNEL_TYPE MORI_COMMIT ;;
+  esac
+}
+
+cx_reverify_registry_image() {
+  local image="$1" actual
+  [[ "${COLLECTIVEX_IMAGE_DIGEST:-}" =~ ^sha256:[0-9a-f]{64}$ ]] \
+    && [ "${COLLECTIVEX_IMAGE_DIGEST_VERIFIED:-0}" = 1 ] || return 1
+  actual="$(cx_resolve_registry_digest "$image")" || return 1
+  [ "$actual" = "$COLLECTIVEX_IMAGE_DIGEST" ] || {
+    cx_log "ERROR: configured image tag changed during container import"
+    return 1
+  }
+}
+
+cx_export_squash_identity() {
+  local image="$1" digest log
+  log="$(cx_private_log_path container-hash)"
+  digest="$(sha256sum "$image" 2>> "$log" | awk '{print $1}')"
+  [[ "$digest" =~ ^[0-9a-f]{64}$ ]] \
+    || { cx_fail_stage container-hash "$log"; return 1; }
+  export COLLECTIVEX_SQUASH_SHA256="$digest"
+}
+
+cx_squash_path() {
+  local squash_dir="$1" image="$2" key platform
+  [[ "${COLLECTIVEX_IMAGE_DIGEST:-}" =~ ^sha256:[0-9a-f]{64}$ ]] \
+    || return 1
+  case "${CX_IMAGE_PLATFORM:-}" in
+    linux/amd64) platform="" ;;
+    linux/arm64) platform="_linux_arm64" ;;
+    *) return 1 ;;
+  esac
+  key="${CX_SQUASH_FORMAT_VERSION}${platform}_${COLLECTIVEX_IMAGE_DIGEST#sha256:}_$(
+    printf '%s' "$image" | sed 's#[/:@#]#_#g'
+  )"
+  printf '%s' "$squash_dir/${key}.sqsh"
+}
+
+# cx_ensure_squash <squash_dir> <image>  ->  echoes the squash file path.
+# Imports via Enroot only if a valid squash is absent; failures after the lock fd opens close it.
+cx_ensure_squash() {
+  local squash_dir="$1" image="$2" key sq locks lock_fd log
+  local enroot_local="" import_rc=0 machine
+  log="$(cx_private_log_path container-import)"
+  machine="$(uname -m)"
+  case "${CX_IMAGE_PLATFORM:-}:$machine" in  # the image platform must match this host's CPU
+    linux/amd64:x86_64|linux/amd64:amd64|linux/arm64:aarch64|linux/arm64:arm64) ;;
+    *) cx_fail_stage container-import "$log"; return 1 ;;
+  esac
+  mkdir -p "$squash_dir" 2>> "$log" \
+    || { cx_fail_stage container-import "$log"; return 1; }
+  sq="$(cx_squash_path "$squash_dir" "$image")" \
+    || { cx_fail_stage container-import "$log"; return 1; }
+  key="${sq##*/}"
+  key="${key%.sqsh}"
+  locks="$squash_dir/.locks"
+  mkdir -p "$locks" 2>> "$log" \
+    || { cx_fail_stage container-import "$log"; return 1; }
+  { exec {lock_fd}>"$locks/${key}.lock"; } 2>> "$log" \
+    || { cx_fail_stage container-import "$log"; return 1; }
+  flock -w 900 "$lock_fd" 2>> "$log" \
+    || { exec {lock_fd}>&-; cx_fail_stage container-import "$log"; return 1; }
+  if unsquashfs -l "$sq" >/dev/null 2>&1; then  # a listable squash is treated as already imported
+    cx_log "container squash ready"
+  else
+    cx_log "importing configured container image"
+    rm -f "$sq" 2>> "$log" \
+      || { exec {lock_fd}>&-; cx_fail_stage container-import "$log"; return 1; }
+    # </dev/null: never block on an interactive password prompt.
+    if [ "${CX_ENROOT_LOCAL_IMPORT:-0}" = 1 ]; then
+      enroot_local="$(mktemp -d /tmp/inferencex-collectivex-enroot.XXXXXX)" \
+        || { exec {lock_fd}>&-; cx_fail_stage container-import "$log"; return 1; }
+      (
+        trap 'rm -rf -- "$enroot_local"' EXIT
+        export ENROOT_TEMP_PATH="$enroot_local/tmp"
+        export ENROOT_CACHE_PATH="$enroot_local/cache"
+        export ENROOT_DATA_PATH="$enroot_local/data"
+        export ENROOT_RUNTIME_PATH="$enroot_local/run"
+        mkdir -p "$ENROOT_TEMP_PATH" "$ENROOT_CACHE_PATH" \
+          "$ENROOT_DATA_PATH" "$ENROOT_RUNTIME_PATH" || exit 1
+        SOURCE_DATE_EPOCH="$CX_SQUASH_SOURCE_DATE_EPOCH" \
+          enroot import -o "$sq" "docker://$image" </dev/null
+      ) >> "$log" 2>&1 || import_rc=$?
+      rm -rf -- "$enroot_local" >/dev/null 2>&1 || true
+      [ "$import_rc" = 0 ] \
+        || { exec {lock_fd}>&-; cx_fail_stage container-import "$log"; return 1; }
+    else
+      SOURCE_DATE_EPOCH="$CX_SQUASH_SOURCE_DATE_EPOCH" \
+        enroot import -o "$sq" "docker://$image" </dev/null >> "$log" 2>&1 \
+        || { exec {lock_fd}>&-; cx_fail_stage container-import "$log"; return 1; }
+    fi
+    unsquashfs -l "$sq" >> "$log" 2>&1 \
+      || { exec {lock_fd}>&-; cx_fail_stage container-import "$log"; return 1; }
+  fi
+  if ! cx_reverify_registry_image "$image" >> "$log" 2>&1; then
+    flock -u "$lock_fd" >/dev/null 2>&1 || true
+    exec {lock_fd}>&-
+    cx_fail_stage container-import "$log"
+    return 1
+  fi
+  flock -u "$lock_fd"
+  exec {lock_fd}>&-
+  echo "$sq"
+}
+
+# cx_ensure_squash_on_job <job_id> <squash_dir> <image> [lock_dir]  ->  prints the squash file path.
+# Imports on an allocated compute node so multiarch tags resolve for the target architecture; the squash directory must be shared with the submit host.
+cx_ensure_squash_on_job() {
+  local job_id="$1" squash_dir="$2" image="$3" lock_dir="${4:-}" sq key lock log
+  [[ "$job_id" =~ ^[0-9]+$ ]] || return 1  # only a purely numeric job id is accepted
+  sq="$(cx_squash_path "$squash_dir" "$image")" || return 1
+  key="${sq##*/}"  # squash file name ...
+  key="${key%.sqsh}"  # ... without its extension names the lock file
+  [ -n "$lock_dir" ] || lock_dir="$squash_dir/.locks"  # default lock location when none is passed
+  lock="$lock_dir/${key}.lock"
+  log="$(cx_private_log_path container-import)"
+  if ! srun --jobid="$job_id" --nodes=1 --ntasks=1 --chdir=/tmp \
+      --export="$(cx_host_exports)" \
+      bash -s -- "$sq" "$lock" "$image" "$CX_SQUASH_SOURCE_DATE_EPOCH" \
+      "$CX_IMAGE_PLATFORM" \
+      > "$log" 2>&1 <<'BASH'
+set -euo pipefail
+sq="$1"; lock="$2"; image="$3"; source_date_epoch="$4"; platform="$5"
+machine="$(uname -m)"
+case "$platform:$machine" in
+  linux/amd64:x86_64|linux/amd64:amd64|linux/arm64:aarch64|linux/arm64:arm64) ;;
+  *) exit 13 ;;
+esac
+compute_home="$(mktemp -d /tmp/inferencex-collectivex-home.XXXXXX)"
+trap 'rm -rf -- "$compute_home"' EXIT
+export HOME="$compute_home" XDG_CACHE_HOME="$compute_home/.cache"
+export ENROOT_TEMP_PATH="$compute_home/enroot-tmp"
+export ENROOT_CACHE_PATH="$compute_home/enroot-cache"
+export ENROOT_DATA_PATH="$compute_home/enroot-data"
+export ENROOT_RUNTIME_PATH="$compute_home/enroot-run"
+mkdir -p "$(dirname "$sq")" "$(dirname "$lock")" \
+  "$ENROOT_TEMP_PATH" "$ENROOT_CACHE_PATH" "$ENROOT_DATA_PATH" "$ENROOT_RUNTIME_PATH"
+exec 9>"$lock"
+flock -w 900 9
+if unsquashfs -l "$sq" >/dev/null 2>&1; then
+  echo 'container squash ready'
+else
+  rm -f -- "$sq"
+  SOURCE_DATE_EPOCH="$source_date_epoch" \
+    enroot import -o "$sq" "docker://$image" </dev/null
+  unsquashfs -l "$sq" >/dev/null 2>&1
+fi
+BASH
+  then  # srun itself or the remote import script exited nonzero
+    cx_fail_stage container-import "$log"
+    return 1
+  fi
+  if ! cx_reverify_registry_image "$image" >> "$log" 2>&1; then  # the tag must still resolve to the pinned digest
+    cx_fail_stage container-import "$log"
+    return 1
+  fi
+  printf '%s' "$sq"
+}
+
+cx_preflight_allocation() {  # <job_id> <nodes> <mount_src> <squash> [shard]: one task per node checks the inputs are readable
+  local job_id="$1" nodes="$2" mount_src="$3" squash="$4" shard="${5:-}"
+  local log rc=0 runtime shard_path=""
+  runtime="$mount_src/experimental/CollectiveX/runtime/run_in_container.sh"
+  [ -z "$shard" ] || shard_path="$mount_src/experimental/CollectiveX/$shard"  # the shard check is skipped when no shard is given
+  log="$(cx_private_log_path allocation-preflight)"
+  srun --jobid="$job_id" --nodes="$nodes" --ntasks="$nodes" --ntasks-per-node=1 \
+    --chdir=/tmp \
+    --export="$(cx_host_exports)" bash -s -- "$runtime" "$shard_path" "$squash" \
+    "$CX_IMAGE_PLATFORM" \
+    > "$log" 2>&1 <<'BASH' || rc=$?
+set -euo pipefail
+machine="$(uname -m)"
+case "$4:$machine" in
+  linux/amd64:x86_64|linux/amd64:amd64|linux/arm64:aarch64|linux/arm64:arm64) ;;
+  *) exit 13 ;;
+esac
+test -r "$1" || exit 10
+[ -z "$2" ] || test -r "$2" || exit 11
+test -r "$3" || exit 12
+unsquashfs -s "$3" >/dev/null 2>&1 || exit 12
+BASH
+  [ "$rc" = 0 ] && return 0
+  case "$rc" in  # map the remote exit code to the stage reported as failed
+    10|11) cx_fail_stage repository-stage "$log" ;;  # runtime script (10) or shard file (11) unreadable
+    12) cx_fail_stage container-hash "$log" ;;  # squash unreadable or rejected by unsquashfs -s
+    *) cx_fail_stage container-launch "$log" ;;  # anything else, including the platform mismatch code 13
+  esac
+  return 1
+}
+
+# cx_stage_repo <repo_root> <stage_dir>  ->  echoes the mount-source root.
+# Stage only the public benchmark tree onto compute-visible storage. Canonical
+# GHA requires an operator-configured base; manual diagnostics use an isolated
+# directory under the already-required squash storage so ignored private notes
+# are never mounted into a compute container.
+cx_stage_repo() {
+  local repo_root="$1" stage_dir="${2:-}" log tag safe_tag
+  cx_validate_shard_control "$repo_root/experimental/CollectiveX"  # NOTE(review): defined elsewhere; presumably validates the shard control file — confirm
+  if [ "${COLLECTIVEX_CANONICAL_GHA:-0}" = 1 ] \
+      && { [ -z "$stage_dir" ] || [ "$stage_dir" = "$repo_root" ]; }; then
+    cx_die "canonical CollectiveX execution requires compute-visible staging"
+  fi
+  # Concurrency isolation. Under GHA the per-config concurrency fan-out runs many
+  # same-SKU dispatches at once, all staging into the SAME shared base dir; a
+  # shared dir + `rsync --delete` lets one job unlink/replace a file a peer is
+  # mid-read of -> "error reading input file: Stale file handle" on the next
+  # `srun ... run_in_container.sh`. Give each EXECUTING job its own subdir keyed on
+  # a workflow-provided execution id. Manual runs use the launcher PID.
+  tag="${COLLECTIVEX_EXECUTION_ID:-${GITHUB_RUN_ID:-manual-$$}}"
+  safe_tag="$(printf '%s' "$tag" | tr -c 'A-Za-z0-9._-' '_')"  # every character outside A-Za-z0-9._- becomes '_'
+  if [ -z "$stage_dir" ] || [ "$stage_dir" = "$repo_root" ]; then
+    [ -n "${CX_SQUASH_DIR:-}" ] \
+      || cx_die "manual CollectiveX staging requires CX_SQUASH_DIR"
+    stage_dir="${CX_SQUASH_DIR%/}/.collectivex-stage-$safe_tag"  # no usable base given: hidden per-execution dir under the squash storage
+  else
+    stage_dir="${stage_dir%/}/job_$safe_tag"  # configured base: one job_<tag> subdirectory per execution
+  fi
+  mkdir -p "${stage_dir%/*}" 2>/dev/null \
+    || cx_die "cannot create the configured stage base"
+  if [ -e "$stage_dir" ] || [ -L "$stage_dir" ]; then  # -L also catches a dangling symlink
+    cx_die "refusing to reuse a pre-existing execution stage"
+  fi
+  mkdir -m 700 "$stage_dir" 2>/dev/null \
+    || cx_die "cannot create the configured stage directory"
+  mkdir -m 700 "$stage_dir/experimental" 2>/dev/null \
+    || cx_die "cannot create the configured stage directory"
+  cx_log "staging CollectiveX on compute-visible storage"
+  log="$(cx_private_log_path repository-stage)"
+  if ! rsync -a --delete --delete-excluded \
+      --exclude='__pycache__/' --exclude='results/' --exclude='.cx_workloads/' \
+      --exclude='.cx_backend/' --exclude='.cx_sources/' \
+      --exclude='configs/platforms.yaml' --exclude='private-infra.md' \
+      --exclude='goal.md' --exclude='notes.md' \
+      "$repo_root/experimental/CollectiveX" "$stage_dir/experimental/" > "$log" 2>&1; then
+    rm -rf -- "$stage_dir" >/dev/null 2>&1 \
+      || cx_log "ERROR: cannot remove the incomplete execution stage"
+    cx_fail_stage repository-stage "$log" || true
+    return 1
+  fi
+  echo "$stage_dir"  # the only stdout of this function: the mount-source root
+}
+
+# cx_collect_results <mount_src> <repo_root>
+# When the run used a staged (compute-visible) mount, copy result JSONs back to
+# the original checkout's results/ so the workflow's upload-artifact (which reads
+# the checkout, not the stage dir) finds them. No-op when no staging was used.
+cx_collect_results() {
+  local mount_src="$1" repo_root="$2" dst log
+  local -a files
+  [ "$mount_src" = "$repo_root" ] && return 0
+  log="$(cx_private_log_path "artifact-collection-$$-${RANDOM}")"
+  dst="$repo_root/experimental/CollectiveX/results"
+  mkdir -p "$dst" 2>> "$log" \
+    || { cx_log "ERROR: cannot create checkout result directory"; return 1; }
+  # Glob without toggling nullglob so the caller's shell options are left untouched.
+  files=("$mount_src/experimental/CollectiveX/results/"*.json)
+  [ -e "${files[0]:-}" ] || [ -L "${files[0]:-}" ] || files=()
+  [ "${#files[@]}" -gt 0 ] || { cx_log "ERROR: staged run produced no result JSON"; return 1; }
+  cp -- "${files[@]}" "$dst/" >> "$log" 2>&1 \
+    || { cx_log "ERROR: staged result collection failed"; return 1; }
+  cx_log "collected staged results for artifact validation"
+}
+
+cx_cleanup_stage() {  # <mount_src> <repo_root>: remove the generated per-execution stage directory
+  local mount_src="$1" repo_root="$2" base="${CX_STAGE_DIR:-}" tag safe_tag expected
+  tag="${COLLECTIVEX_EXECUTION_ID:-${GITHUB_RUN_ID:-manual-$$}}"  # same tag derivation as cx_stage_repo
+  safe_tag="$(printf '%s' "$tag" | tr -c 'A-Za-z0-9._-' '_')"
+  [ "$mount_src" != "$repo_root" ] || return 0  # the checkout itself was mounted: nothing to remove
+  if [ -n "$base" ] && [ "$base" != "$repo_root" ]; then
+    expected="${base%/}/job_$safe_tag"
+  else
+    [ -n "${CX_SQUASH_DIR:-}" ] \
+      || { cx_log "ERROR: cannot identify the generated stage directory"; return 1; }
+    expected="${CX_SQUASH_DIR%/}/.collectivex-stage-$safe_tag"
+  fi
+  if [ "$mount_src" != "$expected" ] || [ "$mount_src" = / ] \
+      || { [ -n "$base" ] && [ "$mount_src" = "$base" ]; }; then  # only the exact recomputed path may be deleted
+    cx_log "ERROR: refusing to remove an unrecognized stage directory"
+    return 1
+  fi
+  rm -rf -- "$mount_src" >/dev/null 2>&1 || {
+    cx_log "ERROR: cannot remove generated stage directory"
+    return 1
+  }
+  cx_log "removed generated per-execution stage directory"
+}
+
+# Return success only when a benchmark output is a complete JSON result object.
+# Callers use this before synthesizing a terminal outcome so an emitted invalid result
+# is not shadowed by a second record for the same attempt.
+cx_has_result_doc() {
+  local path="$1"
+  python3 "$_CX_COMMON_ROOT/contracts.py" probe "$path" >/dev/null 2>&1
+}
+
+cx_result_doc_is() {
+  local path="$1" expected="$2"
+  python3 "$_CX_COMMON_ROOT/contracts.py" probe "$path" --status "$expected" \
+    >/dev/null 2>&1
+}
+
+# A rank-zero result can be written before another rank or backend teardown fails. Preserve its
+# measurements, but make the distributed command's nonzero terminal status authoritative.
+cx_demote_result_doc() {
+  local path="$1" rc="$2"
+  python3 "$_CX_COMMON_ROOT/contracts.py" demote "$path" --return-code "$rc"
+}
+
+cx_quarantine_result_doc() {  # <path>: hand the file to the contracts.py quarantine-invalid subcommand
+  python3 "$_CX_COMMON_ROOT/contracts.py" quarantine-invalid "$1"
+}
+
+# cx_emit_ep_failed_case <out> <backend> <phase> <return-code>
+# Preserve failures from rack launchers that invoke run_ep.py directly and therefore cannot use
+# run_in_container.sh's emitter. Case identity is read from the exported CX_* variables.
+# Logs an error and returns 1 when contracts.py emit-terminal fails.
+cx_emit_ep_failed_case() {
+  local -a emit=(python3 "$_CX_COMMON_ROOT/contracts.py" emit-terminal
+    --out "$1" --backend "$2" --phase "$3" --return-code "$4")
+  if [ -n "${CX_FAILURE_MODE:-}" ]; then
+    emit+=(--failure-mode "$CX_FAILURE_MODE")
+  fi
+  "${emit[@]}" && return 0
+  cx_log "ERROR: could not preserve terminal outcome"
+  return 1
+}
+
+cx_case_attempt_exists() {  # <out_dir> <case_id>: succeed when a valid raw or terminal document in out_dir carries case_id
+  local out_dir="$1" case_id="$2"  # side effect of the scan: invalid documents and unreferenced sample files are renamed to *.quarantine
+  python3 - "$_CX_COMMON_ROOT" "$out_dir" "$case_id" <<'PY'
+import pathlib, sys
+
+sys.path.insert(0, sys.argv[1])
+import contracts
+
+sample_paths = set()
+referenced_samples = set()
+found = False
+
+def quarantine(path, document):
+    sample = document.get("sample_artifact") if isinstance(document, dict) else None
+    if (
+        isinstance(sample, dict)
+        and isinstance(sample.get("path"), str)
+        and pathlib.Path(sample["path"]).name == sample["path"]
+    ):
+        sample_path = path.with_name(sample["path"])
+        if sample_path.is_file():
+            sample_path.replace(sample_path.with_name(sample_path.name + ".quarantine"))
+    if path.is_file():
+        path.replace(path.with_name(path.name + ".quarantine"))
+
+for path in pathlib.Path(sys.argv[2]).glob("*.json"):
+    document = None
+    try:
+        document = contracts.strict_load(path)
+        if not isinstance(document, dict):
+            continue
+        if document.get("format") == contracts.RAW_FORMAT:
+            document = contracts.load_raw_attempt(path)
+            referenced_samples.add(path.with_name(document["sample_artifact"]["path"]))
+        elif document.get("format") == contracts.TERMINAL_FORMAT:
+            document = contracts.validate_terminal_document(document)
+        elif document.get("format") == contracts.SAMPLES_FORMAT:
+            contracts.validate_samples_document(document)
+            sample_paths.add(path)
+            continue
+        else:
+            continue
+    except (contracts.ContractError, OSError, ValueError):
+        quarantine(path, document)
+        continue
+    if document["identity"]["case_id"] == sys.argv[3]:
+        found = True
+for orphan in sample_paths - referenced_samples:
+    quarantine(orphan, {})
+raise SystemExit(0 if found else 1)
+PY
+}
+
+# Emit one setup-failure record per requested case. Rack launchers call this when
+# backend preparation fails before rank processes can start.
+cx_emit_setup_failures() {  # <root> <out_dir> <backend> <rc>; the shard path comes from CX_SHARD_FILE
+  local root="$1" out_dir="$2" backend="$3" rc="$4" shard="${CX_SHARD_FILE:-}" path
+  local phase case_id suite workload required routing eplb ep hidden topk experts nodes
+  local gpn domain ladder canonical timing
+  local cases_file expected emitted=0 covered=0
+  mkdir -p "$out_dir" || return 1
+  export CX_FAILURE_MODE="${CX_FAILSAFE_MODE:-setup}" CX_ATTEMPT_ID=1
+  if [ -z "$shard" ]; then  # no shard control: one record per requested phase
+    local phases="${CX_PHASE:-decode}"
+    [ "$phases" = both ] && phases="decode prefill"
+    for phase in $phases; do
+      if [ -n "${CX_CASE_ID:-}" ] && cx_case_attempt_exists "$out_dir" "$CX_CASE_ID"; then
+        continue
+      fi
+      cx_emit_ep_failed_case "$out_dir/failed_${backend}_${phase}_${CX_TS:-setup}-a01.json" \
+        "$backend" "$phase" "$rc" || { unset CX_FAILURE_MODE; return 1; }
+    done
+    unset CX_FAILURE_MODE
+    return 0
+  fi
+  path="$shard"
+  [ -f "$path" ] || path="${root%/}/$shard"  # fall back to a root-relative shard path
+  [ -f "$path" ] || {
+    unset CX_FAILURE_MODE
+    cx_log "ERROR: cannot emit setup failures without shard control"
+    return 1
+  }
+  export COLLECTIVEX_CONTROL_SHA256
+  COLLECTIVEX_CONTROL_SHA256="$(sha256sum "$path" | awk '{print $1}')"
+  [[ "$COLLECTIVEX_CONTROL_SHA256" =~ ^[0-9a-f]{64}$ ]] || {
+    unset CX_FAILURE_MODE COLLECTIVEX_CONTROL_SHA256
+    cx_log "ERROR: cannot hash shard for setup-failure records"
+    return 1
+  }
+  cases_file="$(mktemp)" || { unset CX_FAILURE_MODE; return 1; }
+  if ! python3 - "$path" > "$cases_file" <<'PY'
+import json, sys
+
+with open(sys.argv[1]) as handle:
+    cases = json.load(handle)["cases"]
+for case in cases:
+    fields = (
+        case["phase"], case["case_id"], case["suite"], case["workload"],
+        case["required_publication"], case["routing"], "1" if case["eplb"] else "",
+        case["ep"], case["hidden"], case["topk"], case["experts"], case["nodes"],
+        case["gpus_per_node"], case["scale_up_domain"], case["ladder"],
+        "1" if case["canonical"] else "", case["timing"],
+    )
+    print("|".join(map(str, fields)))
+PY
+  then
+    rm -f "$cases_file"
+    unset CX_FAILURE_MODE
+    return 1
+  fi
+  expected="$(wc -l < "$cases_file" | tr -d ' ')"
+  [ "$expected" -gt 0 ] || { rm -f "$cases_file"; unset CX_FAILURE_MODE; return 1; }
+  while IFS='|' read -r phase case_id suite workload required routing eplb ep hidden topk experts \
+      nodes gpn domain ladder canonical timing; do
+    export CX_CASE_ID="$case_id" CX_SUITE="$suite" CX_WORKLOAD_NAME="$workload"
+    export CX_REQUIRED_PUBLICATION="$required" CX_ROUTING="$routing" CX_EPLB="$eplb"
+    export CX_EP="$ep" CX_NGPUS="$ep" CX_HIDDEN="$hidden" CX_TOPK="$topk" CX_EXPERTS="$experts"
+    export CX_NODES="$nodes" CX_GPUS_PER_NODE="$gpn" CX_SCALE_UP_DOMAIN="$domain"
+    export CX_TOKENS_LADDER="$ladder" CX_CANONICAL="$canonical"
+    IFS=: read -r CX_ITERS CX_TRIALS CX_WARMUP <<< "$timing"  # timing is "iters:trials:warmup"
+    export CX_ITERS CX_TRIALS CX_WARMUP CX_SAMPLES_PER_POINT="$((CX_ITERS * CX_TRIALS))"
+    if cx_case_attempt_exists "$out_dir" "$case_id"; then
+      covered=$((covered + 1))
+      continue
+    fi
+    cx_emit_ep_failed_case "$out_dir/failed_${case_id}-a01.json" "$backend" "$phase" "$rc" \
+      || { rm -f "$cases_file"; unset CX_FAILURE_MODE; return 1; }
+    emitted=$((emitted + 1))
+  done < "$cases_file"
+  rm -f "$cases_file"; unset CX_FAILURE_MODE
+  [ "$((emitted + covered))" -eq "$expected" ] || {
+    cx_log "ERROR: covered $((emitted + covered))/$expected terminal cases"
+    return 1
+  }
+}
+
+cx_launcher_cleanup() {  # <rc>: EXIT handler — stop the allocation, record failures, clean up, then exit
+  local rc="$1" source_root="${MOUNT_SRC:-${REPO_ROOT:-}}" out_dir allocation_stopped=1
+  trap - EXIT  # disarm so the exit at the end does not re-enter this handler
+  if [ -n "${COLLECTIVEX_EPHEMERAL_CONFIG_PATH:-}" ]; then
+    rm -f -- "$COLLECTIVEX_EPHEMERAL_CONFIG_PATH" >/dev/null 2>&1 || true
+    unset COLLECTIVEX_EPHEMERAL_CONFIG_PATH
+  fi
+  if [ -n "${JOB_ID:-}" ]; then  # a job id is known: it must be cancelled
+    if ! cx_cancel_job "$JOB_ID"; then
+      allocation_stopped=0
+      [ "$rc" != 0 ] || rc=1  # a failed cancel turns an otherwise successful run into a failure
+    fi
+  elif [ "${CX_ALLOCATION_REQUESTED:-0}" = 1 ]; then  # requested but no job id recorded: cannot confirm it stopped
+    allocation_stopped=0
+    [ "$rc" != 0 ] || rc=1
+  fi
+  if [ "$allocation_stopped" = 1 ]; then
+    cx_write_cleanup_guard safe || true
+  else
+    cx_write_cleanup_guard unsafe || true
+  fi
+  [ "$allocation_stopped" = 1 ] || source_root="${REPO_ROOT:-$source_root}"  # allocation not confirmed stopped: work from the checkout, not the stage
+  if [ "$rc" != 0 ] && [ -n "${REPO_ROOT:-}" ] && [ -n "${CX_BENCH:-}" ]; then
+    cx_log "ERROR: terminal-failure-class=${CX_FAILSAFE_MODE:-setup}"
+    [ -d "$source_root/experimental/CollectiveX" ] || source_root="$REPO_ROOT"
+    out_dir="$source_root/experimental/CollectiveX/results"
+    cx_emit_setup_failures \
+      "$source_root/experimental/CollectiveX" "$out_dir" "$CX_BENCH" "$rc" || true
+    [ "$source_root" = "$REPO_ROOT" ] \
+      || cx_collect_results "$source_root" "$REPO_ROOT" || true
+  fi
+  if [ "$allocation_stopped" = 1 ] && [ -n "${REPO_ROOT:-}" ] \
+      && [ "$source_root" != "$REPO_ROOT" ]; then  # the stage is removed only once the allocation is known to be stopped
+    if ! cx_cleanup_stage "$source_root" "$REPO_ROOT"; then
+      [ "$rc" != 0 ] || rc=1
+    fi
+  fi
+  [ "${COLLECTIVEX_CANONICAL_GHA:-0}" = 1 ] || cx_cleanup_private_logs "$rc"  # canonical GHA runs skip private-log cleanup here
+  exit "$rc"
+}
+
+cx_install_launcher_fail_safe() {  # arm cx_launcher_cleanup as the shell's EXIT handler
+  trap 'cx_launcher_cleanup "$?"' EXIT  # single quotes: "$?" is expanded when the trap fires
+}
diff --git a/experimental/CollectiveX/runtime/run_in_container.sh b/experimental/CollectiveX/runtime/run_in_container.sh
new file mode 100644
index 0000000000..119efa7ffc
--- /dev/null
+++ b/experimental/CollectiveX/runtime/run_in_container.sh
@@ -0,0 +1,1002 @@
+#!/usr/bin/env bash
+# CollectiveX — generic in-container benchmark dispatcher (single-node).
+#
+# Runs INSIDE the container under `srun` for single-node shards. The GB EP8 launcher invokes
+# run_ep.py directly across nodes. The SKU adapter handles allocation/container/transport-env;
+# this script selects one EP backend from CX_BENCH and writes result JSON under results/.
+#
+# Required env (exported by the adapter): CX_RUNNER CX_NGPUS CX_TS CX_TOPO
+# Selector: CX_BENCH = deepep | deepep-v2 | mori | uccl | nccl-ep | deepep-hybrid
+# EP knobs passed to tests/run_ep.py:
+#   CX_PHASE = decode | prefill | both (default decode)   <- picks the token sweep
+#   CX_TOKENS_LADDER (space/comma sep; blank = phase default)
+#   CX_HIDDEN CX_TOPK CX_EXPERTS CX_ROUTING CX_SEED CX_ITERS
+set -euo pipefail
+
+cd /ix/experimental/CollectiveX  # every relative path below resolves from this directory
+# shellcheck source=../runtime/common.sh
+source runtime/common.sh
+mkdir -p results
+cx_write_runtime_stage backend-setup || cx_die "cannot record runtime stage"
+
+: "${CX_RUNNER:?CX_RUNNER not set}"  # the four required variables abort the script when unset or empty
+: "${CX_NGPUS:?CX_NGPUS not set}"
+: "${CX_TS:?CX_TS not set}"
+: "${CX_TOPO:?CX_TOPO not set}"
+CX_BENCH="${CX_BENCH:-deepep}"  # backend selector; defaults to deepep
+CX_TRANSPORT="${CX_TRANSPORT:-}"  # optional; blank when nothing set it
+
+cx_apply_timing_profile  # NOTE(review): defined elsewhere; presumably fills the CX_ITERS/CX_TRIALS/CX_WARMUP defaults — confirm
+
+cx_log "in-container: runner=$CX_RUNNER ngpus=$CX_NGPUS bench=$CX_BENCH topo=$CX_TOPO"
+
+# Blank ladders use the phase default in tests/run_ep.py.
+cx_ep_ladder() {  # prints CX_TOKENS_LADDER verbatim, or nothing when it is unset or empty
+  printf '%s' "${CX_TOKENS_LADDER-}"
+}
+
+# Canonical workload staging. Every SKU/backend generates identical canonical array bytes and
+# content IDs in-container; the NPZ container bytes themselves are not an identity boundary. When CX_CANONICAL=1
+# (and CX_WORKLOAD_DIR not already provided) we generate routing traces for the run's ladder
+# into a NON-results dir (.cx_workloads/ — so the *.manifest.json never pollute the results glob) and
+# point run_ep at it. Raw attempts remain diagnostic until the publisher validates full coverage.
+cx_stage_canonical() {
+  if [ "${CX_CANONICAL:-0}" != "1" ] || [ -n "${CX_WORKLOAD_DIR:-}" ]; then return 0; fi
+  local dir="$PWD/.cx_workloads" ladder
+  ladder="$(cx_ep_ladder)"
+  # cover both phase ladders when none is given, so either phase finds its files.
+  [ -n "$ladder" ] || ladder="1 2 4 8 16 32 64 128 256 512 1024 2048 4096"
+  cx_log "staging canonical workloads (routing=${CX_ROUTING:-uniform} ep=$CX_NGPUS ladder='$ladder')"
+  if ! python3 tests/make_workloads.py --out-dir "$dir" --routing "${CX_ROUTING:-uniform}" \
+    --ep "$CX_NGPUS" --hidden "${CX_HIDDEN:-7168}" --topk "${CX_TOPK:-8}" \
+    --experts "${CX_EXPERTS:-256}" --seed "${CX_SEED:-67}" --tokens-ladder "$ladder"; then
+    cx_log "ERROR: canonical workload staging failed"; return 1
+  fi
+  export CX_WORKLOAD_DIR="$dir"
+  cx_log "canonical workloads staged at $dir"
+}
+
+# Preserve a failed case with its full scheduled identity instead of letting it vanish.
+emit_failed_case() {  # backend phase rc
+  cx_emit_ep_failed_case \
+    "results/failed_${CX_RUNNER}_${1}_${2}_${CX_TS}.json" "$1" "$2" "$3" || true
+}
+
+# run_ep_suite <backend>
+# One tests/run_ep.py invocation per phase (decode/prefill/both); dispatch and
+# combine are timed separately inside it. One JSON per (backend, phase).
+run_ep_suite() {
+  local backend="$1" phase phases ladder failure_kind rc=0 rc_run
+  ladder="$(cx_ep_ladder)"
+  phases="${CX_PHASE:-decode}"
+  [ "$phases" = "both" ] && phases="decode prefill"
+  if ! cx_stage_canonical; then
+    for phase in $phases; do
+      emit_failed_case "$backend" "$phase" 2
+    done
+    return 1
+  fi
+  for phase in $phases; do
+    cx_log "ep backend=$backend phase=$phase ngpus=$CX_NGPUS ladder='${ladder:-<phase-default>}'"
+    local out="results/${CX_RUNNER}_${backend}_${phase}_${CX_TS}.json"
+    local -a EPARGS=(--backend "$backend" --phase "$phase" --tokens-ladder "$ladder"
+      --hidden "${CX_HIDDEN:-7168}" --topk "${CX_TOPK:-8}" --experts "${CX_EXPERTS:-256}"
+      --routing "${CX_ROUTING:-uniform}" --seed "${CX_SEED:-67}" --iters "${CX_ITERS:-8}"
+      --trials "${CX_TRIALS:-64}" --warmup "${CX_WARMUP:-32}"
+      --gpus-per-node "${CX_GPUS_PER_NODE:-0}" --scale-up-domain "${CX_SCALE_UP_DOMAIN:-0}"
+      --case-id "${CX_CASE_ID:-}" --suite "${CX_SUITE:-}" --workload-name "${CX_WORKLOAD_NAME:-}"
+      --required-publication "${CX_REQUIRED_PUBLICATION:-}"
+      --runner "$CX_RUNNER" --topology-class "$CX_TOPO" --transport "$CX_TRANSPORT"
+      --out "$out")
+    [ -n "${CX_EPLB:-}" ] && EPARGS+=(--eplb)
+    [ -n "${CX_WORKLOAD_DIR:-}" ] && EPARGS+=(--workload-dir "$CX_WORKLOAD_DIR")
+    cx_write_runtime_stage execution || cx_die "cannot record runtime stage"
+    rc_run=0
+    timeout -k 30 "${CX_RUN_TIMEOUT:-900}" \
+      torchrun --nproc_per_node="$CX_NGPUS" tests/run_ep.py "${EPARGS[@]}" || rc_run=$?
+    if [ "$rc_run" = 0 ] && cx_result_doc_is "$out" invalid; then
+      cx_log "WARN: $backend $phase completed with invalid semantic evidence"
+      rc=1
+      continue
+    fi
+    if [ "$rc_run" = 0 ] && ! cx_result_doc_is "$out" success; then
+      rc_run=1
+    fi
+    if [ "$rc_run" != 0 ]; then
+      failure_kind=failed
+      [ "$rc_run" != 124 ] && [ "$rc_run" != 137 ] || failure_kind="timed out"
+      if [ "$failure_kind" = "timed out" ]; then
+        cx_log "WARN: $backend $phase run timed out rc=$rc_run (limit=${CX_RUN_TIMEOUT:-900}s)"
+      else
+        cx_log "WARN: $backend $phase run failed rc=$rc_run"
+      fi
+      if cx_has_result_doc "$out"; then
+        # Report preservation only when demotion succeeded; otherwise quarantine and synthesize a record.
+        if cx_demote_result_doc "$out" "$rc_run"; then
+          cx_log "preserved benchmark output as a failed attempt"
+        else
+          cx_quarantine_result_doc "$out"; emit_failed_case "$backend" "$phase" "$rc_run"
+        fi
+      else
+        cx_quarantine_result_doc "$out"
+        emit_failed_case "$backend" "$phase" "$rc_run"
+      fi
+      rc=1
+    fi
+  done
+  return "$rc"
+}
+
+# Print the detected CUDA compute capability, but only after confirming it is the one registered for CX_RUNNER.
+cx_cuda_arch() {
+  local want got
+  case "$CX_RUNNER" in
+    h100*|h200*) want="9.0" ;;
+    b200*|gb200*) want="10.0" ;;
+    b300*|gb300*) want="10.3" ;;
+    *) cx_log "ERROR: no CUDA target registered for $CX_RUNNER"; return 1 ;;
+  esac
+  got="$(python3 - <<'PY'
+import torch
+
+major, minor = torch.cuda.get_device_capability()
+print(f"{major}.{minor}")
+PY
+)" || return 1
+  if [ "$got" != "$want" ]; then
+    cx_log "ERROR: $CX_RUNNER expected CUDA target $want, detected $got"
+    return 1
+  fi
+  printf '%s' "$got"
+}
+
+cx_nvidia_package_root() {  # <distribution> <component>  ->  prints the resolved nvidia/<component> directory of that installed distribution
+  local package="$1" component="$2"  # exits 1 with no output when the distribution is missing, lists no nvidia/<component>/ files, or the directory is absent
+  python3 - "$package" "$component" <<'PY'
+from importlib import metadata
+from pathlib import Path, PurePosixPath
+import sys
+
+package, component = sys.argv[1:]
+try:
+    distribution = metadata.distribution(package)
+    prefix = f"nvidia/{component}/"
+    entries = [str(entry).replace("\\", "/") for entry in distribution.files or ()]
+    if not any(entry.startswith(prefix) for entry in entries):
+        raise ValueError
+    root = Path(distribution.locate_file(PurePosixPath("nvidia") / component)).resolve()
+    if not root.is_dir():
+        raise ValueError
+except (metadata.PackageNotFoundError, OSError, TypeError, ValueError):
+    raise SystemExit(1)
+print(root, end="")
+PY
+}
+
+# Locate the CUDA toolkit behind nvcc and export CUDA_HOME, CX_CUDA_CCCL, CPATH and NVCC_PREPEND_FLAGS for its CCCL headers.
+cx_prepare_cuda_cccl() {
+  local cccl="" candidate cuda_home nvcc
+  nvcc="$(command -v nvcc)" \
+    || { cx_log "ERROR: CUDA nvcc is unavailable"; return 1; }
+  nvcc="$(readlink -f -- "$nvcc")" \
+    || { cx_log "ERROR: CUDA nvcc cannot be resolved"; return 1; }
+  case "$nvcc" in
+    */bin/nvcc) cuda_home="${nvcc%/bin/nvcc}" ;;
+    *) cx_log "ERROR: CUDA nvcc has an unexpected path"; return 1 ;;
+  esac
+  [ -x "$cuda_home/bin/nvcc" ] && [ -d "$cuda_home/include" ] && [ -d "$cuda_home/lib64" ] \
+    || { cx_log "ERROR: CUDA toolkit root is incomplete"; return 1; }
+  for candidate in "$cuda_home"/targets/*/include/cccl; do  # the first existing targets/*/include/cccl wins
+    if [ -d "$candidate" ]; then
+      cccl="$candidate"
+      break
+    fi
+  done
+  [ -n "$cccl" ] || { cx_log "ERROR: CUDA CCCL headers are unavailable"; return 1; }
+  export CUDA_HOME="$cuda_home" CX_CUDA_CCCL="$cccl"
+  export CPATH="$cccl${CPATH:+:$CPATH}"  # no trailing ':' — GCC treats an empty CPATH element as the current directory
+  export NVCC_PREPEND_FLAGS="-I$cccl ${NVCC_PREPEND_FLAGS:-}"
+}
+
+# Build (once, under a lock) a symlink overlay of the packaged NVSHMEM, then export NVSHMEM_DIR and the CUDA/CCCL environment.
+cx_prepare_deepep_toolchain() {
+  local packaged overlay path root temporary
+  packaged="$(cx_nvidia_package_root nvidia-nvshmem-cu12 nvshmem)" \
+    || { cx_log "ERROR: nvidia.nvshmem is unavailable"; return 1; }
+  root="$(cx_deepep_v2_root)" || return 1
+  overlay="$root/nvshmem-overlay"
+  if ! (
+    umask 077
+    exec 8>"$root/nvshmem-overlay.lock" || exit 1
+    flock 8 || exit 1
+    if [ ! -d "$overlay" ]; then
+      temporary="$root/.nvshmem-overlay.$$"
+      rm -rf "$temporary" || exit 1
+      mkdir -p "$temporary/lib" || exit 1
+      ln -s "$packaged/include" "$temporary/include" || exit 1
+      for path in "$packaged"/lib/*; do
+        ln -s "$path" "$temporary/lib/${path##*/}" || exit 1
+      done
+      [ ! -e "$packaged/lib/libnvshmem_host.so.3" ] \
+        || ln -sf "$packaged/lib/libnvshmem_host.so.3" \
+          "$temporary/lib/libnvshmem_host.so" || exit 1
+      mv "$temporary" "$overlay" || exit 1
+    fi
+    [ ! -L "$overlay" ] \
+      && [ "$(readlink -f "$overlay/include")" = "$(readlink -f "$packaged/include")" ] \
+      && [ -e "$overlay/lib/libnvshmem_host.so" ] \
+      && [ -e "$overlay/lib/libnvshmem_device.a" ]
+  ); then
+    cx_log "ERROR: DeepEP V2 NVSHMEM overlay is invalid"
+    return 1
+  fi
+  export NVSHMEM_DIR="$overlay"
+  cx_prepare_cuda_cccl || return 1
+  export LD_LIBRARY_PATH="$NVSHMEM_DIR/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"  # no trailing ':' — an empty entry makes the loader search the current directory
+}
+
+cx_probe_deepep() {  # verify the installed deep_ep distribution against the version and hashes pinned for this runner
+  local expected_record_sha256 expected_version expected_wheel_sha256
+  if [ "${COLLECTIVEX_IMAGE:-}" != "$CX_IMAGE_MULTIARCH" ] \
+      || [ "${COLLECTIVEX_IMAGE_DIGEST:-}" != "$CX_IMAGE_MULTIARCH_DIGEST" ] \
+      || [ "${COLLECTIVEX_IMAGE_DIGEST_VERIFIED:-0}" != 1 ]; then  # image name, digest and verified flag must all match
+    cx_log "ERROR: DeepEP V1 requires the exact pinned multi-architecture image"
+    return 1
+  fi
+  cx_cuda_arch >/dev/null || return 1  # only the runner/GPU check matters here; the printed value is discarded
+  case "$CX_RUNNER" in
+    gb200|gb300)  # pins used for the gb200 and gb300 runners
+      expected_version="1.1.0+814e508"
+      expected_wheel_sha256="784dabec0877b6cf72619b7e93eda7e2f365648487bd37fc3ff6960e53669313"
+      expected_record_sha256="2671cff7baf8c2c214ff4bac721af875d513130670bec57601998bd1aae82882"
+      DEEPEP_COMMIT="814e508537c6ffc775d59f6f1b9ba43f3a65968c"
+      ;;
+    *)  # pins used for every other runner
+      expected_version="1.2.1"
+      expected_wheel_sha256="7c02c29306ea0fe2dd474618e72e0f310f260187a9c0700a656d2f6964e8c307"
+      expected_record_sha256="6548e9c504a12b2471af4b7f4d9546321210a57a456b5dc55bd4a8dad0f932ac"
+      DEEPEP_COMMIT="9af0e0d0e74f3577af1979c9b9e1ac2cad0104ee"
+      ;;
+  esac
+  export DEEPEP_COMMIT  # NOTE(review): the probe below validates with assert; PYTHONOPTIMIZE would strip those checks — confirm it is never set
+  python3 - "$expected_version" "$expected_wheel_sha256" "$expected_record_sha256" <<'PY' || {
+import base64
+import csv
+import hashlib
+import importlib.metadata as metadata
+import io
+import json
+from pathlib import Path
+import sys
+
+import deep_ep
+from deep_ep import Buffer
+
+distribution = metadata.distribution("deep_ep")
+assert distribution.version == sys.argv[1]
+assert Buffer.__name__ == "Buffer"
+recorded_files = {
+    Path(distribution.locate_file(entry)).resolve() for entry in distribution.files or ()
+}
+buffer_module = sys.modules.get(Buffer.__module__)
+assert Path(deep_ep.__file__).resolve() in recorded_files
+assert buffer_module is not None and Path(buffer_module.__file__).resolve() in recorded_files
+direct_url = json.loads(distribution.read_text("direct_url.json"))
+assert direct_url["archive_info"]["hashes"]["sha256"] == sys.argv[2]
+record_entry = next(
+    entry for entry in distribution.files or ()
+    if str(entry).endswith(".dist-info/RECORD")
+)
+record = distribution.locate_file(record_entry).read_bytes()
+assert hashlib.sha256(record).hexdigest() == sys.argv[3]
+for path, encoded_digest, size in csv.reader(io.StringIO(record.decode())):
+    if not encoded_digest:
+        continue
+    algorithm, expected = encoded_digest.split("=", 1)
+    assert algorithm == "sha256"
+    payload = distribution.locate_file(path).read_bytes()
+    observed = base64.urlsafe_b64encode(hashlib.sha256(payload).digest()).decode().rstrip("=")
+    assert observed == expected
+    assert not size or len(payload) == int(size)
+PY
+    cx_log "ERROR: container DeepEP build does not match its pinned image contract"
+    return 1
+  }
+  cx_log "DeepEP image build ready ($DEEPEP_COMMIT)"
+}
+
+# DeepEP V2 is PR #605's ElasticBuffer implementation with upstream PR #630's pure scale-up
+# initialization fix. Canonical launchers stage the pinned source and mount a private cluster-local
+# build cache at /cx-cache.
+cx_deepep_v2_root() {  # prints <CX_BACKEND_CACHE_ROOT>/deepep-v2-<sha256 of the build identity string>
+  local arch cpu base identity key image_digest
+  arch="$(cx_cuda_arch)" || return 1  # also enforces the runner/GPU match
+  cpu="$(uname -m)"
+  [[ "$cpu" =~ ^[A-Za-z0-9._-]+$ ]] || return 1  # reject unexpected characters before it enters the identity string
+  base="${CX_BACKEND_CACHE_ROOT:-}"
+  [[ "$base" = /* ]] || return 1  # the cache root must be an absolute path
+  image_digest="${COLLECTIVEX_IMAGE_DIGEST:-manual-unverified}"
+  [[ "$image_digest" = manual-unverified || "$image_digest" =~ ^sha256:[0-9a-f]{64}$ ]] \
+    || return 1
+  # Bump the recipe generation whenever the build procedure changes. Benchmark-only
+  # source revisions must reuse the same immutable environment instead of leaking GBs.
+  identity="deepep-v2-cache-v2|$cpu|sm${arch/./}|image=$image_digest|recipe=aot-persistent-nvshmem-active-cuda-maxjobs16-v2|$CX_DEEPEP_V2_COMMIT|$CX_DEEPEP_V2_TREE|$CX_DEEPEP_V2_FMT_COMMIT|pip=26.1.2|setuptools=82.0.1|wheel=0.47.0|ninja=1.13.0|numpy=2.2.6|torch=2.10.0+cu130|nccl=2.30.4|nvshmem=3.3.9|max-jobs=16"
+  key="$(printf '%s' "$identity" | sha256sum | awk '{print $1}')"  # the directory name is the hash of the whole identity
+  [[ "$key" =~ ^[0-9a-f]{64}$ ]] || return 1
+  printf '%s/deepep-v2-%s' "$base" "$key"
+}
+
+# Activate the cached DeepEP V2 environment in the current shell: venv first on PATH, NCCL and
+# NVSHMEM package roots exported and prepended to LD_LIBRARY_PATH, pinned revision identifiers
+# exported, and a private JIT cache directory created under the job-local stage.
+# Returns 1 if the venv, a package root, the stage directory or the JIT cache path is unusable.
+cx_activate_deepep_v2() {
+  local root venv stage_root
+  root="$(cx_deepep_v2_root)" || return 1
+  venv="$root/venv"
+  [ -x "$venv/bin/python" ] \
+    || { cx_log "ERROR: DeepEP V2 venv interpreter is unavailable"; return 1; }
+  export VIRTUAL_ENV="$venv"
+  # Strip an existing leading "$venv/bin:" first so repeated activation does not stack
+  # duplicate PATH entries.
+  export PATH="$venv/bin:${PATH#"$venv/bin:"}"
+  EP_NCCL_ROOT_DIR="$(cx_nvidia_package_root nvidia-nccl-cu13 nccl)" \
+    || { cx_log "ERROR: DeepEP V2 NCCL package root is unavailable"; return 1; }
+  EP_NVSHMEM_ROOT_DIR="$(cx_nvidia_package_root nvidia-nvshmem-cu12 nvshmem)" \
+    || { cx_log "ERROR: DeepEP V2 NVSHMEM package root is unavailable"; return 1; }
+  export EP_NCCL_ROOT_DIR EP_NVSHMEM_ROOT_DIR
+  export LD_LIBRARY_PATH="$EP_NCCL_ROOT_DIR/lib:$EP_NVSHMEM_ROOT_DIR/lib:${LD_LIBRARY_PATH:-}"
+  # The stage root is CX_BACKEND_SOURCE_ROOT with its trailing "/.cx_sources" removed; any
+  # other shape (including an unset or relative value) is rejected.
+  case "${CX_BACKEND_SOURCE_ROOT:-}" in
+    /*/.cx_sources) stage_root="${CX_BACKEND_SOURCE_ROOT%/.cx_sources}" ;;
+    *) cx_log "ERROR: DeepEP V2 job-local source root is unavailable"; return 1 ;;
+  esac
+  # The stage must be a real directory, not a symlink to one.
+  [ -d "$stage_root" ] && [ ! -L "$stage_root" ] \
+    || { cx_log "ERROR: DeepEP V2 job-local stage is invalid"; return 1; }
+  # JIT CUBINs are evidence from this shard, not part of the persistent AOT environment.
+  # Keeping them on the isolated staged tree prevents a prior driver/topology attempt
+  # from seeding a later run; all ranks and cases in this shard still share one cold build.
+  export EP_JIT_CACHE_DIR="$stage_root/.cx_backend/deepep-v2-jit"
+  export EP_REUSE_NCCL_COMM=1
+  export DEEPEP_V2_PR=605 DEEPEP_V2_FIX_PR=630
+  DEEPEP_V2_COMMIT="$CX_DEEPEP_V2_COMMIT"
+  DEEPEP_V2_TREE="$CX_DEEPEP_V2_TREE"
+  DEEPEP_V2_FMT_COMMIT="$CX_DEEPEP_V2_FMT_COMMIT"
+  export DEEPEP_V2_COMMIT DEEPEP_V2_TREE DEEPEP_V2_FMT_COMMIT
+  # Refuse symlinked cache directories before creating anything beneath them.
+  [ ! -L "$stage_root/.cx_backend" ] && [ ! -L "$EP_JIT_CACHE_DIR" ] \
+    || { cx_log "ERROR: DeepEP V2 JIT cache path is unsafe"; return 1; }
+  # Both the parent and the JIT cache directory are restricted to the owner (mode 700).
+  if ! mkdir -p "$EP_JIT_CACHE_DIR" \
+      || ! chmod 700 "$stage_root/.cx_backend" "$EP_JIT_CACHE_DIR"; then
+    cx_log "ERROR: DeepEP V2 JIT cache is unavailable"
+    return 1
+  fi
+  # cx_probe_deepep_v2 asserts that this variable is absent from the environment.
+  unset EP_SUPPRESS_NCCL_CHECK
+}
+
+cx_enable_deepep_v2_jit_reproducibility() {
+  local seed="collectivex-deepep-v2-fa8a9b1" cccl
+  [ -n "${CUDA_HOME:-}" ] \
+    || { cx_log "ERROR: active CUDA toolkit is unavailable"; return 1; }
+  cccl="${CX_CUDA_CCCL:-}"
+  case "$cccl" in
+    "$CUDA_HOME"/targets/*/include/cccl) ;;
+    *) cx_log "ERROR: CUDA CCCL headers differ from the active toolkit"; return 1 ;;
+  esac
+  [ -d "$cccl" ] || { cx_log "ERROR: CUDA CCCL headers are unavailable"; return 1; }
+  CPATH="$cccl"
+  NVCC_PREPEND_FLAGS="--frandom-seed=$seed -I$cccl"
+  DEEPEP_V2_JIT_RANDOM_SEED="$seed"
+  EP_JIT_DUMP_SASS=1
+  unset EP_JIT_DEBUG EP_JIT_DUMP_ASM EP_JIT_DUMP_PTX EP_JIT_WITH_LINEINFO
+  unset EP_JIT_PTXAS_VERBOSE EP_JIT_PRINT_COMPILER_COMMAND EP_JIT_NVCC_COMPILER
+  unset EP_JIT_CPP_STANDARD EP_JIT_PTXAS_CHECK EP_GIN_GDAKI_DEBUG EP_NUM_TOPK_IDX_BITS
+  export CPATH DEEPEP_V2_JIT_RANDOM_SEED EP_JIT_DUMP_SASS NVCC_PREPEND_FLAGS
+}
+
+# Import-level probe of the active DeepEP V2 environment. The embedded Python asserts the pinned
+# torch/NCCL/NVSHMEM/numpy package versions, the deep_ep version and ElasticBuffer class, and
+# that exactly one libnccl.so is mapped into the process and reports runtime version 23004.
+# The function returns the exit status of that Python process.
+cx_probe_deepep_v2() {
+  python3 - <<'PY'
+import ctypes
+import importlib.metadata as metadata
+import inspect
+import os
+
+import torch
+
+# Pinned package versions of the environment.
+assert torch.__version__ == "2.10.0+cu130", torch.__version__
+assert metadata.version("nvidia-nccl-cu13") == "2.30.4"
+assert metadata.version("nvidia-nvshmem-cu12") == "3.3.9"
+assert metadata.version("numpy") == "2.2.6"
+
+import deep_ep
+# The module reports 2.0.0 while the installed distribution carries the +fa8a9b1 local tag.
+assert deep_ep.__version__ == "2.0.0", deep_ep.__version__
+assert metadata.version("deep_ep") == "2.0.0+fa8a9b1"
+assert inspect.isclass(deep_ep.ElasticBuffer)
+assert deep_ep.ElasticBuffer.__name__ == "ElasticBuffer"
+assert os.environ.get("EP_SUPPRESS_NCCL_CHECK") is None
+# Collect the real paths of every libnccl.so file mapped into this process.
+with open("/proc/self/maps", encoding="utf-8") as handle:
+    loaded_nccl = {
+        os.path.realpath(line.rstrip().split()[-1])
+        for line in handle
+        if "libnccl.so" in line and os.path.isfile(line.rstrip().split()[-1])
+    }
+# Exactly one NCCL library may be loaded; query its version through the C API.
+assert len(loaded_nccl) == 1
+runtime_version = ctypes.c_int()
+assert ctypes.CDLL(loaded_nccl.pop()).ncclGetVersion(ctypes.byref(runtime_version)) == 0
+assert runtime_version.value == 23004, runtime_version.value
+PY
+}
+
+# Print (without a trailing newline) one SHA-256 over every file recorded for the installed
+# deep_ep distribution: for each entry, in sorted path order, the digest absorbs the relative
+# path, the file size and the file SHA-256. The Python exits 1 when the distribution lists no
+# files, when an entry is outside deep_ep/ or deep_ep-*.dist-info, when a path leaves
+# VIRTUAL_ENV or passes through a symlink, when an entry is not a regular file, or when no
+# deep_ep/*.so extension is present.
+cx_deepep_v2_content_sha256() {
+  python3 - <<'PY'
+import hashlib
+from importlib import metadata
+import os
+from pathlib import Path, PurePosixPath
+import stat
+
+distribution = metadata.distribution("deep_ep")
+# Sort entries so the digest does not depend on the order of the recorded file list.
+entries = sorted(distribution.files or (), key=lambda entry: entry.as_posix())
+if not entries:
+    raise SystemExit(1)
+# The venv root itself must be a real directory, not a symlink.
+venv_path = Path(os.environ["VIRTUAL_ENV"]).absolute()
+if venv_path.is_symlink() or not venv_path.is_dir():
+    raise SystemExit(1)
+venv = venv_path.resolve(strict=True)
+digest = hashlib.sha256()
+extension = False
+for entry in entries:
+    relative = PurePosixPath(entry.as_posix())
+    # Accept only relative entries whose first component is the package directory or its
+    # dist-info directory; reject absolute paths and any ".." component.
+    if (
+        relative.is_absolute()
+        or ".." in relative.parts
+        or not relative.parts
+        or not (
+            relative.parts[0] == "deep_ep"
+            or relative.parts[0].startswith("deep_ep-")
+            and relative.parts[0].endswith(".dist-info")
+        )
+    ):
+        raise SystemExit(1)
+    path = Path(distribution.locate_file(entry)).absolute()
+    resolved = path.resolve(strict=True)
+    # Both the literal and the fully resolved path must stay inside the venv.
+    try:
+        path.relative_to(venv_path)
+        resolved.relative_to(venv)
+    except ValueError:
+        raise SystemExit(1)
+    # No directory between the file and the venv root may be a symlink.
+    parent = path.parent
+    while parent != venv_path:
+        if parent.is_symlink():
+            raise SystemExit(1)
+        parent = parent.parent
+    item = os.lstat(path)
+    if not stat.S_ISREG(item.st_mode):
+        raise SystemExit(1)
+    # Open without following symlinks and confirm the descriptor refers to the same
+    # device/inode that lstat reported before reading any content.
+    descriptor = os.open(path, os.O_RDONLY | getattr(os, "O_NOFOLLOW", 0))
+    try:
+        opened = os.fstat(descriptor)
+        if (opened.st_dev, opened.st_ino) != (item.st_dev, item.st_ino):
+            raise SystemExit(1)
+        file_digest = hashlib.sha256()
+        while chunk := os.read(descriptor, 1024 * 1024):
+            file_digest.update(chunk)
+    finally:
+        os.close(descriptor)
+    name = relative.as_posix()
+    extension |= name.startswith("deep_ep/") and name.endswith(".so")
+    # Record layout per file: name, NUL, decimal size from lstat, NUL, raw file digest.
+    digest.update(name.encode())
+    digest.update(b"\0")
+    digest.update(str(item.st_size).encode())
+    digest.update(b"\0")
+    digest.update(file_digest.digest())
+# At least one compiled extension under deep_ep/ must have been hashed.
+if not extension:
+    raise SystemExit(1)
+print(digest.hexdigest(), end="")
+PY
+}
+
+cx_deepep_v2_marker_content_sha256() {
+  local root="$1" marker="$2" revision="$3" tree="$4" fmt_revision="$5" cache_key="$6"
+  python3 - "$root" "$marker" "$revision" "$tree" "$fmt_revision" "$cache_key" <<'PY'
+import os
+import re
+import stat
+import sys
+
+root, marker, revision, tree, fmt_revision, cache_key = sys.argv[1:]
+try:
+    root_item = os.lstat(root)
+    marker_item = os.lstat(marker)
+    children = [os.lstat(os.path.join(root, name)) for name in ("source", "venv")]
+    if (
+        not stat.S_ISDIR(root_item.st_mode)
+        or stat.S_IMODE(root_item.st_mode) & 0o777 != 0o700
+        or not stat.S_ISREG(marker_item.st_mode)
+        or marker_item.st_uid != root_item.st_uid
+        or stat.S_IMODE(marker_item.st_mode) & 0o777 != 0o600
+        or marker_item.st_size > 1024
+        or any(
+            not stat.S_ISDIR(child.st_mode)
+            or child.st_uid != root_item.st_uid
+            or stat.S_IMODE(child.st_mode) & 0o022
+            for child in children
+        )
+    ):
+        raise OSError
+    descriptor = os.open(marker, os.O_RDONLY | getattr(os, "O_NOFOLLOW", 0))
+    try:
+        opened = os.fstat(descriptor)
+        if (opened.st_dev, opened.st_ino) != (marker_item.st_dev, marker_item.st_ino):
+            raise OSError
+        payload = os.read(descriptor, 1025)
+    finally:
+        os.close(descriptor)
+    lines = payload.decode("ascii").splitlines()
+    if lines[:4] != [revision, tree, fmt_revision, cache_key] or len(lines) != 5:
+        raise ValueError
+    if not re.fullmatch(r"[0-9a-f]{64}", lines[4]):
+        raise ValueError
+except (OSError, UnicodeError, ValueError):
+    raise SystemExit(1)
+print(lines[4], end="")
+PY
+}
+
+cx_deepep_v2_cache_is_valid() {
+  local root="$1" marker="$2" revision="$3" tree="$4" fmt_revision="$5" cache_key="$6"
+  local expected_content actual_content
+  expected_content="$(
+    cx_deepep_v2_marker_content_sha256 \
+      "$root" "$marker" "$revision" "$tree" "$fmt_revision" "$cache_key"
+  )" || return 1
+  [ -d "$root/source" ] && [ ! -L "$root/source" ] \
+    && [ "$(cx_git_in_tree "$root/source" rev-parse 'HEAD^{tree}' 2>/dev/null)" = "$tree" ] \
+    && [ "$(cx_git_in_tree "$root/source/third-party/fmt" rev-parse HEAD 2>/dev/null)" = "$fmt_revision" ] \
+    || return 1
+  cx_activate_deepep_v2 || return 1
+  actual_content="$(cx_deepep_v2_content_sha256)" || return 1
+  [ "$actual_content" = "$expected_content" ]
+}
+
+# Prepare the shared DeepEP V2 environment and activate it in the calling shell.
+# Under an exclusive flock on "<cache dir>.lock", a subshell either validates an already
+# published cache (marker present) or builds it from scratch: venv, pinned Python packages,
+# the staged DeepEP source, a runtime probe, and finally the completion marker. A published
+# cache that fails validation is never rebuilt; the function fails instead.
+# Returns 1 on any failure; on success the environment is activated, the JIT settings are
+# pinned and the runtime probe has passed in the calling shell.
+cx_build_deepep_v2() {
+  local root venv source marker marker_tmp lock_path arch cache_key cache_ready content_sha256
+  local revision="fa8a9b16898204afd347c663b89e65ef87dc6ce6"
+  local tree="29809e75c5874e6609dac4804e7b651d5226959f"
+  local fmt_revision="a4c7e17133ee9cb6a2f45545f6e974dd3c393efa"
+  cx_verify_backend_cache_mount \
+    || { cx_log "ERROR: DeepEP V2 cache mount identity validation failed"; return 1; }
+  arch="$(cx_cuda_arch)" || return 1
+  root="$(cx_deepep_v2_root)" || return 1
+  # The cache key is the digest suffix of the cache directory name.
+  cache_key="${root##*/deepep-v2-}"
+  [[ "$cache_key" =~ ^[0-9a-f]{64}$ ]] || return 1
+  venv="$root/venv"; source="$root/source"; marker="$root/.collectivex-complete"
+  lock_path="${root}.lock"
+  command -v flock >/dev/null || { cx_log "ERROR: flock is required for DeepEP V2"; return 1; }
+  mkdir -p "${root%/*}" || return 1
+  cx_log "DeepEP V2: preparing PR #605 implementation with upstream PR #630 fix ($revision)"
+  # Everything inside this subshell runs while holding the lock on descriptor 9; the lock is
+  # released when the subshell exits. "exit" here leaves only the subshell.
+  if ! (
+    # The lock file must not be a symlink; it is created owner-only if missing.
+    [ ! -L "$lock_path" ] \
+      || { cx_log "ERROR: DeepEP V2 cache lock is unsafe"; exit 1; }
+    (umask 077; : >> "$lock_path") && chmod 600 "$lock_path" \
+      || { cx_log "ERROR: DeepEP V2 cache-lock-create failed"; exit 1; }
+    exec 9<>"$lock_path" \
+      || { cx_log "ERROR: DeepEP V2 cache-lock-open failed"; exit 1; }
+    flock 9 \
+      || { cx_log "ERROR: DeepEP V2 cache-lock-acquire failed"; exit 1; }
+    cache_ready=0
+    # A marker means the cache was published: it must validate as-is. Validation runs in a
+    # nested subshell so its environment activation does not leak into the build path below.
+    if [ -e "$marker" ] || [ -L "$marker" ]; then
+      if (
+        cx_deepep_v2_cache_is_valid \
+          "$root" "$marker" "$revision" "$tree" "$fmt_revision" "$cache_key"
+      ); then
+        cache_ready=1
+      else
+        cx_log "ERROR: published DeepEP V2 cache failed integrity validation; refusing reset"
+        exit 1
+      fi
+    fi
+    if [ "$cache_ready" != 1 ]; then
+      # No marker: any existing directory is an incomplete build and is removed first.
+      if [ -e "$root" ] || [ -L "$root" ]; then
+        rm -rf "$root" \
+          || { cx_log "ERROR: incomplete DeepEP V2 cache-reset failed"; exit 1; }
+      fi
+      mkdir -m 700 "$root" \
+        || { cx_log "ERROR: DeepEP V2 cache-create failed"; exit 1; }
+      python3 -m venv "$venv" \
+        || { cx_log "ERROR: DeepEP V2 venv creation failed"; exit 1; }
+      # All pip output (stdout and stderr) is sent to stderr.
+      "$venv/bin/python" -m pip install -q --disable-pip-version-check --no-input \
+        "pip==26.1.2" "setuptools==82.0.1" "wheel==0.47.0" "ninja==1.13.0" \
+        "numpy==2.2.6" "nvidia-nvshmem-cu12==3.3.9" >&2 2>&1 \
+        || { cx_log "ERROR: DeepEP V2 build-tool installation failed"; exit 1; }
+      "$venv/bin/python" -m pip install -q --disable-pip-version-check --no-input \
+        --index-url https://download.pytorch.org/whl/cu130 \
+        --extra-index-url https://pypi.org/simple "torch==2.10.0" >&2 2>&1 \
+        || { cx_log "ERROR: torch 2.10.0+cu130 installation failed"; exit 1; }
+      # Torch pins NCCL 2.28.9; the PR #605 ElasticBuffer implementation requires 2.30.4.
+      "$venv/bin/python" -m pip install -q --disable-pip-version-check --no-input \
+        --force-reinstall --no-deps "nvidia-nccl-cu13==2.30.4" >&2 2>&1 \
+        || { cx_log "ERROR: NCCL 2.30.4 installation failed"; exit 1; }
+      cx_activate_deepep_v2 \
+        || { cx_log "ERROR: DeepEP V2 environment activation failed"; exit 1; }
+      cx_prepare_deepep_toolchain \
+        || { cx_log "ERROR: DeepEP V2 toolchain preparation failed"; exit 1; }
+      # Replace the NVSHMEM root chosen during activation with NVSHMEM_DIR
+      # (presumably set by cx_prepare_deepep_toolchain; confirm against that helper).
+      EP_NVSHMEM_ROOT_DIR="$NVSHMEM_DIR"
+      export EP_NVSHMEM_ROOT_DIR
+      cx_materialize_backend_source deepep-v2 "$source" \
+        || { cx_log "ERROR: DeepEP V2 staged source is invalid"; exit 1; }
+      # Build with SOURCE_DATE_EPOCH taken from the commit timestamp of the checked-out HEAD.
+      (cd "$source" && SOURCE_DATE_EPOCH="$(cx_git_in_tree "$source" show -s --format=%ct HEAD)" \
+        TORCH_CUDA_ARCH_LIST="$arch" MAX_JOBS=16 \
+        python3 -m pip install -q --no-build-isolation --no-deps --force-reinstall .) >&2 2>&1 \
+        || { cx_log "ERROR: DeepEP V2 build failed"; exit 1; }
+      cx_probe_deepep_v2 \
+        || { cx_log "ERROR: DeepEP V2 ElasticBuffer/runtime probe failed"; exit 1; }
+      content_sha256="$(cx_deepep_v2_content_sha256)" \
+        || { cx_log "ERROR: DeepEP V2 installed-content hashing failed"; exit 1; }
+      # Write the marker to a temporary file in the cache directory, then rename it into
+      # place so the marker only ever appears with its full five-line content.
+      marker_tmp="$(mktemp "$root/.collectivex-complete.tmp.XXXXXX")" \
+        || { cx_log "ERROR: DeepEP V2 cache-marker-create failed"; exit 1; }
+      chmod 600 "$marker_tmp" \
+        || { cx_log "ERROR: DeepEP V2 cache-marker-permission failed"; exit 1; }
+      printf '%s\n%s\n%s\n%s\n%s\n' \
+        "$revision" "$tree" "$fmt_revision" "$cache_key" "$content_sha256" > "$marker_tmp" \
+        || { cx_log "ERROR: DeepEP V2 cache-marker-write failed"; exit 1; }
+      mv -f -- "$marker_tmp" "$marker" \
+        || { cx_log "ERROR: DeepEP V2 cache-marker-publish failed"; exit 1; }
+    fi
+    # Final validation covers both the reused and the freshly built cache.
+    cx_deepep_v2_cache_is_valid \
+      "$root" "$marker" "$revision" "$tree" "$fmt_revision" "$cache_key" \
+      || { cx_log "ERROR: DeepEP V2 cache validation failed"; exit 1; }
+  ); then
+    cx_log "ERROR: shared DeepEP V2 environment is incomplete"
+    return 1
+  fi
+  # The subshell could not change this shell, so activate and probe again here.
+  cx_activate_deepep_v2 || return 1
+  cx_prepare_deepep_toolchain || return 1
+  cx_enable_deepep_v2_jit_reproducibility || return 1
+  EP_NVSHMEM_ROOT_DIR="$NVSHMEM_DIR"
+  export EP_NVSHMEM_ROOT_DIR
+  cx_probe_deepep_v2 || { cx_log "ERROR: DeepEP V2 shared runtime probe failed"; return 1; }
+  cx_log "DeepEP V2 ready ($DEEPEP_V2_COMMIT, ElasticBuffer, NCCL Device API; LSA/Gin selected by adapter)"
+}
+
+# Build the pinned DeepEP `hybrid-ep` implementation for one NVLink/MNNVL domain. CUDA 13 moved
+# libcudacxx headers under cccl, but this intradomain path does not use the separate NVSHMEM
+# toolchain required by DeepEP V2.
+cx_deepep_hybrid_marker_content_sha256() {
+  python3 - "$1" "$2" "$3" "$4" <<'PY'
+import os
+import re
+import stat
+import sys
+
+root, marker, revision, tree = sys.argv[1:]
+try:
+    root_item = os.lstat(root)
+    marker_item = os.lstat(marker)
+    if (
+        not stat.S_ISDIR(root_item.st_mode)
+        or stat.S_IMODE(root_item.st_mode) & 0o777 != 0o700
+        or not stat.S_ISREG(marker_item.st_mode)
+        or marker_item.st_uid != root_item.st_uid
+        or stat.S_IMODE(marker_item.st_mode) & 0o777 != 0o600
+        or marker_item.st_size > 512
+    ):
+        raise OSError
+    descriptor = os.open(marker, os.O_RDONLY | getattr(os, "O_NOFOLLOW", 0))
+    try:
+        opened = os.fstat(descriptor)
+        if (opened.st_dev, opened.st_ino) != (marker_item.st_dev, marker_item.st_ino):
+            raise OSError
+        payload = os.read(descriptor, 513)
+    finally:
+        os.close(descriptor)
+    lines = payload.decode("ascii").splitlines()
+    if len(lines) != 3 or lines[:2] != [revision, tree]:
+        raise ValueError
+    if not re.fullmatch(r"[0-9a-f]{64}", lines[2]):
+        raise ValueError
+except (OSError, UnicodeError, ValueError):
+    raise SystemExit(1)
+print(lines[2], end="")
+PY
+}
+
+# Succeed only when the hybrid-ep build tree at "$root" still matches its completion marker.
+# Args: build root, marker path, pinned revision, pinned tree.
+# Checks, in order: the marker parses; HEAD and its tree equal the pinned values; git reports no
+# changes to tracked files (submodules included); no untracked deep_ep/*.py or deep_ep/*.so
+# files exist, whether or not they are git-ignored; and the extension-pair digest equals the
+# digest stored in the marker.
+cx_deepep_hybrid_cache_is_valid() {
+  local root="$1" marker="$2" revision="$3" tree="$4" expected actual status extra
+  expected="$(cx_deepep_hybrid_marker_content_sha256 \
+    "$root" "$marker" "$revision" "$tree")" || return 1
+  [ "$(cx_git_in_tree "$root" rev-parse HEAD 2>/dev/null)" = "$revision" ] \
+    && [ "$(cx_git_in_tree "$root" rev-parse 'HEAD^{tree}' 2>/dev/null)" = "$tree" ] \
+    || return 1
+  # Any output from status means a tracked file or submodule differs from the commit.
+  status="$(cx_git_in_tree "$root" status --porcelain --untracked-files=no \
+    --ignore-submodules=none 2>/dev/null)" || return 1
+  [ -z "$status" ] || return 1
+  # First pass: untracked files that are not ignored.
+  extra="$(cx_git_in_tree "$root" ls-files --others --exclude-standard -- \
+    'deep_ep/*.py' 'deep_ep/*.so' 2>/dev/null)" || return 1
+  [ -z "$extra" ] || return 1
+  # Second pass: untracked files that are ignored.
+  extra="$(cx_git_in_tree "$root" ls-files --others --ignored --exclude-standard -- \
+    'deep_ep/*.py' 'deep_ep/*.so' 2>/dev/null)" || return 1
+  [ -z "$extra" ] || return 1
+  actual="$(cx_extension_pair_sha256 "$root" 'deep_ep_cpp*.so' 'hybrid_ep_cpp*.so')" \
+    || return 1
+  [ "$actual" = "$expected" ]
+}
+
+# Build (or reuse) the pinned hybrid-ep extensions under "$PWD/.cx_backend" and put the build
+# tree first on PYTHONPATH.
+# Under an exclusive flock on "<build root>.lock", a subshell validates an existing marker or
+# materializes the source, runs an in-place extension build and publishes a marker holding the
+# revision, tree and extension-pair digest. An existing marker that fails validation aborts
+# without rebuilding. Returns 1 on any failure, including a failed HybridEPBuffer import check.
+cx_build_deepep_hybrid() {
+  local arch revision="$CX_DEEPEP_HYBRID_COMMIT" tree="$CX_DEEPEP_HYBRID_TREE"
+  local build_root marker marker_tmp lock_path content_sha256 cache_ready
+  export DEEPEP_COMMIT="$revision" DEEPEP_TREE="$tree"
+  arch="$(cx_cuda_arch)" || return 1
+  # The build directory name carries the CUDA target with its dot removed.
+  build_root="$PWD/.cx_backend/deepep-hybrid-${arch/./}"
+  marker="$build_root/.collectivex-complete"
+  lock_path="${build_root}.lock"
+  cx_log "DeepEP hybrid-ep: building $revision for CUDA target $arch"
+  unset NVSHMEM_DIR HYBRID_EP_MULTINODE USE_NIXL
+  cx_prepare_cuda_cccl || return 1
+  command -v flock >/dev/null || { cx_log "ERROR: flock is required for hybrid-ep"; return 1; }
+  mkdir -p "$PWD/.cx_backend" || return 1
+  # Everything inside this subshell runs while holding the lock on descriptor 9; "exit" here
+  # leaves only the subshell.
+  if ! (
+    [ ! -L "$lock_path" ] || exit 1
+    (umask 077; : >> "$lock_path") && chmod 600 "$lock_path" || exit 1
+    exec 9<>"$lock_path" || exit 1
+    flock 9 || exit 1
+    cache_ready=0
+    # A marker means a build was published: it must validate as-is or the function fails.
+    if [ -e "$marker" ] || [ -L "$marker" ]; then
+      cx_deepep_hybrid_cache_is_valid "$build_root" "$marker" "$revision" "$tree" \
+        || exit 1
+      cache_ready=1
+    fi
+    if [ "$cache_ready" != 1 ]; then
+      cx_materialize_backend_source deepep-hybrid "$build_root" \
+        || { cx_log "ERROR: hybrid-ep staged source is invalid"; exit 1; }
+      # Build with SOURCE_DATE_EPOCH taken from the commit timestamp of the checked-out HEAD;
+      # all build output is sent to stderr.
+      (cd "$build_root" && \
+        SOURCE_DATE_EPOCH="$(cx_git_in_tree "$build_root" show -s --format=%ct HEAD)" \
+        TORCH_CUDA_ARCH_LIST="$arch" MAX_JOBS=16 \
+        python3 setup.py build_ext --inplace) >&2 2>&1 \
+        || { cx_log "ERROR: hybrid-ep build failed"; exit 1; }
+      content_sha256="$(cx_extension_pair_sha256 \
+        "$build_root" 'deep_ep_cpp*.so' 'hybrid_ep_cpp*.so')" || exit 1
+      # Write the marker to a temporary file, then rename it into place.
+      marker_tmp="$(mktemp "$build_root/.collectivex-complete.tmp.XXXXXX")" || exit 1
+      chmod 600 "$marker_tmp" || exit 1
+      printf '%s\n%s\n%s\n' "$revision" "$tree" "$content_sha256" > "$marker_tmp" \
+        || exit 1
+      mv -f -- "$marker_tmp" "$marker" || exit 1
+    fi
+    # The subshell status is the result of this final validation.
+    cx_deepep_hybrid_cache_is_valid "$build_root" "$marker" "$revision" "$tree"
+  ); then
+    cx_log "ERROR: shared hybrid-ep build is incomplete"
+    return 1
+  fi
+  export PYTHONPATH="$build_root:${PYTHONPATH:-}"
+  python3 -c "import deep_ep; assert hasattr(deep_ep,'HybridEPBuffer'); print('built hybrid-ep deep_ep', getattr(deep_ep,'__version__','?'))" >&2 \
+    || { cx_log "ERROR: hybrid-ep import / HybridEPBuffer missing after build"; return 1; }
+  cx_log "DeepEP hybrid-ep ready ($DEEPEP_COMMIT)"
+}
+
+# UCCL EP (uccl.ep.Buffer is a DeepEP-API clone). The prebuilt wheel is cu12; on a cu13
+# image its kernels need a cu12 CUDA runtime on LD_LIBRARY_PATH (probe-confirmed). PEP-668
+# images need PIP_BREAK_SYSTEM_PACKAGES. Best-effort; failure to import fails loudly.
+# Install and verify UCCL EP plus its vendored DeepEP-API wrapper.
+# Fast path: when /tmp/.cx_built_uccl exists, only the wrapper import is re-checked (its error
+# output is discarded) and that result is returned.
+# Slow path: install the hash-pinned uccl wheel and a CUDA 12 runtime package, verify the
+# installed version, clone the matching tag at the pinned commit, copy the wrapper sources to
+# /tmp/uccl_deepep_pkg/uccl_deepep, verify both imports, then create the /tmp marker.
+# Exports PIP_BREAK_SYSTEM_PACKAGES, LD_LIBRARY_PATH (when the runtime lib dir is found),
+# UCCL_COMMIT, PYTHONPATH, CX_UCCL_WRAPPER and UCCL_WRAPPER_COMMIT. Returns 1 on any failure.
+cx_build_uccl() {
+  if [ -f /tmp/.cx_built_uccl ]; then
+    cx_log "UCCL EP already prepared this allocation — skip rebuild"
+    python3 -c "import torch; from uccl_deepep import Buffer" 2>/dev/null || return 1
+    return 0
+  fi
+  local version="0.1.1" tag="v0.1.1"
+  local wheel_sha256="390c1320918972206546e44d79b132988f2818ec07e23afcd0595f7183916cec"
+  cx_log "UCCL EP: installing uccl==$version + cu12 runtime shim"
+  export PIP_BREAK_SYSTEM_PACKAGES=1
+  pip install -q --no-deps "sortedcontainers==2.4.0" "intervaltree==3.1.0" >&2 2>&1 \
+    || { cx_log "ERROR: UCCL support dependency installation failed"; return 1; }
+  # The requirement line is fed on stdin so pip can enforce the wheel hash.
+  printf 'uccl==%s --hash=sha256:%s\n' "$version" "$wheel_sha256" \
+    | pip install -q --no-deps --only-binary=:all: --require-hashes -r /dev/stdin >&2 2>&1 \
+    || { cx_log "ERROR: pip install uccl==$version failed"; return 1; }
+  pip install -q --no-deps "nvidia-cuda-runtime-cu12==12.9.79" >&2 2>&1 \
+    || { cx_log "ERROR: CUDA 12 runtime shim install failed"; return 1; }
+  # A failed lookup leaves cu12lib empty and LD_LIBRARY_PATH untouched; the uccl.ep import
+  # check below is what reports the problem in that case.
+  local cu12lib
+  cu12lib="$(python3 -c "import nvidia.cuda_runtime as m, os; print(os.path.join(os.path.dirname(m.__file__),'lib'))" 2>/dev/null)"
+  [ -n "$cu12lib" ] && export LD_LIBRARY_PATH="$cu12lib:${LD_LIBRARY_PATH:-}"
+  local installed
+  installed="$(python3 -c 'import importlib.metadata as m; print(m.version("uccl"))')" \
+    || { cx_log "ERROR: cannot read installed UCCL version"; return 1; }
+  [ "$installed" = "$version" ] \
+    || { cx_log "ERROR: expected UCCL $version, installed $installed"; return 1; }
+  UCCL_COMMIT="pkg-$installed"
+  export UCCL_COMMIT
+  # import torch FIRST: uccl.ep's C extension links libc10.so (torch), which is only on the loader
+  # path once torch is imported (rpath). The adapter (ep_uccl.py) imports torch before uccl.ep too.
+  python3 -c "import torch; from uccl.ep import Buffer; print('uccl.ep ready')" >&2 \
+    || { cx_log "ERROR: uccl.ep import failed (cu12 runtime on LD_LIBRARY_PATH?)"; return 1; }
+  # Vendor UCCL's DeepEP-API wrapper (ep/deep_ep_wrapper/deep_ep) under a NON-conflicting name
+  # (uccl_deepep) so it doesn't shadow the container's real deep_ep. Its Buffer(group, num_nvl_bytes,
+  # ...) takes a torch ProcessGroup (matching DeepEP + ep_uccl.py's calls) and runs the full
+  # proxy/IPC-handle/runtime.sync bootstrap that the low-level uccl.ep.Buffer(rank,num_ranks) lacks.
+  rm -rf /tmp/uccl_src /tmp/uccl_deepep_pkg
+  # Pin the wrapper to the SAME tag as the installed wheel (pkg-0.1.1 -> v0.1.1): the wrapper's
+  # dispatch calls into uccl.ep (get_rdma_buffer etc.), so a main-branch wrapper vs a 0.1.1 wheel
+  # mismatches signatures. Match them.
+  # The clone is accepted only if the tag resolves to the pinned commit and the wrapper
+  # directory exists.
+  if git clone --depth 1 --branch "$tag" https://github.com/uccl-project/uccl /tmp/uccl_src >&2 2>&1 \
+     && [ "$(git -C /tmp/uccl_src rev-parse HEAD)" = "73ee4f12ba71717d6de34ba06806e1baaabe3f42" ] \
+     && [ -d /tmp/uccl_src/ep/deep_ep_wrapper/deep_ep ]; then
+    mkdir -p /tmp/uccl_deepep_pkg/uccl_deepep
+    # The copy status is not checked; the wrapper import below fails if files are missing.
+    cp /tmp/uccl_src/ep/deep_ep_wrapper/deep_ep/*.py /tmp/uccl_deepep_pkg/uccl_deepep/ 2>/dev/null
+    export PYTHONPATH="/tmp/uccl_deepep_pkg:${PYTHONPATH:-}"
+    python3 -c "import torch; from uccl_deepep import Buffer; print('uccl_deepep wrapper ready')" >&2 \
+      || { cx_log "ERROR: uccl_deepep wrapper import failed"; return 1; }
+    export CX_UCCL_WRAPPER=1
+    export UCCL_WRAPPER_COMMIT="73ee4f12ba71717d6de34ba06806e1baaabe3f42"
+  else
+    cx_log "ERROR: uccl deep_ep_wrapper not available"
+    return 1
+  fi
+  # The marker is written only after every install and import check has passed.
+  : > /tmp/.cx_built_uccl
+  cx_log "UCCL EP ready ($UCCL_COMMIT, wrapper=${CX_UCCL_WRAPPER:-0})"
+}
+
+# Rack build and rank steps may enter different container instances. Persist each node's
+# loader/import path and build identity on the shared staged mount, then require it from every rank.
+cx_persist_backend_env() {
+  # Write "export NAME=value" lines (shell-quoted with %q) for every listed variable that is
+  # currently declared into "$PWD/.cx_backend/env/node-<SLURM_NODEID or 0>.sh". The file is
+  # staged as an owner-only temporary file and renamed into place. Returns 1 on any failure,
+  # removing the temporary file when one was created.
+  local env_dir="$PWD/.cx_backend/env" node="${SLURM_NODEID:-0}" staged published var
+  local -a persisted=(PATH VIRTUAL_ENV LD_LIBRARY_PATH PYTHONPATH CUDA_HOME CPATH NVCC_PREPEND_FLAGS
+    NVSHMEM_DIR DEEPEP_COMMIT DEEPEP_TREE
+    EP_NCCL_ROOT_DIR EP_NVSHMEM_ROOT_DIR EP_JIT_CACHE_DIR EP_REUSE_NCCL_COMM
+    EP_JIT_DUMP_SASS
+    DEEPEP_V2_PR DEEPEP_V2_FIX_PR DEEPEP_V2_COMMIT DEEPEP_V2_TREE DEEPEP_V2_FMT_COMMIT
+    DEEPEP_V2_JIT_RANDOM_SEED
+    UCCL_COMMIT UCCL_WRAPPER_COMMIT CX_UCCL_WRAPPER)
+  # The node id becomes part of a file name, so only plain digits are accepted.
+  if ! [[ "$node" =~ ^[0-9]+$ ]]; then
+    return 1
+  fi
+  mkdir -p "$env_dir" && chmod 700 "$env_dir" || return 1
+  staged="$(mktemp "$env_dir/.node-${node}.XXXXXX")" || return 1
+  if ! chmod 600 "$staged"; then
+    rm -f "$staged"
+    return 1
+  fi
+  for var in "${persisted[@]}"; do
+    # Variables that are not declared are left out of the file entirely.
+    declare -p "$var" >/dev/null 2>&1 || continue
+    if ! printf 'export %s=%q\n' "$var" "${!var}" >> "$staged"; then
+      rm -f "$staged"
+      return 1
+    fi
+  done
+  published="$env_dir/node-${node}.sh"
+  if ! mv -f -- "$staged" "$published"; then
+    rm -f "$staged"
+    return 1
+  fi
+}
+
+# Prepare and probe one backend without running a benchmark. The same hook is used
+# by normal in-container runs and by rack launchers' persistent build-only step.
+cx_prepare_backend() {
+  # Run the preparation/probe step for the backend named in $1. Returns 0 when that step
+  # succeeds (nccl-ep has no step), and 1 when it fails or the name is not recognized.
+  local backend="${1:-}"
+  case "$backend" in
+    deepep) cx_probe_deepep ;;
+    deepep-v2) cx_build_deepep_v2 ;;
+    deepep-hybrid) cx_build_deepep_hybrid ;;
+    uccl) cx_build_uccl ;;
+    mori) python3 -c "import mori" 2>/dev/null ;;
+    nccl-ep) ;;
+    *)
+      cx_log "ERROR: unknown backend preparation request"
+      return 1
+      ;;
+  esac || return 1
+}
+
+prepare_backend_or_record() {
+  # Record the backend-setup runtime stage, then prepare backend $1. When preparation fails,
+  # emit one failed-case record (failure mode backend-setup, code 6) per phase named in
+  # CX_PHASE (default decode; "both" expands to decode and prefill) and return 1.
+  local backend="$1" phase_spec="${CX_PHASE:-decode}" one_phase
+  if ! cx_write_runtime_stage backend-setup; then
+    return 1
+  fi
+  cx_prepare_backend "$backend" && return 0
+  cx_log "WARN: $backend preparation failed"
+  if [ "$phase_spec" = "both" ]; then
+    phase_spec="decode prefill"
+  fi
+  # Deliberately unquoted: the phase list is split on whitespace.
+  for one_phase in $phase_spec; do
+    CX_FAILURE_MODE=backend-setup emit_failed_case "$backend" "$one_phase" 6
+  done
+  return 1
+}
+
+# dispatch_bench runs the CURRENT CX_BENCH (+ CX_* config env) once. The sweep workflow runs many
+# of these per allocation (SHARD mode below), reusing this single container + its built backend.
+dispatch_bench() {
+  # Run the EP suite for the backend named in CX_BENCH. Every backend except nccl-ep is
+  # prepared first, and the suite is skipped (status 1) when preparation fails. An unknown
+  # CX_BENCH value ends in cx_die.
+  case "$CX_BENCH" in
+    deepep|deepep-v2|deepep-hybrid|mori|uccl)
+      if ! prepare_backend_or_record "$CX_BENCH"; then
+        return 1
+      fi
+      run_ep_suite "$CX_BENCH"
+      ;;
+    nccl-ep)
+      run_ep_suite "$CX_BENCH"
+      ;;
+    *)
+      cx_die "unknown CX_BENCH=$CX_BENCH (want deepep|deepep-v2|mori|uccl|nccl-ep|deepep-hybrid)"
+      ;;
+  esac
+}
+
+rc=0
+cx_validate_shard_control "$PWD"
+# Build-only mode: rack launchers run the shared backend preparation hook once per
+# node inside a persistent named container, then direct rank processes reuse it.
+if [ -n "${CX_BUILD_ONLY:-}" ]; then
+  if cx_prepare_backend "${CX_BENCH:-}"; then
+    cx_persist_backend_env || rc=1
+  else
+    rc=1
+  fi
+  cx_log "backend preparation: bench=${CX_BENCH:-unknown} rc=$rc"
+  exit "$rc"
+fi
+if [ -n "${CX_SHARD_FILE:-}" ]; then
+  # SHARD/SWEEP mode (collectivex-sweep.yml): run EVERY case of this shard in THIS one allocation.
+  # All cases share (sku, backend, nodes), so backend preparation is paid once and cached.
+  # The shard path is handed to Python as an argument instead of being spliced into the program
+  # text, so a path containing quotes or backslashes cannot change the code that runs. A count
+  # that cannot be read is treated as an empty shard and fails the job.
+  ncases="$(python3 -c 'import json, sys; print(len(json.load(open(sys.argv[1]))["cases"]))' \
+    "$CX_SHARD_FILE")" && [[ "$ncases" =~ ^[0-9]+$ ]] || {
+    cx_log "ERROR: cannot read the case count from shard=$CX_SHARD_FILE"
+    ncases=0
+    rc=1
+  }
+  cx_log "SHARD mode: $ncases case(s) in one allocation (shard=$CX_SHARD_FILE)"
+  _cx_ts_base="$CX_TS"   # per-case CX_TS suffix below keeps each case's result file UNIQUE (else
+                         # cases sharing backend+phase overwrite each other at the same timestamp).
+  ci=0
+  failed_cases=0
+  while [ "$ci" -lt "$ncases" ]; do
+    CX_TS="${_cx_ts_base}-c$(printf '%03d' "$ci")"
+    export CX_TS
+    # Map varying case fields plus the frozen v1 defaults into CX_* env.
+    _exports="$(python3 - "$CX_SHARD_FILE" "$ci" <<'PY'
+import json, sys, shlex
+c = json.load(open(sys.argv[1]))["cases"][int(sys.argv[2])]
+def g(k, d=""):
+    v = c.get(k, d); return "" if v is None else str(v)
+env = {
+  "CX_BENCH": g("backend"),
+  "CX_ROUTING": g("routing", "uniform"), "CX_PHASE": g("phase", "decode"),
+  "CX_EP": g("ep", "1"),
+  "CX_EPLB": "1" if c.get("eplb") else "",
+  "CX_CASE_ID": g("case_id"), "CX_SUITE": g("suite"), "CX_WORKLOAD_NAME": g("workload"),
+  "CX_REQUIRED_PUBLICATION": g("required_publication"),
+  "CX_HIDDEN": g("hidden"), "CX_TOPK": g("topk"), "CX_EXPERTS": g("experts"),
+  "CX_TOKENS_LADDER": g("ladder"), "CX_CANONICAL": ("1" if c.get("canonical") else ""),
+}
+lines = [f"export {k}={shlex.quote(v)}" for k, v in env.items()]
+# Per-case timing "iters:trials:warmup" (fixed-512-v1 requires 8:64:32 everywhere);
+# cases without one must fall back to the harness defaults, so UNSET rather than export-empty
+# (an empty CX_ITERS would defeat the 8-iter default and break the run_ep argparse; NOTE no
+# apostrophes in this heredoc — bash command-substitution scanning chokes on unbalanced quotes).
+timing = g("timing")
+if timing:
+    parts = (timing.split(":") + ["", "", ""])[:3]
+    for k, v in zip(("CX_ITERS", "CX_TRIALS", "CX_WARMUP"), parts):
+        if v:
+            lines.append(f"export {k}={shlex.quote(v)}")
+else:
+    lines.append("unset CX_ITERS CX_TRIALS CX_WARMUP 2>/dev/null || true")
+print("\n".join(lines))
+PY
+)" || {
+      # Without fresh exports the CX_* values of the previous case would still be in effect,
+      # so count this case as failed and skip it instead of re-running the old configuration.
+      failed_cases=$((failed_cases+1))
+      cx_log "  [$((ci+1))/$ncases] case export FAILED; case skipped"
+      ci=$((ci + 1))
+      continue
+    }
+    eval "$_exports"
+    # Each case has its OWN routing/dims -> its own canonical workload manifest. cx_stage_canonical
+    # short-circuits when CX_WORKLOAD_DIR is already set, so without this unset the first case's
+    # staged dir is reused for the rest and run_ep.py can't find the later cases' manifests
+    # (FileNotFoundError .cx_workloads/<wid>.manifest.json). Unset so every case re-stages its own.
+    unset CX_WORKLOAD_DIR 2>/dev/null || true
+    cx_log "  [$((ci+1))/$ncases] $CX_BENCH $CX_PHASE routing=$CX_ROUTING eplb=${CX_EPLB:-0}"
+    # The attempt suffix is applied only for the duration of the run; the per-case timestamp
+    # is restored afterwards.
+    _cx_case_ts="$CX_TS"
+    CX_TS="${_cx_case_ts}-a01"
+    export CX_ATTEMPT_ID=1 CX_TS
+    dispatch_bench || {
+      failed_cases=$((failed_cases+1))
+      cx_log "  [$((ci+1))/$ncases] $CX_BENCH case FAILED; failed-case record preserved"
+    }
+    export CX_TS="$_cx_case_ts"
+    ci=$((ci + 1))
+  done
+  if [ "${failed_cases:-0}" -gt 0 ]; then
+    cx_log "SHARD done: $failed_cases/$ncases case(s) failed"
+    rc=1
+  fi
+  # The base timestamp matches every per-case file, so the final summary covers the whole shard.
+  export CX_TS="$_cx_ts_base"
+else
+  # Single-case mode: run the current CX_BENCH once as attempt 1.
+  _cx_single_ts="$CX_TS"
+  CX_TS="${_cx_single_ts}-a01"
+  export CX_ATTEMPT_ID=1 CX_TS
+  dispatch_bench || rc=1
+fi
+
+# Summary table for the log; also fails the job if no valid results were produced.
+python3 summarize.py --results-dir results --runner "$CX_RUNNER" --ts "$CX_TS" || rc=1
+exit "$rc"
diff --git a/experimental/CollectiveX/schemas/channel-v1.schema.json b/experimental/CollectiveX/schemas/channel-v1.schema.json
new file mode 100644
index 0000000000..663e22914b
--- /dev/null
+++ b/experimental/CollectiveX/schemas/channel-v1.schema.json
@@ -0,0 +1,23 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://inferencex.com/schemas/collectivex/channel-v1.schema.json",
+  "title": "CollectiveX public channel v1",
+  "type": "object",
+  "additionalProperties": false,
+  "required": ["format","channel","dataset","generated_at"],
+  "properties": {
+      "format": {"const": "collectivex.channel.v1"},
+      "channel": {"enum": ["latest-attempt","dev-latest"]},
+      "dataset": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": ["path","sha256","bytes"],
+            "properties": {
+                    "path": {"type": "string","pattern": "^datasets/[0-9a-f]{64}/dataset\\.json$"},
+                    "sha256": {"type": "string","pattern": "^[0-9a-f]{64}$"},
+                    "bytes": {"type": "integer","minimum": 1,"maximum": 33554432}
+                  }
+          },
+      "generated_at": {"type": "string","format": "date-time"}
+    }
+}
diff --git a/experimental/CollectiveX/schemas/private-bundle-v1.schema.json b/experimental/CollectiveX/schemas/private-bundle-v1.schema.json
new file mode 100644
index 0000000000..166c808930
--- /dev/null
+++ b/experimental/CollectiveX/schemas/private-bundle-v1.schema.json
@@ -0,0 +1,162 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://inferencex.com/schemas/collectivex/private-bundle-v1.schema.json",
+  "title": "CollectiveX private attempt bundle v1",
+  "type": "object",
+  "additionalProperties": false,
+  "required": [
+      "format",
+      "schema_version",
+      "created_at",
+      "ingest_id",
+      "run",
+      "matrix",
+      "sources",
+      "attempts",
+      "coverage",
+      "runtime_fingerprints",
+      "checksums",
+      "validation"
+    ],
+  "properties": {
+      "format": {"const": "collectivex.private.bundle.v1"},
+      "schema_version": {"const": 1},
+      "created_at": {"type": "string","format": "date-time"},
+      "ingest_id": {"$ref": "#/$defs/sha256"},
+      "run": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": ["repository","run_id","run_attempt","source_sha"],
+            "properties": {
+                    "repository": {"type": "string","pattern": "^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$"},
+                    "run_id": {"type": "string","pattern": "^[1-9][0-9]*$"},
+                    "run_attempt": {"type": "integer","minimum": 1},
+                    "source_sha": {"type": "string","pattern": "^[0-9a-f]{40}$"}
+                  }
+          },
+      "matrix": {"$ref": "#/$defs/file"},
+      "sources": {"type": "array","minItems": 1,"uniqueItems": true,"items": {"$ref": "#/$defs/source"}},
+      "attempts": {
+            "type": "array",
+            "minItems": 1,
+            "items": {
+                    "type": "object",
+                    "additionalProperties": false,
+                    "required": [
+                              "attempt_id",
+                              "allocation_id",
+                              "case_id",
+                              "outcome",
+                              "reason",
+                              "selected",
+                              "document",
+                              "samples",
+                              "runtime_fingerprint_sha256",
+                              "series_ids",
+                              "evidence_ids"
+                            ],
+                    "properties": {
+                              "attempt_id": {"$ref": "#/$defs/attemptId"},
+                              "allocation_id": {"$ref": "#/$defs/allocationId"},
+                              "case_id": {"$ref": "#/$defs/caseId"},
+                              "outcome": {"$ref": "#/$defs/outcome"},
+                              "reason": {"$ref": "#/$defs/reason"},
+                              "selected": {"type": "boolean"},
+                              "document": {"$ref": "#/$defs/file"},
+                              "samples": {"oneOf": [{"type": "null"},{"$ref": "#/$defs/file"}]},
+                              "runtime_fingerprint_sha256": {"oneOf": [{"type": "null"},{"$ref": "#/$defs/sha256"}]},
+                              "series_ids": {"type": "array","uniqueItems": true,"items": {"$ref": "#/$defs/seriesId"}},
+                              "evidence_ids": {"type": "array","uniqueItems": true,"items": {"$ref": "#/$defs/evidenceId"}}
+                            }
+                  }
+          },
+      "coverage": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": ["expected_cases","terminal_cases","complete","outcome_counts","selections"],
+            "properties": {
+                    "expected_cases": {"type": "integer","minimum": 1},
+                    "terminal_cases": {"type": "integer","minimum": 0},
+                    "complete": {"type": "boolean"},
+                    "outcome_counts": {"$ref": "#/$defs/outcomeCounts"},
+                    "selections": {
+                              "type": "array",
+                              "minItems": 1,
+                              "items": {
+                                          "type": "object",
+                                          "additionalProperties": false,
+                                          "required": ["case_id","selected_attempt_id","outcome"],
+                                          "properties": {
+                                                        "case_id": {"$ref": "#/$defs/caseId"},
+                                                        "selected_attempt_id": {"$ref": "#/$defs/attemptId"},
+                                                        "outcome": {"$ref": "#/$defs/outcome"}
+                                                      }
+                                        }
+                            }
+                  }
+          },
+      "runtime_fingerprints": {"type": "array","uniqueItems": true,"items": {"$ref": "#/$defs/sha256"}},
+      "checksums": {"$ref": "#/$defs/file"},
+      "validation": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": ["policy","passed","checks"],
+            "properties": {
+                    "policy": {"const": "collectivex-publisher-v1"},
+                    "passed": {"const": true},
+                    "checks": {
+                              "type": "array",
+                              "minItems": 1,
+                              "uniqueItems": true,
+                              "items": {"type": "string","pattern": "^[a-z0-9][a-z0-9.-]*$"}
+                            }
+                  }
+          }
+    },
+  "$defs": {
+      "sha256": {"type": "string","pattern": "^[0-9a-f]{64}$"},
+      "caseId": {"type": "string","pattern": "^cxcase-v1-[0-9a-f]{64}$"},
+      "seriesId": {"type": "string","pattern": "^cxseries-v1-[0-9a-f]{64}$"},
+      "evidenceId": {"type": "string","pattern": "^cxevidence-v1-[0-9a-f]{64}$"},
+      "allocationId": {"type": "string","pattern": "^cxallocation-v1-[0-9a-f]{64}$"},
+      "attemptId": {"type": "string","pattern": "^cxattempt-v1-[0-9a-f]{64}$"},
+      "reason": {"oneOf": [{"type": "null"},{"type": "string","pattern": "^[a-z0-9][a-z0-9.-]*$","maxLength": 96}]},
+      "outcome": {"enum": ["success","unsupported","failed","invalid","diagnostic"]},
+      "outcomeCounts": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": ["success","unsupported","failed","invalid","diagnostic"],
+            "properties": {
+                    "success": {"type": "integer","minimum": 0},
+                    "unsupported": {"type": "integer","minimum": 0},
+                    "failed": {"type": "integer","minimum": 0},
+                    "invalid": {"type": "integer","minimum": 0},
+                    "diagnostic": {"type": "integer","minimum": 0}
+                  }
+          },
+      "file": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": ["path","sha256","bytes"],
+            "properties": {
+                    "path": {"$comment": "Relative slash-separated path. The leading lookahead rejects any segment that is exactly '.' or '..' so a reference cannot climb out of its base directory; dot-prefixed names stay valid.","type": "string","pattern": "^(?!(?:.*/)?\\.{1,2}(?:/|$))[A-Za-z0-9_.-]+(?:/[A-Za-z0-9_.-]+)*$"},
+                    "sha256": {"$ref": "#/$defs/sha256"},
+                    "bytes": {"type": "integer","minimum": 1}
+                  }
+          },
+      "source": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": ["path","sha256","bytes","artifact_name"],
+            "properties": {
+                    "path": {"$comment": "Relative slash-separated path. The leading lookahead rejects any segment that is exactly '.' or '..' so a reference cannot climb out of its base directory; dot-prefixed names stay valid.","type": "string","pattern": "^(?!(?:.*/)?\\.{1,2}(?:/|$))[A-Za-z0-9_.-]+(?:/[A-Za-z0-9_.-]+)*$"},
+                    "sha256": {"$ref": "#/$defs/sha256"},
+                    "bytes": {"type": "integer","minimum": 1},
+                    "artifact_name": {
+                              "type": "string",
+                              "pattern": "^cx(?:unsupported|shard-[a-z0-9][a-z0-9_.-]{0,127})-[1-9][0-9]*-[1-9][0-9]*$"
+                            }
+                  }
+          }
+    }
+}
diff --git a/experimental/CollectiveX/schemas/public-dataset-v1.schema.json b/experimental/CollectiveX/schemas/public-dataset-v1.schema.json
new file mode 100644
index 0000000000..87abf403d1
--- /dev/null
+++ b/experimental/CollectiveX/schemas/public-dataset-v1.schema.json
@@ -0,0 +1,562 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://inferencex.com/schemas/collectivex/public-dataset-v1.schema.json",
+  "title": "CollectiveX sanitized public dataset v1",
+  "type": "object",
+  "additionalProperties": false,
+  "required": [
+      "format",
+      "schema_version",
+      "generated_at",
+      "source_bundle_ids",
+      "promotion",
+      "coverage",
+      "attempts",
+      "series",
+      "cohorts",
+      "rankings",
+      "recommendations",
+      "sensitivities"
+    ],
+  "properties": {
+      "format": {"const": "collectivex.public.v1"},
+      "schema_version": {"const": 1},
+      "generated_at": {"type": "string","format": "date-time"},
+      "source_bundle_ids": {"type": "array","uniqueItems": true,"items": {"$ref": "#/$defs/sha256"}},
+      "promotion": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": [
+                    "status",
+                    "reason",
+                    "matrix_id",
+                    "allocation_ids",
+                    "required_allocations",
+                    "requested_cases",
+                    "terminal_cases",
+                    "policy"
+                  ],
+            "properties": {
+                    "status": {"enum": ["promoted","diagnostic","quarantined"]},
+                    "reason": {"$ref": "#/$defs/reason"},
+                    "matrix_id": {"oneOf": [{"type": "null"},{"$ref": "#/$defs/sha256"}]},
+                    "allocation_ids": {"type": "array","uniqueItems": true,"items": {"$ref": "#/$defs/allocationId"}},
+                    "required_allocations": {"const": 3},
+                    "requested_cases": {"type": "integer","minimum": 0},
+                    "terminal_cases": {"type": "integer","minimum": 0},
+                    "policy": {"const": "collectivex-decision-grade-v1"}
+                  }
+          },
+      "coverage": {"type": "array","items": {"$ref": "#/$defs/coverage"}},
+      "attempts": {"type": "array","items": {"$ref": "#/$defs/attempt"}},
+      "series": {"type": "array","items": {"$ref": "#/$defs/series"}},
+      "cohorts": {"type": "array","items": {"$ref": "#/$defs/cohort"}},
+      "rankings": {"type": "array","items": {"$ref": "#/$defs/ranking"}},
+      "recommendations": {"type": "array","items": {"$ref": "#/$defs/recommendation"}},
+      "sensitivities": {"type": "array","items": {"$ref": "#/$defs/sensitivity"}}
+    },
+  "$defs": {
+      "sha256": {"type": "string","pattern": "^[0-9a-f]{64}$"},
+      "caseId": {"type": "string","pattern": "^cxcase-v1-[0-9a-f]{64}$"},
+      "workloadId": {"type": "string","pattern": "^cxwork-v1-[0-9a-f]{64}$"},
+      "seriesId": {"type": "string","pattern": "^cxseries-v1-[0-9a-f]{64}$"},
+      "pointId": {"type": "string","pattern": "^cxpoint-v1-[0-9a-f]{64}$"},
+      "evidenceId": {"type": "string","pattern": "^cxevidence-v1-[0-9a-f]{64}$"},
+      "allocationId": {"type": "string","pattern": "^cxallocation-v1-[0-9a-f]{64}$"},
+      "attemptId": {"type": "string","pattern": "^cxattempt-v1-[0-9a-f]{64}$"},
+      "safeId": {"type": "string","pattern": "^[a-z0-9][a-z0-9_.-]*$","maxLength": 128},
+      "publicationTier": {"enum": ["official","comparable-experimental"]},
+      "label": {"type": "string","minLength": 1,"maxLength": 160},
+      "nullableLabel": {"oneOf": [{"type": "null"},{"$ref": "#/$defs/label"}]},
+      "reason": {"oneOf": [{"type": "null"},{"type": "string","pattern": "^[a-z0-9][a-z0-9.-]*$","maxLength": 96}]},
+      "outcome": {"enum": ["success","unsupported","failed","invalid","diagnostic"]},
+      "coverage": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": [
+                    "case_id",
+                    "label",
+                    "required",
+                    "sku",
+                    "backend",
+                    "phase",
+                    "disposition",
+                    "selected_attempt_id",
+                    "outcome",
+                    "failure_mode",
+                    "reason",
+                    "attempt_ids"
+                  ],
+            "properties": {
+                    "case_id": {"$ref": "#/$defs/caseId"},
+                    "label": {"$ref": "#/$defs/label"},
+                    "required": {"type": "boolean"},
+                    "sku": {"$ref": "#/$defs/safeId"},
+                    "backend": {"$ref": "#/$defs/safeId"},
+                    "phase": {"enum": ["decode","prefill"]},
+                    "disposition": {"enum": ["runnable","unsupported"]},
+                    "selected_attempt_id": {"oneOf": [{"type": "null"},{"$ref": "#/$defs/attemptId"}]},
+                    "outcome": {"$ref": "#/$defs/outcome"},
+                    "failure_mode": {"$ref": "#/$defs/reason"},
+                    "reason": {"$ref": "#/$defs/reason"},
+                    "attempt_ids": {"type": "array","uniqueItems": true,"items": {"$ref": "#/$defs/attemptId"}}
+                  }
+          },
+      "attempt": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": [
+                    "attempt_id",
+                    "evidence",
+                    "case_id",
+                    "allocation_id",
+                    "run_id",
+                    "run_attempt",
+                    "attempt_index",
+                    "selected",
+                    "outcome",
+                    "failure_mode",
+                    "reason",
+                    "series_id",
+                    "completed_at"
+                  ],
+            "properties": {
+                    "attempt_id": {"$ref": "#/$defs/attemptId"},
+                    "evidence": {
+                              "type": "array",
+                              "uniqueItems": true,
+                              "items": {
+                                          "type": "object",
+                                          "additionalProperties": false,
+                                          "required": ["evidence_id","point_id"],
+                                          "properties": {"evidence_id": {"$ref": "#/$defs/evidenceId"},"point_id": {"$ref": "#/$defs/pointId"}}
+                                        }
+                            },
+                    "case_id": {"$ref": "#/$defs/caseId"},
+                    "allocation_id": {"$ref": "#/$defs/allocationId"},
+                    "run_id": {"type": "string","pattern": "^[1-9][0-9]*$"},
+                    "run_attempt": {"type": "integer","minimum": 1},
+                    "attempt_index": {"type": "integer","minimum": 1},
+                    "selected": {"type": "boolean"},
+                    "outcome": {"$ref": "#/$defs/outcome"},
+                    "failure_mode": {"$ref": "#/$defs/reason"},
+                    "reason": {"$ref": "#/$defs/reason"},
+                    "series_id": {"oneOf": [{"type": "null"},{"$ref": "#/$defs/seriesId"}]},
+                    "completed_at": {"oneOf": [{"type": "null"},{"type": "string","format": "date-time"}]}
+                  }
+          },
+      "eligibility": {
+            "type": "object",
+            "additionalProperties": false,
+            "allOf": [{
+                    "if": {"properties": {"decision_grade": {"const": true}},"required": ["decision_grade"]},
+                    "then": {"properties": {"reasons": {"maxItems": 0}}},
+                    "else": {"properties": {"reasons": {"minItems": 1}}}
+                  }],
+            "required": [
+                    "decision_grade",
+                    "allocation_ids",
+                    "complete",
+                    "correct",
+                    "measured_roundtrip_p99",
+                    "stable_p50",
+                    "stable_p99",
+                    "stable_ordering",
+                    "p50_max_min_ratio",
+                    "p99_max_min_ratio",
+                    "reasons"
+                  ],
+            "properties": {
+                    "decision_grade": {"type": "boolean"},
+                    "allocation_ids": {"type": "array","uniqueItems": true,"items": {"$ref": "#/$defs/allocationId"}},
+                    "complete": {"type": "boolean"},
+                    "correct": {"type": "boolean"},
+                    "measured_roundtrip_p99": {"type": "boolean"},
+                    "stable_p50": {"type": "boolean"},
+                    "stable_p99": {"type": "boolean"},
+                    "stable_ordering": {"type": "boolean"},
+                    "p50_max_min_ratio": {"oneOf": [{"type": "null"},{"type": "number","minimum": 1}]},
+                    "p99_max_min_ratio": {"oneOf": [{"type": "null"},{"type": "number","minimum": 1}]},
+                    "reasons": {
+                              "type": "array",
+                              "uniqueItems": true,
+                              "items": {"type": "string","pattern": "^[a-z0-9][a-z0-9.-]*$","maxLength": 96}
+                            }
+                  }
+          },
+      "percentiles": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": ["p50","p90","p95","p99"],
+            "properties": {
+                    "p50": {"type": "number","exclusiveMinimum": 0},
+                    "p90": {"type": "number","exclusiveMinimum": 0},
+                    "p95": {"type": "number","exclusiveMinimum": 0},
+                    "p99": {"type": "number","exclusiveMinimum": 0}
+                  }
+          },
+      "component": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": ["origin","latency_us","logical_bytes","logical_payload_rate_gbps_at_latency_percentile","sample_count"],
+            "properties": {
+                    "origin": {"enum": ["measured","derived"]},
+                    "latency_us": {"$ref": "#/$defs/percentiles"},
+                    "logical_bytes": {"oneOf": [{"type": "null"},{"type": "integer","minimum": 1}]},
+                    "logical_payload_rate_gbps_at_latency_percentile": {"oneOf": [{"type": "null"},{"$ref": "#/$defs/percentiles"}]},
+                    "sample_count": {"oneOf": [{"type": "null"},{"type": "integer","minimum": 1}]}
+                  }
+          },
+      "nullableComponent": {"oneOf": [{"type": "null"},{"$ref": "#/$defs/component"}]},
+      "point": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": [
+                    "point_id",
+                    "tokens_per_rank",
+                    "global_tokens",
+                    "correct",
+                    "routing",
+                    "components",
+                    "roundtrip_token_rate_at_latency_percentile",
+                    "evidence_ids"
+                  ],
+            "properties": {
+                    "point_id": {"$ref": "#/$defs/pointId"},
+                    "tokens_per_rank": {"type": "integer","minimum": 1},
+                    "global_tokens": {"type": "integer","minimum": 1},
+                    "correct": {"type": "boolean"},
+                    "routing": {
+                              "type": "object",
+                              "additionalProperties": false,
+                              "required": [
+                                          "fanout_mean",
+                                          "recv_tokens_max",
+                                          "expert_load_cv",
+                                          "payload_rank_cv",
+                                          "hotspot_ratio",
+                                          "empty_expert_count",
+                                          "empty_rank_count",
+                                          "routed_copies"
+                                        ],
+                              "properties": {
+                                          "fanout_mean": {"type": "number","minimum": 0},
+                                          "recv_tokens_max": {"type": "integer","minimum": 0},
+                                          "expert_load_cv": {"type": "number","minimum": 0},
+                                          "payload_rank_cv": {"type": "number","minimum": 0},
+                                          "hotspot_ratio": {"type": "number","minimum": 0},
+                                          "empty_expert_count": {"type": "integer","minimum": 0},
+                                          "empty_rank_count": {"type": "integer","minimum": 0},
+                                          "routed_copies": {"type": "integer","minimum": 1}
+                                        }
+                            },
+                    "components": {
+                              "type": "object",
+                              "additionalProperties": false,
+                              "required": ["dispatch","combine","roundtrip","isolated_sum"],
+                              "properties": {
+                                          "dispatch": {"$ref": "#/$defs/nullableComponent"},
+                                          "combine": {"$ref": "#/$defs/nullableComponent"},
+                                          "roundtrip": {"$ref": "#/$defs/nullableComponent"},
+                                          "isolated_sum": {"$ref": "#/$defs/nullableComponent"}
+                                        }
+                            },
+                    "roundtrip_token_rate_at_latency_percentile": {"$ref": "#/$defs/percentiles"},
+                    "evidence_ids": {"type": "array","uniqueItems": true,"items": {"$ref": "#/$defs/evidenceId"}}
+                  }
+          },
+      "series": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": [
+                    "series_id",
+                    "label",
+                    "status",
+                    "case_ids",
+                    "allocation_ids",
+                    "model",
+                    "suite",
+                    "phase",
+                    "publication_tier",
+                    "backend",
+                    "build",
+                    "system",
+                    "workload",
+                    "eplb",
+                    "resource",
+                    "measurement",
+                    "points",
+                    "eligibility"
+                  ],
+            "properties": {
+                    "series_id": {"$ref": "#/$defs/seriesId"},
+                    "label": {"$ref": "#/$defs/label"},
+                    "status": {"enum": ["decision-grade","diagnostic"]},
+                    "case_ids": {"type": "array","minItems": 1,"uniqueItems": true,"items": {"$ref": "#/$defs/caseId"}},
+                    "allocation_ids": {"type": "array","minItems": 1,"uniqueItems": true,"items": {"$ref": "#/$defs/allocationId"}},
+                    "model": {"$ref": "#/$defs/safeId"},
+                    "suite": {"$ref": "#/$defs/safeId"},
+                    "phase": {"enum": ["decode","prefill"]},
+                    "publication_tier": {"$ref": "#/$defs/publicationTier"},
+                    "backend": {
+                              "type": "object",
+                              "additionalProperties": false,
+                              "required": ["id","label","role","generation","version"],
+                              "properties": {
+                                          "id": {"$ref": "#/$defs/safeId"},
+                                          "label": {"$ref": "#/$defs/label"},
+                                          "role": {"enum": ["library","reference"]},
+                                          "generation": {"$ref": "#/$defs/nullableLabel"},
+                                          "version": {"$ref": "#/$defs/nullableLabel"}
+                                        }
+                            },
+                    "build": {
+                              "type": "object",
+                              "additionalProperties": false,
+                              "required": ["implementation_contract_sha256","public_config_sha256","routing_control_sha256","runtime_fingerprint_sha256","image_digest","source_sha","squash_sha256"],
+                              "properties": {
+                                          "implementation_contract_sha256": {"$ref": "#/$defs/sha256"},
+                                          "public_config_sha256": {"$ref": "#/$defs/sha256"},
+                                          "routing_control_sha256": {"$ref": "#/$defs/sha256"},
+                                          "runtime_fingerprint_sha256": {"$ref": "#/$defs/sha256"},
+                                          "image_digest": {"type": "string","pattern": "^sha256:[0-9a-f]{64}$"},
+                                          "source_sha": {"type": "string","pattern": "^[0-9a-f]{40,64}$"},
+                                          "squash_sha256": {"$ref": "#/$defs/sha256"}
+                                        }
+                            },
+                    "system": {
+                              "type": "object",
+                              "additionalProperties": false,
+                              "required": ["sku","label","vendor","topology_class","transport","world_size","ep_size","placement"],
+                              "properties": {
+                                          "sku": {"$ref": "#/$defs/safeId"},
+                                          "label": {"$ref": "#/$defs/label"},
+                                          "vendor": {"enum": ["nvidia","amd"]},
+                                          "topology_class": {"$ref": "#/$defs/safeId"},
+                                          "transport": {"$ref": "#/$defs/safeId"},
+                                          "world_size": {"type": "integer","minimum": 1},
+                                          "ep_size": {"type": "integer","minimum": 1},
+                                          "placement": {"enum": ["packed"]}
+                                        }
+                            },
+                    "workload": {
+                              "type": "object",
+                              "additionalProperties": false,
+                              "required": [
+                                          "workload_id",
+                                          "hidden",
+                                          "top_k",
+                                          "experts",
+                                          "routing",
+                                          "eplb",
+                                          "dispatch_dtype",
+                                          "combine_dtype",
+                                          "activation_profile"
+                                        ],
+                              "properties": {
+                                          "workload_id": {"$ref": "#/$defs/workloadId"},
+                                          "hidden": {"type": "integer","minimum": 1},
+                                          "top_k": {"type": "integer","minimum": 1},
+                                          "experts": {"type": "integer","minimum": 1},
+                                          "routing": {"enum": ["uniform","zipf"]},
+                                          "eplb": {"type": "boolean"},
+                                          "dispatch_dtype": {"const": "bf16"},
+                                          "combine_dtype": {"const": "bf16"},
+                                          "activation_profile": {"const": "canonical-counter-source-v3"}
+                                        }
+                            },
+                    "eplb": {
+                              "type": "object",
+                              "additionalProperties": false,
+                              "required": [
+                                          "enabled",
+                                          "planner",
+                                          "mapping_sha256",
+                                          "logical_experts",
+                                          "physical_experts",
+                                          "redundant_experts",
+                                          "reference_tokens_per_rank",
+                                          "replicated_experts",
+                                          "max_replicas",
+                                          "imbalance_before",
+                                          "imbalance_after"
+                                        ],
+                              "properties": {
+                                          "enabled": {"type": "boolean"},
+                                          "planner": {"$ref": "#/$defs/nullableLabel"},
+                                          "mapping_sha256": {"oneOf": [{"type": "null"},{"$ref": "#/$defs/sha256"}]},
+                                          "logical_experts": {"type": "integer","minimum": 1},
+                                          "physical_experts": {"type": "integer","minimum": 1},
+                                          "redundant_experts": {"type": "integer","minimum": 0},
+                                          "reference_tokens_per_rank": {"oneOf": [{"type": "null"},{"type": "integer","minimum": 1}]},
+                                          "replicated_experts": {"type": "integer","minimum": 0},
+                                          "max_replicas": {"oneOf": [{"type": "null"},{"type": "integer","minimum": 0}]},
+                                          "imbalance_before": {"oneOf": [{"type": "null"},{"type": "number","minimum": 0}]},
+                                          "imbalance_after": {"oneOf": [{"type": "null"},{"type": "number","minimum": 0}]}
+                                        }
+                            },
+                    "resource": {
+                              "type": "object",
+                              "additionalProperties": false,
+                              "required": ["mode","profile","comm_units_kind","configured_units"],
+                              "properties": {
+                                          "mode": {"const": "tuned"},
+                                          "profile": {"$ref": "#/$defs/safeId"},
+                                          "comm_units_kind": {"$ref": "#/$defs/nullableLabel"},
+                                          "configured_units": {"oneOf": [{"type": "null"},{"type": "integer","minimum": 1}]}
+                                        }
+                            },
+                    "measurement": {
+                              "type": "object",
+                              "additionalProperties": false,
+                              "required": [
+                                          "contract",
+                                          "sampling_contract",
+                                          "iters",
+                                          "trials",
+                                          "warmups",
+                                          "samples_per_component",
+                                          "headline_component",
+                                          "headline_percentile"
+                                        ],
+                              "properties": {
+                                          "contract": {"const": "layout-and-dispatch-v1"},
+                                          "sampling_contract": {"const": "fixed-512-v1"},
+                                          "iters": {"const": 8},
+                                          "trials": {"const": 64},
+                                          "warmups": {"const": 32},
+                                          "samples_per_component": {"const": 512},
+                                          "headline_component": {"const": "roundtrip"},
+                                          "headline_percentile": {"const": "p99"}
+                                        }
+                            },
+                    "points": {"type": "array","minItems": 1,"items": {"$ref": "#/$defs/point"}},
+                    "eligibility": {"$ref": "#/$defs/eligibility"}
+                  }
+          },
+      "cohort": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": [
+                    "cohort_id",
+                    "kind",
+                    "label",
+                    "description",
+                    "series_ids",
+                    "controlled_factors",
+                    "varying_factors",
+                    "publication_tier",
+                    "eligibility"
+                  ],
+            "properties": {
+                    "cohort_id": {"type": "string","pattern": "^cxcohort-v1-[0-9a-f]{64}$"},
+                    "kind": {"enum": ["library","chip","system","routing"]},
+                    "label": {"$ref": "#/$defs/label"},
+                    "description": {"$ref": "#/$defs/label"},
+                    "series_ids": {"type": "array","minItems": 2,"uniqueItems": true,"items": {"$ref": "#/$defs/seriesId"}},
+                    "controlled_factors": {"type": "array","minItems": 1,"uniqueItems": true,"items": {"$ref": "#/$defs/safeId"}},
+                    "varying_factors": {"type": "array","minItems": 1,"uniqueItems": true,"items": {"$ref": "#/$defs/safeId"}},
+                    "publication_tier": {"$ref": "#/$defs/publicationTier"},
+                    "eligibility": {"$ref": "#/$defs/eligibility"}
+                  }
+          },
+      "metric": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": ["operation","statistic","measure","objective","tokens_per_rank","phase"],
+            "properties": {
+                    "operation": {"const": "roundtrip"},
+                    "statistic": {"enum": ["p50","p99"]},
+                    "measure": {"enum": ["latency_us","logical_payload_rate_gbps_at_latency_percentile"]},
+                    "objective": {"enum": ["min","max"]},
+                    "tokens_per_rank": {"type": "integer","minimum": 1},
+                    "phase": {"enum": ["decode","prefill"]}
+                  }
+          },
+      "ranking": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": ["ranking_id","cohort_id","label","metric","entries","publication_tier","eligibility"],
+            "properties": {
+                    "ranking_id": {"type": "string","pattern": "^cxranking-v1-[0-9a-f]{64}$"},
+                    "cohort_id": {"type": "string","pattern": "^cxcohort-v1-[0-9a-f]{64}$"},
+                    "label": {"$ref": "#/$defs/label"},
+                    "metric": {"$ref": "#/$defs/metric"},
+                    "entries": {
+                              "type": "array",
+                              "minItems": 2,
+                              "items": {
+                                          "type": "object",
+                                          "additionalProperties": false,
+                                          "required": ["rank","series_id","point_id","value","unit"],
+                                          "properties": {
+                                                        "rank": {"type": "integer","minimum": 1},
+                                                        "series_id": {"$ref": "#/$defs/seriesId"},
+                                                        "point_id": {"$ref": "#/$defs/pointId"},
+                                                        "value": {"type": "number","exclusiveMinimum": 0},
+                                                        "unit": {"enum": ["us","GB/s"]}
+                                                      }
+                                        }
+                            },
+                    "publication_tier": {"$ref": "#/$defs/publicationTier"},
+                    "eligibility": {"$ref": "#/$defs/eligibility"}
+                  }
+          },
+      "recommendation": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": [
+                    "recommendation_id",
+                    "cohort_id",
+                    "label",
+                    "objective",
+                    "series_id",
+                    "point_id",
+                    "value",
+                    "unit",
+                    "rationale",
+                    "publication_tier",
+                    "eligibility"
+                  ],
+            "properties": {
+                    "recommendation_id": {"type": "string","pattern": "^cxrecommendation-v1-[0-9a-f]{64}$"},
+                    "cohort_id": {"type": "string","pattern": "^cxcohort-v1-[0-9a-f]{64}$"},
+                    "label": {"$ref": "#/$defs/label"},
+                    "objective": {"enum": ["min-p50-latency","min-p99-latency","max-payload-rate-at-p50-latency","max-payload-rate-at-p99-latency"]},
+                    "series_id": {"$ref": "#/$defs/seriesId"},
+                    "point_id": {"$ref": "#/$defs/pointId"},
+                    "value": {"type": "number","exclusiveMinimum": 0},
+                    "unit": {"enum": ["us","GB/s"]},
+                    "rationale": {"$ref": "#/$defs/label"},
+                    "publication_tier": {"const": "official"},
+                    "eligibility": {"$ref": "#/$defs/eligibility"}
+                  }
+          },
+      "sensitivity": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": [
+                    "sensitivity_id",
+                    "cohort_id",
+                    "label",
+                    "baseline_series_id",
+                    "candidate_series_id",
+                    "metric",
+                    "signed_change_ratio",
+                    "publication_tier",
+                    "eligibility"
+                  ],
+            "properties": {
+                    "sensitivity_id": {"type": "string","pattern": "^cxsensitivity-v1-[0-9a-f]{64}$"},
+                    "cohort_id": {"type": "string","pattern": "^cxcohort-v1-[0-9a-f]{64}$"},
+                    "label": {"$ref": "#/$defs/label"},
+                    "baseline_series_id": {"$ref": "#/$defs/seriesId"},
+                    "candidate_series_id": {"$ref": "#/$defs/seriesId"},
+                    "metric": {"$ref": "#/$defs/metric"},
+                    "signed_change_ratio": {"type": "number"},
+                    "publication_tier": {"$ref": "#/$defs/publicationTier"},
+                    "eligibility": {"$ref": "#/$defs/eligibility"}
+                  }
+          }
+    }
+}
diff --git a/experimental/CollectiveX/schemas/raw-case-v1.schema.json b/experimental/CollectiveX/schemas/raw-case-v1.schema.json
new file mode 100644
index 0000000000..ccf85b19ad
--- /dev/null
+++ b/experimental/CollectiveX/schemas/raw-case-v1.schema.json
@@ -0,0 +1,1142 @@
+{
+  "$id": "https://inferencex.com/schemas/collectivex/raw-case-v1.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$defs": {
+    "deepep_v2_jit_cubin": {
+      "additionalProperties": false,
+      "properties": {
+        "cache_key": {
+          "pattern":"^kernel\\.[A-Za-z0-9_+-]+\\.[0-9a-f]{32}$",
+          "type":"string"
+        },
+        "cubin_sha256": {"pattern":"^[0-9a-f]{64}$","type":"string"},
+        "sass_sha256": {"pattern":"^[0-9a-f]{64}$","type":"string"},
+        "source_sha256": {"pattern":"^[0-9a-f]{64}$","type":"string"}
+      },
+      "required": ["cache_key","cubin_sha256","sass_sha256","source_sha256"],
+      "type": "object"
+    },
+    "hybrid_jit_rank_artifact": {
+      "additionalProperties": false,
+      "properties": {
+        "bytes": {"minimum":1,"type":"integer"},
+        "rank": {"minimum":0,"type":"integer"},
+        "sha256": {"pattern":"^[0-9a-f]{64}$","type":"string"}
+      },
+      "required": ["bytes","rank","sha256"],
+      "type": "object"
+    },
+    "hybrid_realized_config": {
+      "additionalProperties": false,
+      "properties": {
+        "backward_combine_api": {"type":"boolean"},
+        "device_side_sync_combine_api": {"type":"boolean"},
+        "device_side_sync_dispatch_api": {"type":"boolean"},
+        "forward_dispatch_api": {"type":"boolean"},
+        "hidden_dim": {"minimum":1,"type":"integer"},
+        "max_num_of_tokens_per_rank": {"minimum":1,"type":"integer"},
+        "num_of_additional_in_flight_s2g_combine_api": {"minimum":0,"type":"integer"},
+        "num_of_additional_in_flight_s2g_dispatch_api": {"minimum":0,"type":"integer"},
+        "num_of_blocks_combine_api": {"minimum":0,"type":"integer"},
+        "num_of_blocks_dispatch_api": {"minimum":0,"type":"integer"},
+        "num_of_blocks_permute": {"minimum":0,"type":"integer"},
+        "num_of_blocks_preprocessing_api": {"minimum":0,"type":"integer"},
+        "num_of_blocks_unpermute": {"minimum":0,"type":"integer"},
+        "num_of_experts_per_rank": {"minimum":1,"type":"integer"},
+        "num_of_in_flight_s2g_dispatch_api": {"minimum":0,"type":"integer"},
+        "num_of_in_flight_s2g_permute_block_dispatch_api": {"minimum":0,"type":"integer"},
+        "num_of_nodes": {"minimum":1,"type":"integer"},
+        "num_of_ranks_per_node": {"minimum":1,"type":"integer"},
+        "num_of_stages_dispatch_api": {"minimum":0,"type":"integer"},
+        "num_of_stages_g2s_combine_api": {"minimum":0,"type":"integer"},
+        "num_of_stages_permute_block_dispatch_api": {"minimum":0,"type":"integer"},
+        "num_of_stages_s2g_combine_api": {"minimum":0,"type":"integer"},
+        "num_of_threads_per_block_preprocessing_api": {"minimum":0,"type":"integer"},
+        "num_of_tokens_per_chunk_combine_api": {"minimum":0,"type":"integer"},
+        "num_of_tokens_per_chunk_dispatch_api": {"minimum":0,"type":"integer"},
+        "num_of_tokens_per_chunk_preprocessing_api": {"minimum":0,"type":"integer"},
+        "num_of_tokens_per_group_combine_api": {"minimum":0,"type":"integer"},
+        "pad_multiple": {"minimum":0,"type":"integer"},
+        "token_data_type": {"enum":["UINT8","UINT16"]}
+      },
+      "required": [
+        "backward_combine_api","device_side_sync_combine_api","device_side_sync_dispatch_api",
+        "forward_dispatch_api","hidden_dim","max_num_of_tokens_per_rank",
+        "num_of_additional_in_flight_s2g_combine_api",
+        "num_of_additional_in_flight_s2g_dispatch_api","num_of_blocks_combine_api",
+        "num_of_blocks_dispatch_api","num_of_blocks_permute","num_of_blocks_preprocessing_api",
+        "num_of_blocks_unpermute","num_of_experts_per_rank",
+        "num_of_in_flight_s2g_dispatch_api","num_of_in_flight_s2g_permute_block_dispatch_api",
+        "num_of_nodes","num_of_ranks_per_node","num_of_stages_dispatch_api",
+        "num_of_stages_g2s_combine_api","num_of_stages_permute_block_dispatch_api",
+        "num_of_stages_s2g_combine_api","num_of_threads_per_block_preprocessing_api",
+        "num_of_tokens_per_chunk_combine_api","num_of_tokens_per_chunk_dispatch_api",
+        "num_of_tokens_per_chunk_preprocessing_api","num_of_tokens_per_group_combine_api",
+        "pad_multiple","token_data_type"
+      ],
+      "type": "object"
+    },
+    "nullable_sha256": {"oneOf":[{"type":"null"},{"pattern":"^[0-9a-f]{64}$","type":"string"}]},
+    "oracle": {
+      "additionalProperties": false,
+      "properties": {
+        "checks": {
+          "additionalProperties": false,
+          "properties": {
+            "combine_values": {"type":"boolean"},
+            "counts": {"type":"boolean"},
+            "metadata": {"type":"boolean"},
+            "multiplicity": {"type":"boolean"},
+            "payload": {"type":"boolean"},
+            "source_set": {"type":"boolean"},
+            "weights": {"type":"boolean"}
+          },
+          "required": ["combine_values","counts","metadata","multiplicity","payload","source_set","weights"],
+          "type": "object"
+        },
+        "atol": {"const":0.02},
+        "combine_weight_semantics": {"const":"unweighted-rank-sum"},
+        "contract": {"const":"expert-specific-transform-v1"},
+        "dispatch_sha256": {"$ref":"#/$defs/nullable_sha256"},
+        "max_absolute_error": {"oneOf":[{"type":"null"},{"minimum":0,"type":"number"}]},
+        "max_elementwise_relative_error": {"oneOf":[{"type":"null"},{"minimum":0,"type":"number"}]},
+        "max_relative_error": {"oneOf":[{"type":"null"},{"minimum":0,"type":"number"}]},
+        "max_weight_error": {"oneOf":[{"type":"null"},{"minimum":0,"type":"number"}]},
+        "order_sha256": {"$ref":"#/$defs/nullable_sha256"},
+        "ordering_contract": {"minLength":1,"type":"string"},
+        "passed": {"type":"boolean"},
+        "receive_count": {"minimum":0,"type":"integer"},
+        "rtol": {"const":0.05}
+      },
+      "required": [
+        "atol",
+        "checks",
+        "combine_weight_semantics",
+        "contract",
+        "dispatch_sha256",
+        "max_absolute_error",
+        "max_elementwise_relative_error",
+        "max_relative_error",
+        "max_weight_error",
+        "order_sha256",
+        "ordering_contract",
+        "passed",
+        "receive_count",
+        "rtol"
+      ],
+      "type": "object"
+    },
+    "percentiles": {
+      "additionalProperties": false,
+      "properties": {
+        "p50": {"minimum":0,"type":"number"},
+        "p90": {"minimum":0,"type":"number"},
+        "p95": {"minimum":0,"type":"number"},
+        "p99": {"minimum":0,"type":"number"}
+      },
+      "required": ["p50","p90","p95","p99"],
+      "type": "object"
+    },
+    "component": {
+      "additionalProperties": false,
+      "properties": {
+        "availability": {"enum":["measured","derived","unavailable"]},
+        "origin": {"oneOf":[{"type":"null"},{"minLength":1,"type":"string"}]},
+        "percentiles_us": {"oneOf":[{"type":"null"},{"$ref":"#/$defs/percentiles"}]},
+        "sample_count": {"minimum":0,"type":"integer"}
+      },
+      "required": ["availability","origin","percentiles_us","sample_count"],
+      "type": "object"
+    },
+    "histogram": {
+      "additionalProperties": false,
+      "properties": {
+        "bins": {"minimum":1,"type":"integer"},
+        "counts": {"items":{"minimum":0,"type":"integer"},"minItems":1,"type":"array"},
+        "max": {"minimum":0,"type":"number"},
+        "min": {"minimum":0,"type":"number"},
+        "n": {"minimum":1,"type":"integer"}
+      },
+      "required": ["n","min","max","bins","counts"],
+      "type": "object"
+    },
+    "scheduled_case": {
+      "additionalProperties": false,
+      "properties": {
+        "backend": {"maxLength":128,"pattern":"^[a-z0-9][a-z0-9_.-]*$","type":"string"},
+        "canonical": {"const":true},
+        "ep": {"minimum":1,"type":"integer"},
+        "eplb": {"type":"boolean"},
+        "experts": {"minimum":1,"type":"integer"},
+        "gpus_per_node": {"minimum":1,"type":"integer"},
+        "hidden": {"minimum":1,"type":"integer"},
+        "ladder": {"pattern":"^[1-9][0-9]*( [1-9][0-9]*)*$","type":"string"},
+        "nodes": {"minimum":1,"type":"integer"},
+        "phase": {"enum":["decode","prefill"]},
+        "required_publication": {"enum":["official","comparable-experimental"]},
+        "routing": {"enum":["uniform","zipf"]},
+        "samples_per_point": {"const":512},
+        "scale_up_domain": {"minimum":1,"type":"integer"},
+        "suite": {"maxLength":128,"pattern":"^[a-z0-9][a-z0-9_.-]*$","type":"string"},
+        "timing": {"const":"8:64:32"},
+        "topk": {"minimum":1,"type":"integer"},
+        "warmup_semantics": {"const":"full-roundtrip-before-each-component-trial-point-v1"},
+        "workload": {"maxLength":128,"pattern":"^[a-z0-9][a-z0-9_.-]*$","type":"string"}
+      },
+      "required": [
+        "backend",
+        "canonical",
+        "eplb",
+        "ep",
+        "experts",
+        "gpus_per_node",
+        "hidden",
+        "ladder",
+        "nodes",
+        "phase",
+        "required_publication",
+        "routing",
+        "samples_per_point",
+        "scale_up_domain",
+        "suite",
+        "timing",
+        "topk",
+        "warmup_semantics",
+        "workload"
+      ],
+      "type": "object"
+    },
+    "git_run": {
+      "additionalProperties": false,
+      "properties": {
+        "artifact": {"minLength":1,"type":"string"},
+        "job": {"minLength":1,"type":"string"},
+        "ref": {"minLength":1,"type":"string"},
+        "repo": {"pattern":"^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$","type":"string"},
+        "run_attempt": {"pattern":"^[1-9][0-9]*$","type":"string"},
+        "run_id": {"pattern":"^[1-9][0-9]*$","type":"string"},
+        "source_sha": {"pattern":"^[0-9a-f]{40}$","type":"string"}
+      },
+      "required": ["artifact","job","ref","repo","run_attempt","run_id","source_sha"],
+      "type": "object"
+    }
+  },
+  "additionalProperties": false,
+  "properties": {
+    "case": {
+      "additionalProperties": false,
+      "properties": {
+        "attempt_ordinal": {"minimum":1,"type":"integer"},
+        "backend": {"maxLength":128,"pattern":"^[a-z0-9][a-z0-9_.-]*$","type":"string"},
+        "ep_size": {"minimum":1,"type":"integer"},
+        "eplb": {
+          "additionalProperties": false,
+          "properties": {
+            "enabled": {"type":"boolean"},
+            "imbalance_after": {"oneOf":[{"type":"null"},{"minimum":0,"type":"number"}]},
+            "imbalance_before": {"oneOf":[{"type":"null"},{"minimum":0,"type":"number"}]},
+            "mapping_hash": {"oneOf":[{"type":"null"},{"pattern":"^[0-9a-f]{64}$","type":"string"}]},
+            "max_replicas": {"oneOf":[{"type":"null"},{"minimum":0,"type":"integer"}]},
+            "num_logical_experts": {"minimum":1,"type":"integer"},
+            "num_physical_experts": {"minimum":1,"type":"integer"},
+            "num_redundant": {"minimum":0,"type":"integer"},
+            "planner": {"oneOf":[{"type":"null"},{"minLength":1,"type":"string"}]},
+            "reference_tokens_per_rank": {"oneOf":[{"type":"null"},{"minimum":1,"type":"integer"}]},
+            "replicated_experts": {"minimum":0,"type":"integer"}
+          },
+          "required": [
+            "enabled",
+            "imbalance_after",
+            "imbalance_before",
+            "mapping_hash",
+            "max_replicas",
+            "num_logical_experts",
+            "num_physical_experts",
+            "num_redundant",
+            "planner",
+            "reference_tokens_per_rank",
+            "replicated_experts"
+          ],
+          "type": "object"
+        },
+        "mode": {"const":"normal"},
+        "phase": {"enum":["decode","prefill"]},
+        "required_publication": {"enum":["official","comparable-experimental"]},
+        "resource_mode": {"const":"tuned"},
+        "runner": {"maxLength":128,"pattern":"^[a-z0-9][a-z0-9_.-]*$","type":"string"},
+        "shape": {
+          "additionalProperties": false,
+          "properties": {
+            "activation_profile": {"const":"canonical-counter-source-v3"},
+            "dispatch_dtype": {"const":"bf16"},
+            "eplb": {"type":"boolean"},
+            "experts": {"minimum":1,"type":"integer"},
+            "experts_per_rank": {"minimum":1,"type":"integer"},
+            "hidden": {"minimum":1,"type":"integer"},
+            "kernel_gen": {"oneOf":[{"type":"null"},{"minLength":1,"type":"string"}]},
+            "num_logical_experts": {"minimum":1,"type":"integer"},
+            "quant": {
+              "additionalProperties": false,
+              "properties": {
+                "combine_accum_dtype": {"minLength":1,"type":"string"},
+                "combine_input_dtype": {"const":"bf16"},
+                "combine_output_dtype": {"const":"bf16"},
+                "combine_quant_mode": {"const":"none"},
+                "scale_layout": {"type":"null"}
+              },
+              "required": [
+                "combine_accum_dtype",
+                "combine_input_dtype",
+                "combine_output_dtype",
+                "combine_quant_mode",
+                "scale_layout"
+              ],
+              "type": "object"
+            },
+            "routing": {"enum":["uniform","zipf"]},
+            "topk": {"minimum":1,"type":"integer"}
+          },
+          "required": [
+            "activation_profile",
+            "dispatch_dtype",
+            "eplb",
+            "experts",
+            "experts_per_rank",
+            "hidden",
+            "kernel_gen",
+            "num_logical_experts",
+            "quant",
+            "routing",
+            "topk"
+          ],
+          "type": "object"
+        },
+        "suite": {"maxLength":128,"pattern":"^[a-z0-9][a-z0-9_.-]*$","type":"string"},
+        "workload_name": {"maxLength":128,"pattern":"^[a-z0-9][a-z0-9_.-]*$","type":"string"}
+      },
+      "required": [
+        "attempt_ordinal",
+        "backend",
+        "eplb",
+        "ep_size",
+        "mode",
+        "phase",
+        "required_publication",
+        "resource_mode",
+        "runner",
+        "shape",
+        "suite",
+        "workload_name"
+      ],
+      "type": "object"
+    },
+    "format": {"const":"collectivex.ep.v1"},
+    "generated_at": {"format":"date-time","type":"string"},
+    "identity": {
+      "additionalProperties": false,
+      "properties": {
+        "allocation_factors": {
+          "additionalProperties": false,
+          "properties": {
+            "artifact": {"oneOf":[{"type":"null"},{"minLength":1,"type":"string"}]},
+            "execution_id": {"oneOf":[{"type":"null"},{"minLength":1,"type":"string"}]},
+            "job": {"oneOf":[{"type":"null"},{"minLength":1,"type":"string"}]},
+            "repo": {"oneOf":[{"type":"null"},{"minLength":1,"type":"string"}]},
+            "run_attempt": {"oneOf":[{"type":"null"},{"minLength":1,"type":"string"}]},
+            "run_id": {"oneOf":[{"type":"null"},{"minLength":1,"type":"string"}]},
+            "runner": {"oneOf":[{"type":"null"},{"minLength":1,"type":"string"}]},
+            "source_sha": {"oneOf":[{"type":"null"},{"minLength":1,"type":"string"}]}
+          },
+          "required": ["artifact","execution_id","job","repo","run_attempt","run_id","runner","source_sha"],
+          "type": "object"
+        },
+        "allocation_id": {"pattern":"^cxallocation-v1-[0-9a-f]{64}$","type":"string"},
+        "attempt_id": {"pattern":"^cxattempt-v1-[0-9a-f]{64}$","type":"string"},
+        "attempt_ordinal": {"minimum":1,"type":"integer"},
+        "case_factors": {
+          "additionalProperties": false,
+          "properties": {
+            "case": {"$ref":"#/$defs/scheduled_case"},
+            "profile": {
+              "const": {
+                "activation_generator": "collectivex-activation-counter-v3",
+                "activation_profile": "canonical-counter-source-v3",
+                "combine_dtype": "bf16",
+                "combine_quant_mode": "none",
+                "component_order_contract": "roundtrip-dispatch-activation-only-combine-v2",
+                "conditioning_contract": "fixed-phase-ramp-8-roundtrips-v1",
+                "contract": "layout-and-dispatch-v1",
+                "dtype": "bf16",
+                "eplb_planner": "greedy-rank-major-v1",
+                "eplb_redundant_experts": 32,
+                "eplb_reference_tokens_per_rank": 2048,
+                "mode": "normal",
+                "oracle_contract": "expert-specific-transform-v1",
+                "oracle_tolerances": "rtol=0.05,atol=0.02",
+                "placement": "packed",
+                "percentile_method": "nearest-rank",
+                "rank_reduction": "cross-rank-max-per-iteration",
+                "resource_mode": "tuned",
+                "routing_generator": "collectivex-routing-counter-v3",
+                "sampling_contract": "fixed-512-v1",
+                "seed": 67
+              }
+            },
+            "sku": {"maxLength":128,"pattern":"^[a-z0-9][a-z0-9_.-]*$","type":"string"}
+          },
+          "required": ["case","profile","sku"],
+          "type": "object"
+        },
+        "case_id": {"pattern":"^cxcase-v1-[0-9a-f]{64}$","type":"string"},
+        "series_factors": {
+          "additionalProperties": false,
+          "properties": {
+            "backend": {"maxLength":128,"pattern":"^[a-z0-9][a-z0-9_.-]*$","type":"string"},
+            "implementation_contract_sha256": {"pattern":"^[0-9a-f]{64}$","type":"string"},
+            "public_config_sha256": {"pattern":"^[0-9a-f]{64}$","type":"string"},
+            "routing_control_sha256": {"pattern":"^[0-9a-f]{64}$","type":"string"},
+            "case_id": {"pattern":"^cxcase-v1-[0-9a-f]{64}$","type":"string"},
+            "image_digest": {"oneOf":[{"type":"null"},{"minLength":1,"type":"string"}]},
+            "runtime_fingerprint_sha256": {"pattern":"^[0-9a-f]{64}$","type":"string"},
+            "source_sha": {"oneOf":[{"type":"null"},{"pattern":"^[0-9a-f]{40}$","type":"string"}]},
+            "squash_sha256": {"oneOf":[{"type":"null"},{"pattern":"^[0-9a-f]{64}$","type":"string"}]},
+            "workload_id": {"pattern":"^cxwork-v1-[0-9a-f]{64}$","type":"string"}
+          },
+          "required": [
+            "backend",
+            "implementation_contract_sha256",
+            "public_config_sha256",
+            "routing_control_sha256",
+            "case_id",
+            "image_digest",
+            "runtime_fingerprint_sha256",
+            "source_sha",
+            "squash_sha256",
+            "workload_id"
+          ],
+          "type": "object"
+        },
+        "series_id": {"pattern":"^cxseries-v1-[0-9a-f]{64}$","type":"string"}
+      },
+      "required": [
+        "allocation_factors",
+        "allocation_id",
+        "attempt_id",
+        "attempt_ordinal",
+        "case_factors",
+        "case_id",
+        "series_factors",
+        "series_id"
+      ],
+      "type": "object"
+    },
+    "implementation": {
+      "additionalProperties": false,
+      "properties": {
+        "kernel_generation": {"oneOf":[{"type":"null"},{"minLength":1,"type":"string"}]},
+        "name": {"maxLength":128,"pattern":"^[a-z0-9][a-z0-9_.-]*$","type":"string"},
+        "provenance": {
+          "properties": {
+            "allow_hybrid_mode": {"const":false},
+            "communication_backend": {"const":"nccl-device-lsa"},
+            "deepep_fix_pr": {"const":630},
+            "deepep_pr": {"const":605},
+            "deterministic": {"type": "boolean"},
+            "gin_enabled": {"const":false},
+            "jit_cubins": {
+              "items": {"$ref":"#/$defs/deepep_v2_jit_cubin"},
+              "maxItems": 5,
+              "minItems": 5,
+              "type": "array",
+              "uniqueItems": true
+            },
+            "jit_kernel_keys": {
+              "items": {"maxLength":512,"pattern":"^[A-Za-z0-9][A-Za-z0-9_.+-]*$","type":"string"},
+              "maxItems": 3,
+              "minItems": 3,
+              "type": "array",
+              "uniqueItems": true
+            },
+            "jit_random_seed": {"const":"collectivex-deepep-v2-fa8a9b1"},
+            "jit_shared_objects": {
+              "items": {
+                "additionalProperties": false,
+                "properties": {
+                  "kernel_key": {"maxLength":512,"pattern":"^[A-Za-z0-9][A-Za-z0-9_.+-]*$","type":"string"},
+                  "rank_artifacts": {
+                    "items": {"$ref":"#/$defs/hybrid_jit_rank_artifact"},
+                    "minItems": 1,
+                    "type": "array"
+                  }
+                },
+                "required": ["kernel_key","rank_artifacts"],
+                "type": "object"
+              },
+              "maxItems": 3,
+              "minItems": 3,
+              "type": "array"
+            },
+            "num_experts": {"minimum": 1, "type": "integer"},
+            "realized_config": {"$ref":"#/$defs/hybrid_realized_config"},
+            "tuning_num_experts": {"minimum": 1, "type": "integer"},
+            "uccl_dependency_versions": {
+              "additionalProperties": false,
+              "properties": {
+                "intervaltree": {"const":"3.1.0"},
+                "nvidia-cuda-runtime-cu12": {"const":"12.9.79"},
+                "sortedcontainers": {"const":"2.4.0"}
+              },
+              "required": ["intervaltree","nvidia-cuda-runtime-cu12","sortedcontainers"],
+              "type": "object"
+            }
+          },
+          "type": "object",
+          "propertyNames": {
+            "enum": [
+              "allocated_qps",
+              "allow_hybrid_mode",
+              "allow_mnnvl",
+              "allow_multiple_reduction",
+              "api",
+              "api_signature_sha256",
+              "backend",
+              "backend_lineage",
+              "block_num",
+              "block_num_floored",
+              "block_num_target",
+              "branch",
+              "collective_library",
+              "combine_dtype",
+              "combine_warps",
+              "communication_backend",
+              "cuda_version",
+              "deepep_commit",
+              "deepep_distribution_version",
+              "deepep_fix_pr",
+              "deepep_pr",
+              "deepep_tree",
+              "deepep_version",
+              "deterministic",
+              "device_cus",
+              "device_sms",
+              "dispatch_dtype",
+              "dispatch_warps",
+              "enable_sdma",
+              "fmt_commit",
+              "gpus_per_node",
+              "gin_enabled",
+              "heap_size",
+              "impl",
+              "jit_cache_key",
+              "jit_cubins",
+              "jit_kernel_keys",
+              "jit_random_seed",
+              "jit_shared_objects",
+              "kernel_type",
+              "loaded_libraries",
+              "local_experts",
+              "logical_scaleout_ranks",
+              "logical_scaleup_ranks",
+              "mapping_variant",
+              "max_num_inp_token_per_rank",
+              "max_num_tokens",
+              "max_total_recv_tokens",
+              "mnnvl_comm",
+              "mode",
+              "mori_commit",
+              "nccl_communicator",
+              "nccl_package_version",
+              "nccl_version",
+              "num_experts",
+              "num_max_tokens_per_rank",
+              "num_nvl_bytes",
+              "num_qps",
+              "num_sms",
+              "nvshmem_package_version",
+              "path",
+              "physical_nvlink_ranks",
+              "physical_rdma_ranks",
+              "prefer_overlap_with_compute",
+              "reference_semantics",
+              "realized_config",
+              "requested_num_sms",
+              "resource_mode",
+              "routing_factor",
+              "routing_metadata",
+              "sm_fraction",
+              "top_k",
+              "torch_git_version",
+              "torch_version",
+              "transport",
+              "trtllm",
+              "tuned_source",
+              "tuning_num_experts",
+              "uccl_commit",
+              "uccl_dependency_versions",
+              "uccl_version",
+              "uccl_wrapper_commit",
+              "workspace"
+            ]
+          }
+        },
+        "resource_profile": {
+          "additionalProperties": false,
+          "properties": {
+            "achieved_fraction": {},
+            "comm_units_kind": {},
+            "configured_units": {},
+            "conformance_class": {},
+            "device_units": {},
+            "fixed_kernel": {},
+            "nonconforming": {},
+            "pareto_eligible": {},
+            "persistent_bytes": {},
+            "qps_per_rank": {},
+            "requested_fraction": {},
+            "tuned_source": {},
+            "target_achieved_within_tol": {},
+            "tolerance": {},
+            "resource_class": {},
+            "warps_combine": {},
+            "warps_dispatch": {}
+          },
+          "required": [
+            "comm_units_kind",
+            "requested_fraction",
+            "configured_units",
+            "device_units",
+            "achieved_fraction",
+            "warps_dispatch",
+            "warps_combine",
+            "qps_per_rank",
+            "persistent_bytes",
+            "tuned_source",
+            "resource_class",
+            "conformance_class",
+            "tolerance",
+            "target_achieved_within_tol",
+            "nonconforming",
+            "fixed_kernel",
+            "pareto_eligible"
+          ],
+          "type": "object"
+        }
+      },
+      "required": ["kernel_generation","name","provenance","resource_profile"],
+      "type": "object"
+    },
+    "measurement": {
+      "additionalProperties": false,
+      "properties": {
+        "component_order_contract": {"const":"roundtrip-dispatch-activation-only-combine-v2"},
+        "conditioning": {
+          "additionalProperties": false,
+          "properties": {
+            "contract": {"const":"fixed-phase-ramp-8-roundtrips-v1"},
+            "ladder": {"items":{"minimum":1,"type":"integer"},"minItems":1,"type":"array"},
+            "roundtrips_per_shape": {"const":8}
+          },
+          "required": ["contract","ladder","roundtrips_per_shape"],
+          "type": "object"
+        },
+        "contract": {"const":"layout-and-dispatch-v1"},
+        "rows": {
+          "items": {
+            "additionalProperties": false,
+            "properties": {
+              "anomalies": {
+                "items": {
+                  "additionalProperties": false,
+                  "properties": {
+                    "T": {"minimum":1,"type":"integer"},
+                    "component_floor_p50": {"minimum":0,"type":"number"},
+                    "isolated_sum_p99": {"minimum":0,"type":"number"},
+                    "ratio": {"minimum":0,"type":"number"},
+                    "roundtrip_p50": {"minimum":0,"type":"number"},
+                    "roundtrip_p99": {"minimum":0,"type":"number"},
+                    "threshold": {"minimum":0,"type":"number"},
+                    "type": {"enum":["roundtrip_gt_isolated_sum","roundtrip_lt_component_floor"]}
+                  },
+                  "required": ["type","T"],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "components": {
+                "additionalProperties": false,
+                "properties": {
+                  "combine": {"$ref":"#/$defs/component"},
+                  "dispatch": {"$ref":"#/$defs/component"},
+                  "isolated_sum": {"$ref":"#/$defs/component"},
+                  "roundtrip": {"$ref":"#/$defs/component"}
+                },
+                "required": ["combine","dispatch","isolated_sum","roundtrip"],
+                "type": "object"
+              },
+              "correctness": {
+                "additionalProperties": false,
+                "properties": {
+                  "contract": {"const":"expert-specific-transform-v1"},
+                  "max_relative_error": {"minimum":0,"type":"number"},
+                  "passed": {"type":"boolean"},
+                  "rank_evidence": {
+                    "items": {
+                      "additionalProperties": false,
+                      "properties": {
+                        "input_unchanged": {"type":"boolean"},
+                        "order_stable": {"type":"boolean"},
+                        "post_timing": {"$ref":"#/$defs/oracle"},
+                        "pre_timing": {"$ref":"#/$defs/oracle"},
+                        "rank": {"minimum":0,"type":"integer"}
+                      },
+                      "required": ["input_unchanged","order_stable","post_timing","pre_timing","rank"],
+                      "type": "object"
+                    },
+                    "minItems": 1,
+                    "type": "array"
+                  },
+                  "scope": {"const":"dispatch-metadata-and-transformed-combine"}
+                },
+                "required": ["contract","max_relative_error","passed","rank_evidence","scope"],
+                "type": "object"
+              },
+              "evidence_id": {"pattern":"^cxevidence-v1-[0-9a-f]{64}$","type":"string"},
+              "global_tokens": {"minimum":1,"type":"integer"},
+              "logical_bytes": {
+                "additionalProperties": false,
+                "properties": {
+                  "combine": {"minimum":1,"type":"integer"},
+                  "dispatch": {"minimum":1,"type":"integer"},
+                  "roundtrip": {"minimum":1,"type":"integer"}
+                },
+                "required": ["combine","dispatch","roundtrip"],
+                "type": "object"
+              },
+              "point_id": {"pattern":"^cxpoint-v1-[0-9a-f]{64}$","type":"string"},
+              "receive": {
+                "additionalProperties": false,
+                "properties": {
+                  "max": {"minimum":0,"type":"integer"},
+                  "mean": {"minimum":0,"type":"number"},
+                  "min": {"minimum":0,"type":"integer"},
+                  "total": {"minimum":0,"type":"integer"}
+                },
+                "required": ["max","mean","min","total"],
+                "type": "object"
+              },
+              "routing": {
+                "additionalProperties": false,
+                "properties": {
+                  "empty_expert_count": {"minimum":0,"type":"integer"},
+                  "empty_rank_count": {"minimum":0,"type":"integer"},
+                  "expert_assignment_rank_cv": {"minimum":0,"type":"number"},
+                  "expert_assignments_per_rank": {"items":{"minimum":0,"type":"integer"},"type":"array"},
+                  "expert_load_cv": {"minimum":0,"type":"number"},
+                  "expert_load_max": {"minimum":0,"type":"integer"},
+                  "expert_load_mean": {"minimum":0,"type":"number"},
+                  "expert_load_min": {"minimum":0,"type":"integer"},
+                  "fanout_histogram": {"items":{"minimum":0,"type":"integer"},"type":"array"},
+                  "fanout_max": {"minimum":1,"type":"integer"},
+                  "fanout_mean": {"minimum":0,"type":"number"},
+                  "fanout_min": {"minimum":1,"type":"integer"},
+                  "hash": {"pattern":"^[0-9a-f]{64}$","type":"string"},
+                  "hotspot_ratio": {"minimum":0,"type":"number"},
+                  "locality": {
+                    "oneOf": [
+                      {"type":"null"},
+                      {
+                        "additionalProperties": false,
+                        "properties": {
+                          "copies": {"minimum":0,"type":"integer"},
+                          "cross_domain_fraction": {"minimum":0,"type":"number"},
+                          "cross_node_fraction": {"minimum":0,"type":"number"},
+                          "gpus_per_node": {"minimum":1,"type":"integer"},
+                          "local_rank_fraction": {"minimum":0,"type":"number"},
+                          "placement": {"const":"packed"},
+                          "same_node_fraction": {"minimum":0,"type":"number"},
+                          "same_scaleup_domain_fraction": {"minimum":0,"type":"number"},
+                          "scale_up_domain": {"minimum":1,"type":"integer"}
+                        },
+                        "required": [
+                          "placement",
+                          "local_rank_fraction",
+                          "same_node_fraction",
+                          "same_scaleup_domain_fraction",
+                          "cross_node_fraction",
+                          "cross_domain_fraction",
+                          "gpus_per_node",
+                          "scale_up_domain",
+                          "copies"
+                        ],
+                        "type": "object"
+                      }
+                    ]
+                  },
+                  "payload_copies_per_rank": {"items":{"minimum":0,"type":"integer"},"type":"array"},
+                  "payload_rank_cv": {"minimum":0,"type":"number"},
+                  "routed_copies": {"minimum":1,"type":"integer"},
+                  "source_token_stats": {
+                    "oneOf": [
+                      {"type":"null"},
+                      {
+                        "additionalProperties": false,
+                        "properties": {
+                          "cv": {"minimum":0,"type":"number"},
+                          "empty_ranks": {"minimum":0,"type":"integer"},
+                          "max": {"minimum":0,"type":"integer"},
+                          "mean": {"minimum":0,"type":"number"},
+                          "min": {"minimum":0,"type":"integer"},
+                          "ranks": {"minimum":1,"type":"integer"},
+                          "total": {"minimum":0,"type":"integer"}
+                        },
+                        "required": ["min","mean","max","cv","empty_ranks","total","ranks"],
+                        "type": "object"
+                      }
+                    ]
+                  }
+                },
+                "required": [
+                  "empty_expert_count",
+                  "empty_rank_count",
+                  "expert_assignment_rank_cv",
+                  "expert_assignments_per_rank",
+                  "expert_load_cv",
+                  "expert_load_max",
+                  "expert_load_mean",
+                  "expert_load_min",
+                  "fanout_histogram",
+                  "fanout_max",
+                  "fanout_mean",
+                  "fanout_min",
+                  "hash",
+                  "hotspot_ratio",
+                  "locality",
+                  "payload_copies_per_rank",
+                  "payload_rank_cv",
+                  "routed_copies",
+                  "source_token_stats"
+                ],
+                "type": "object"
+              },
+              "sample_histograms": {
+                "additionalProperties": false,
+                "properties": {
+                  "combine": {"oneOf":[{"type":"null"},{"$ref":"#/$defs/histogram"}]},
+                  "dispatch": {"oneOf":[{"type":"null"},{"$ref":"#/$defs/histogram"}]},
+                  "roundtrip": {"$ref":"#/$defs/histogram"}
+                },
+                "required": ["dispatch","combine","roundtrip"],
+                "type": "object"
+              },
+              "sample_sha256": {"pattern":"^[0-9a-f]{64}$","type":"string"},
+              "token_rate_at_latency_percentile": {"$ref":"#/$defs/percentiles"},
+              "tokens_per_rank": {"minimum":1,"type":"integer"}
+            },
+            "required": [
+              "anomalies",
+              "components",
+              "correctness",
+              "evidence_id",
+              "global_tokens",
+              "logical_bytes",
+              "point_id",
+              "receive",
+              "routing",
+              "sample_histograms",
+              "sample_sha256",
+              "token_rate_at_latency_percentile",
+              "tokens_per_rank"
+            ],
+            "type": "object"
+          },
+          "minItems": 1,
+          "type": "array"
+        },
+        "sampling": {
+          "additionalProperties": false,
+          "properties": {
+            "contract": {"const":"fixed-512-v1"},
+            "iterations_per_trial": {"const":8},
+            "percentile_method": {"const":"nearest-rank"},
+            "reduction": {"const":"cross-rank-max-per-iteration"},
+            "samples_per_component": {"const":512},
+            "trials": {"const":64},
+            "warmup_iterations": {"const":32},
+            "warmup_semantics": {"const":"full-roundtrip-before-each-component-trial-point-v1"}
+          },
+          "required": [
+            "contract",
+            "iterations_per_trial",
+            "percentile_method",
+            "reduction",
+            "samples_per_component",
+            "trials",
+            "warmup_iterations",
+            "warmup_semantics"
+          ],
+          "type": "object"
+        },
+        "source_allocation": {"const":"even"}
+      },
+      "required": [
+        "component_order_contract",
+        "conditioning",
+        "contract",
+        "rows",
+        "sampling",
+        "source_allocation"
+      ],
+      "type": "object"
+    },
+    "outcome": {
+      "additionalProperties": false,
+      "properties": {
+        "publication_status": {"enum":["diagnostic","invalid"]},
+        "reasons": {"items":{"type":"string"},"type":"array"},
+        "status": {"enum":["success","invalid"]},
+        "validity": {
+          "additionalProperties": false,
+          "properties": {
+            "anomaly_free": {"type":"boolean"},
+            "execution_status": {"enum":["complete","failed"]},
+            "measurement_conformance": {"enum":["conformant","nonconformant"]},
+            "provenance_complete": {"type":"boolean"},
+            "resource_conformance": {"minLength":1,"type":"string"},
+            "sampling_conformance": {"enum":["conformant","nonconformant"]},
+            "semantic_correctness": {"enum":["pass","fail"]},
+            "workload_identity": {"enum":["consistent-across-ranks","inconsistent"]},
+            "workload_source": {"enum":["canonical-serialized","seeded-runtime"]}
+          },
+          "required": [
+            "execution_status",
+            "semantic_correctness",
+            "workload_identity",
+            "workload_source",
+            "measurement_conformance",
+            "sampling_conformance",
+            "resource_conformance",
+            "provenance_complete",
+            "anomaly_free"
+          ],
+          "type": "object"
+        }
+      },
+      "required": ["publication_status","reasons","status","validity"],
+      "type": "object"
+    },
+    "provenance": {
+      "additionalProperties": false,
+      "properties": {
+        "command": {"minLength":1,"type":"string"},
+        "distributed_launcher": {"oneOf":[{"type":"null"},{"minLength":1,"type":"string"}]},
+        "git_run": {"oneOf":[{"type":"null"},{"$ref":"#/$defs/git_run"}]},
+        "image": {
+          "additionalProperties": false,
+          "properties": {
+            "arch": {"maxLength":128,"pattern":"^[a-z0-9][a-z0-9_.-]*$","type":"string"},
+            "digest": {
+              "oneOf": [{"type":"null"},{"pattern":"^sha256:[0-9a-f]{64}$","type":"string"}]
+            },
+            "digest_verified": {"type":"boolean"},
+            "reference": {"oneOf":[{"type":"null"},{"minLength":1,"type":"string"}]},
+            "squash_sha256": {"oneOf":[{"type":"null"},{"pattern":"^[0-9a-f]{64}$","type":"string"}]}
+          },
+          "required": ["arch","digest","digest_verified","reference","squash_sha256"],
+          "type": "object"
+        },
+        "redaction": {"const":"sanitized-v1"}
+      },
+      "required": ["command","distributed_launcher","git_run","image","redaction"],
+      "type": "object"
+    },
+    "record_type": {"const":"case-attempt"},
+    "runtime_fingerprint": {
+      "additionalProperties": false,
+      "properties": {
+        "accelerator_runtime": {
+          "additionalProperties": false,
+          "properties": {
+            "kind": {"enum":["cuda","hip"]},
+            "version": {"oneOf":[{"type":"null"},{"minLength":1,"type":"string"}]}
+          },
+          "required": ["kind","version"],
+          "type": "object"
+        },
+        "collective_library": {
+          "additionalProperties": false,
+          "properties": {
+            "kind": {"enum":["nccl","rccl"]},
+            "version": {"oneOf":[{"type":"null"},{"minLength":1,"type":"string"}]}
+          },
+          "required": ["kind","version"],
+          "type": "object"
+        },
+        "device": {
+          "additionalProperties": false,
+          "properties": {
+            "arch": {"minLength":1,"type":"string"},
+            "compute_units": {"minimum":1,"type":"integer"},
+            "memory_bytes": {"minimum":1,"type":"integer"},
+            "product": {"minLength":1,"type":"string"},
+            "warp_size": {"minimum":1,"type":"integer"}
+          },
+          "required": ["arch","compute_units","memory_bytes","product","warp_size"],
+          "type": "object"
+        },
+        "driver_version": {"oneOf":[{"type":"null"},{"minLength":1,"type":"string"}]},
+        "framework": {
+          "additionalProperties": false,
+          "properties": {"kind":{"const":"torch"},"version":{"minLength":1,"type":"string"}},
+          "required": ["kind","version"],
+          "type": "object"
+        },
+        "machine": {"maxLength":128,"pattern":"^[a-z0-9][a-z0-9_.-]*$","type":"string"},
+        "python_version": {"minLength":1,"type":"string"},
+        "vendor": {"enum":["nvidia","amd"]}
+      },
+      "required": [
+        "accelerator_runtime",
+        "collective_library",
+        "device",
+        "driver_version",
+        "framework",
+        "machine",
+        "python_version",
+        "vendor"
+      ],
+      "type": "object"
+    },
+    "sample_artifact": {
+      "additionalProperties": false,
+      "properties": {
+        "bytes": {"minimum":1,"type":"integer"},
+        "format": {"const":"collectivex.samples.v1"},
+        "path": {"pattern":"^[A-Za-z0-9_.-]+$","type":"string"},
+        "sha256": {"pattern":"^[0-9a-f]{64}$","type":"string"}
+      },
+      "required": ["bytes","format","path","sha256"],
+      "type": "object"
+    },
+    "schema_version": {"const":1},
+    "topology": {
+      "additionalProperties": false,
+      "properties": {
+        "device_count": {"minimum":1,"type":"integer"},
+        "device_product": {"minLength":1,"type":"string"},
+        "gpus_per_node": {"minimum":1,"type":"integer"},
+        "nodes": {"minimum":1,"type":"integer"},
+        "placement": {"const":"packed"},
+        "realized_placement": {
+          "additionalProperties": false,
+          "properties": {
+            "gpus_per_node": {"minimum":1,"type":"integer"},
+            "nodes": {"minimum":1,"type":"integer"},
+            "ranks_per_node": {"minimum":1,"type":"integer"},
+            "unique_local_ranks": {"const":true},
+            "valid": {"const":true}
+          },
+          "required": ["gpus_per_node","nodes","ranks_per_node","unique_local_ranks","valid"],
+          "type": "object"
+        },
+        "scale_up_domain": {"minimum":1,"type":"integer"},
+        "topology_class": {"maxLength":128,"pattern":"^[a-z0-9][a-z0-9_.-]*$","type":"string"},
+        "transport": {"maxLength":128,"pattern":"^[a-z0-9][a-z0-9_.-]*$","type":"string"},
+        "world_size": {"minimum":1,"type":"integer"}
+      },
+      "required": [
+        "device_count",
+        "device_product",
+        "gpus_per_node",
+        "nodes",
+        "placement",
+        "realized_placement",
+        "scale_up_domain",
+        "topology_class",
+        "transport",
+        "world_size"
+      ],
+      "type": "object"
+    },
+    "workload": {
+      "additionalProperties": false,
+      "properties": {
+        "activation_generator": {"const":"collectivex-activation-counter-v3"},
+        "activation_identity": {"pattern":"^[0-9a-f]{64}$","type":"string"},
+        "activation_profile": {"const":"canonical-counter-source-v3"},
+        "cross_rank_consistent": {"const":true},
+        "manifest_checksums": {
+          "oneOf": [
+            {"type":"null"},
+            {
+              "additionalProperties": {
+                "additionalProperties": false,
+                "properties": {
+                  "topk_idx": {"pattern":"^[0-9a-f]{64}$","type":"string"},
+                  "topk_weights": {"pattern":"^[0-9a-f]{64}$","type":"string"},
+                  "trace": {"pattern":"^[0-9a-f]{64}$","type":"string"}
+                },
+                "required": ["topk_idx", "topk_weights", "trace"],
+                "type": "object"
+              },
+              "type": "object"
+            }
+          ]
+        },
+        "members": {
+          "oneOf": [
+            {"type":"null"},
+            {
+              "items": {"pattern":"^cxwork-v1-[0-9a-f]{64}$","type":"string"},
+              "minItems": 1,
+              "uniqueItems": true,
+              "type": "array"
+            }
+          ]
+        },
+        "routing_generator": {"const":"collectivex-routing-counter-v3"},
+        "source": {"enum":["canonical-serialized","seeded-runtime"]},
+        "trace_hashes": {
+          "items": {"pattern":"^[0-9a-f]{64}$","type":"string"},
+          "minItems": 1,
+          "type": "array"
+        },
+        "trace_signature": {"pattern":"^[0-9a-f]{64}$","type":"string"},
+        "workload_id": {
+          "oneOf": [{"type":"null"},{"pattern":"^cxwork-v1-[0-9a-f]{64}$","type":"string"}]
+        }
+      },
+      "required": [
+        "activation_generator",
+        "activation_identity",
+        "activation_profile",
+        "cross_rank_consistent",
+        "manifest_checksums",
+        "members",
+        "routing_generator",
+        "source",
+        "trace_hashes",
+        "trace_signature",
+        "workload_id"
+      ],
+      "type": "object"
+    }
+  },
+  "required": [
+    "case",
+    "format",
+    "generated_at",
+    "identity",
+    "implementation",
+    "measurement",
+    "outcome",
+    "provenance",
+    "record_type",
+    "runtime_fingerprint",
+    "sample_artifact",
+    "schema_version",
+    "topology",
+    "workload"
+  ],
+  "title": "CollectiveX raw case attempt v1",
+  "type": "object"
+}
diff --git a/experimental/CollectiveX/schemas/samples-v1.schema.json b/experimental/CollectiveX/schemas/samples-v1.schema.json
new file mode 100644
index 0000000000..b9a1df0541
--- /dev/null
+++ b/experimental/CollectiveX/schemas/samples-v1.schema.json
@@ -0,0 +1,80 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://inferencex.com/schemas/collectivex/samples-v1.schema.json",
+  "title": "CollectiveX exact private samples v1",
+  "type": "object",
+  "additionalProperties": false,
+  "required": ["allocation_id","attempt_id","case_id","format","points","sampling","schema_version","series_id"],
+  "properties": {
+      "allocation_id": {"$ref": "#/$defs/allocationId"},
+      "attempt_id": {"$ref": "#/$defs/attemptId"},
+      "case_id": {"$ref": "#/$defs/caseId"},
+      "format": {"const": "collectivex.samples.v1"},
+      "points": {
+            "type": "array",
+            "minItems": 1,
+            "items": {
+                    "type": "object",
+                    "additionalProperties": false,
+                    "required": ["components","evidence_id","point_id","sample_sha256","tokens_per_rank"],
+                    "properties": {
+                              "components": {
+                                          "type": "object",
+                                          "additionalProperties": false,
+                                          "required": ["combine","dispatch","roundtrip"],
+                                          "properties": {
+                                                        "combine": {"$ref": "#/$defs/component"},
+                                                        "dispatch": {"$ref": "#/$defs/component"},
+                                                        "roundtrip": {"$ref": "#/$defs/component"}
+                                                      }
+                                        },
+                              "evidence_id": {"$ref": "#/$defs/evidenceId"},
+                              "point_id": {"$ref": "#/$defs/pointId"},
+                              "sample_sha256": {"$ref": "#/$defs/sha256"},
+                              "tokens_per_rank": {"type": "integer","minimum": 1}
+                            }
+                  }
+          },
+      "sampling": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": ["iterations_per_trial","reduction","trials"],
+            "properties": {
+                    "iterations_per_trial": {"const": 8},
+                    "reduction": {"const": "cross-rank-max-per-iteration"},
+                    "trials": {"const": 64}
+                  }
+          },
+      "schema_version": {"const": 1},
+      "series_id": {"$ref": "#/$defs/seriesId"}
+    },
+  "$defs": {
+      "sha256": {"type": "string","pattern": "^[0-9a-f]{64}$"},
+      "caseId": {"type": "string","pattern": "^cxcase-v1-[0-9a-f]{64}$"},
+      "seriesId": {"type": "string","pattern": "^cxseries-v1-[0-9a-f]{64}$"},
+      "pointId": {"type": "string","pattern": "^cxpoint-v1-[0-9a-f]{64}$"},
+      "evidenceId": {"type": "string","pattern": "^cxevidence-v1-[0-9a-f]{64}$"},
+      "allocationId": {"type": "string","pattern": "^cxallocation-v1-[0-9a-f]{64}$"},
+      "attemptId": {"type": "string","pattern": "^cxattempt-v1-[0-9a-f]{64}$"},
+      "component": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": ["availability","sample_count","trials"],
+            "properties": {
+                    "availability": {"enum": ["measured","unavailable"]},
+                    "sample_count": {"type": "integer","minimum": 0,"maximum": 512},
+                    "trials": {
+                              "oneOf": [
+                                          {"type": "null"},
+                                          {
+                                                        "type": "array",
+                                                        "minItems": 64,
+                                                        "maxItems": 64,
+                                                        "items": {"type": "array","minItems": 8,"maxItems": 8,"items": {"type": "number","minimum": 0}}
+                                                      }
+                                        ]
+                            }
+                  }
+          }
+    }
+}
diff --git a/experimental/CollectiveX/schemas/terminal-outcome-v1.schema.json b/experimental/CollectiveX/schemas/terminal-outcome-v1.schema.json
new file mode 100644
index 0000000000..9c28613064
--- /dev/null
+++ b/experimental/CollectiveX/schemas/terminal-outcome-v1.schema.json
@@ -0,0 +1,246 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://inferencex.com/schemas/collectivex/terminal-outcome-v1.schema.json",
+  "title": "CollectiveX terminal outcome v1",
+  "type": "object",
+  "additionalProperties": false,
+  "required": ["case","format","generated_at","identity","outcome","provenance","record_type","schema_version"],
+  "properties": {
+      "case": {"$ref": "#/$defs/case"},
+      "format": {"const": "collectivex.terminal.v1"},
+      "generated_at": {"type": "string","format": "date-time"},
+      "identity": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": ["allocation_factors","allocation_id","attempt_id","attempt_ordinal","case_factors","case_id"],
+            "properties": {
+                    "allocation_factors": {"$ref": "#/$defs/allocationFactors"},
+                    "allocation_id": {"$ref": "#/$defs/allocationId"},
+                    "attempt_id": {"$ref": "#/$defs/attemptId"},
+                    "attempt_ordinal": {"type": "integer","minimum": 1},
+                    "case_factors": {
+                              "type": "object",
+                              "additionalProperties": false,
+                              "required": ["case","profile","sku"],
+                              "properties": {
+                                          "case": {"$ref": "#/$defs/case"},
+                                          "profile": {
+                                                        "const": {
+                                                                        "activation_generator": "collectivex-activation-counter-v3",
+                                                                        "activation_profile": "canonical-counter-source-v3",
+                                                                        "combine_dtype": "bf16",
+                                                                        "combine_quant_mode": "none",
+                                                                        "component_order_contract": "roundtrip-dispatch-activation-only-combine-v2",
+                                                                        "conditioning_contract": "fixed-phase-ramp-8-roundtrips-v1",
+                                                                        "contract": "layout-and-dispatch-v1",
+                                                                        "dtype": "bf16",
+                                                                        "eplb_planner": "greedy-rank-major-v1",
+                                                                        "eplb_redundant_experts": 32,
+                                                                        "eplb_reference_tokens_per_rank": 2048,
+                                                                        "mode": "normal",
+                                                                        "oracle_contract": "expert-specific-transform-v1",
+                                                                        "oracle_tolerances": "rtol=0.05,atol=0.02",
+                                                                        "placement": "packed",
+                                                                        "percentile_method": "nearest-rank",
+                                                                        "rank_reduction": "cross-rank-max-per-iteration",
+                                                                        "resource_mode": "tuned",
+                                                                        "routing_generator": "collectivex-routing-counter-v3",
+                                                                        "sampling_contract": "fixed-512-v1",
+                                                                        "seed": 67
+                                                                      }
+                                                      },
+                                          "sku": {"$ref": "#/$defs/safeId"}
+                                        }
+                            },
+                    "case_id": {"$ref": "#/$defs/caseId"}
+                  }
+          },
+      "outcome": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": ["failure_mode","reason","return_code","status"],
+            "properties": {
+                    "failure_mode": {"$ref": "#/$defs/safeId"},
+                    "reason": {"type": "string","minLength": 1,"maxLength": 240},
+                    "return_code": {"type": "integer","minimum": 0},
+                    "status": {"enum": ["failed","invalid","unsupported"]}
+                  }
+          },
+      "provenance": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": ["git_run","control_sha256","redaction","source"],
+            "properties": {
+                    "git_run": {"$ref": "#/$defs/gitRun"},
+                    "control_sha256": {"oneOf": [{"type": "null"},{"$ref": "#/$defs/sha256"}]},
+                    "redaction": {"const": "sanitized-v1"},
+                    "source": {
+                              "enum": [
+                                          "runtime-emitter",
+                                          "post-emit-command",
+                                          "matrix-capability-resolver"
+                                        ]
+                            }
+                  }
+          },
+      "record_type": {"const": "terminal-outcome"},
+      "schema_version": {"const": 1}
+    },
+  "allOf": [
+      {
+            "oneOf": [
+                    {
+                          "properties": {
+                                  "provenance": {
+                                          "properties": {"source": {"const": "runtime-emitter"}}
+                                        },
+                                  "outcome": {"$ref": "#/$defs/runtimeOutcome"}
+                                }
+                        },
+                    {
+                          "properties": {
+                                  "provenance": {
+                                          "properties": {"source": {"const": "post-emit-command"}}
+                                        },
+                                  "outcome": {"$ref": "#/$defs/postEmitOutcome"}
+                                }
+                        },
+                    {
+                          "properties": {
+                                  "provenance": {
+                                          "properties": {"source": {"const": "matrix-capability-resolver"}}
+                                        },
+                                  "outcome": {"$ref": "#/$defs/capabilityOutcome"}
+                                }
+                        }
+                  ]
+          }
+    ],
+  "$defs": {
+      "runtimeOutcome": {
+            "type": "object",
+            "properties": {"status": {"const": "failed"}},
+            "allOf": [
+                    {
+                          "oneOf": [
+                                  {"properties": {"failure_mode": {"const": "setup"}, "reason": {"const": "launcher-setup-failed"}}},
+                                  {"properties": {"failure_mode": {"const": "repository-stage"}, "reason": {"const": "repository-staging-failed"}}},
+                                  {"properties": {"failure_mode": {"const": "registry-verification"}, "reason": {"const": "container-registry-verification-failed"}}},
+                                  {"properties": {"failure_mode": {"const": "scheduler-allocation"}, "reason": {"const": "scheduler-allocation-failed"}}},
+                                  {"properties": {"failure_mode": {"const": "container-import"}, "reason": {"const": "container-image-preparation-failed"}}},
+                                  {"properties": {"failure_mode": {"const": "container-hash"}, "reason": {"const": "container-image-identity-failed"}}},
+                                  {"properties": {"failure_mode": {"const": "container-launch"}, "reason": {"const": "container-runtime-launch-failed"}}},
+                                  {"properties": {"failure_mode": {"const": "backend-setup"}, "reason": {"const": "backend-setup-failed"}}},
+                                  {"properties": {"failure_mode": {"const": "artifact-collection"}, "reason": {"const": "artifact-collection-failed"}}},
+                                  {"properties": {"failure_mode": {"const": "runtime-identity"}, "reason": {"const": "runtime-identity-mismatch"}}},
+                                  {"properties": {"failure_mode": {"const": "timeout"}, "reason": {"const": "execution-timeout"}}},
+                                  {"properties": {"failure_mode": {"const": "deadlock"}, "reason": {"const": "execution-deadlock"}}},
+                                  {"properties": {"failure_mode": {"const": "execution"}, "reason": {"const": "distributed-command-failed"}}}
+                                ]
+                        }
+                  ]
+          },
+      "postEmitOutcome": {
+            "type": "object",
+            "properties": {
+                    "status": {"const": "failed"},
+                    "failure_mode": {"enum": ["runtime-identity", "timeout", "deadlock", "execution"]},
+                    "reason": {"const": "post-emit-distributed-command-failed"}
+                  }
+          },
+      "capabilityOutcome": {
+            "type": "object",
+            "properties": {
+                    "status": {"const": "unsupported"},
+                    "failure_mode": {"const": "capability"},
+                    "reason": {
+                              "enum": [
+                                          "backend-platform-unsupported",
+                                          "backend-token-capacity"
+                                        ]
+                            }
+                  }
+          },
+      "sha256": {"type": "string","pattern": "^[0-9a-f]{64}$"},
+      "safeId": {"type": "string","pattern": "^[a-z0-9][a-z0-9_.-]*$","maxLength": 128},
+      "caseId": {"type": "string","pattern": "^cxcase-v1-[0-9a-f]{64}$"},
+      "allocationId": {"type": "string","pattern": "^cxallocation-v1-[0-9a-f]{64}$"},
+      "attemptId": {"type": "string","pattern": "^cxattempt-v1-[0-9a-f]{64}$"},
+      "nullableText": {"oneOf": [{"type": "null"},{"type": "string","minLength": 1}]},
+      "allocationFactors": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": ["artifact","execution_id","job","repo","run_attempt","run_id","runner","source_sha"],
+            "properties": {
+                    "artifact": {"$ref": "#/$defs/nullableText"},
+                    "execution_id": {"$ref": "#/$defs/nullableText"},
+                    "job": {"$ref": "#/$defs/nullableText"},
+                    "repo": {"$ref": "#/$defs/nullableText"},
+                    "run_attempt": {"$ref": "#/$defs/nullableText"},
+                    "run_id": {"$ref": "#/$defs/nullableText"},
+                    "runner": {"$ref": "#/$defs/nullableText"},
+                    "source_sha": {"$ref": "#/$defs/nullableText"}
+                  }
+          },
+      "gitRun": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": ["artifact","job","ref","repo","run_attempt","run_id","source_sha"],
+            "properties": {
+                    "artifact": {"type": "string","minLength": 1},
+                    "job": {"type": "string","minLength": 1},
+                    "ref": {"type": "string","minLength": 1},
+                    "repo": {"type": "string","pattern": "^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$"},
+                    "run_attempt": {"type": "string","pattern": "^[1-9][0-9]*$"},
+                    "run_id": {"type": "string","pattern": "^[1-9][0-9]*$"},
+                    "source_sha": {"type": "string","pattern": "^[0-9a-f]{40}$"}
+                  }
+          },
+      "case": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": [
+                    "backend",
+                    "canonical",
+                    "eplb",
+                    "ep",
+                    "experts",
+                    "gpus_per_node",
+                    "hidden",
+                    "ladder",
+                    "nodes",
+                    "phase",
+                    "required_publication",
+                    "routing",
+                    "samples_per_point",
+                    "scale_up_domain",
+                    "suite",
+                    "timing",
+                    "topk",
+                    "warmup_semantics",
+                    "workload"
+                  ],
+            "properties": {
+                    "backend": {"$ref": "#/$defs/safeId"},
+                    "canonical": {"const": true},
+                    "eplb": {"type": "boolean"},
+                    "ep": {"type": "integer","minimum": 1},
+                    "experts": {"type": "integer","minimum": 1},
+                    "gpus_per_node": {"type": "integer","minimum": 1},
+                    "hidden": {"type": "integer","minimum": 1},
+                    "ladder": {"type": "string","pattern": "^[1-9][0-9]*( [1-9][0-9]*)*$"},
+                    "nodes": {"type": "integer","minimum": 1},
+                    "phase": {"enum": ["decode","prefill"]},
+                    "required_publication": {"enum": ["official","comparable-experimental"]},
+                    "routing": {"enum": ["uniform","zipf"]},
+                    "samples_per_point": {"const": 512},
+                    "scale_up_domain": {"type": "integer","minimum": 1},
+                    "suite": {"$ref": "#/$defs/safeId"},
+                    "timing": {"const": "8:64:32"},
+                    "topk": {"type": "integer","minimum": 1},
+                    "warmup_semantics": {"const": "full-roundtrip-before-each-component-trial-point-v1"},
+                    "workload": {"$ref": "#/$defs/safeId"}
+                  }
+          }
+    }
+}
diff --git a/experimental/CollectiveX/summarize.py b/experimental/CollectiveX/summarize.py
new file mode 100644
index 0000000000..3752db6b9d
--- /dev/null
+++ b/experimental/CollectiveX/summarize.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python3
+"""Render a small native-v1 shard summary and gate on a successful case."""
+from __future__ import annotations
+
+import argparse
+from pathlib import Path
+
+import contracts
+
+
+def load_results(directory: str, runner: str | None, timestamp: str | None) -> list[dict]:
+    """Load every valid native-v1 document found directly under ``directory``.
+
+    Args:
+        directory: Folder scanned (non-recursively) for ``*.json`` files, in sorted order.
+        runner: When set, only files named ``<runner>_...`` are considered.
+        timestamp: When set, only files whose name contains this text are considered.
+
+    Returns:
+        The validated raw-attempt and terminal-outcome documents. Files in any other format,
+        and files that fail to load or validate, are skipped.
+    """
+    prefix = f"{runner}_" if runner else ""
+    loaded: list[dict] = []
+    for candidate in sorted(Path(directory).glob("*.json")):
+        filename = candidate.name
+        if not filename.startswith(prefix) or (timestamp and timestamp not in filename):
+            continue
+        try:
+            parsed = contracts.strict_load(candidate)
+            if parsed.get("format") == contracts.RAW_FORMAT:
+                loaded.append(contracts.load_raw_attempt(candidate))
+            elif parsed.get("format") == contracts.TERMINAL_FORMAT:
+                loaded.append(contracts.validate_terminal_document(parsed))
+        except (contracts.ContractError, OSError):
+            # Best effort: one unreadable or non-conforming file must not hide the others.
+            continue
+    return loaded
+
+
+def _identity(document: dict) -> tuple[str, str, str, str, bool, str, int]:
+    """Return ``(sku, suite, routing, phase, eplb, tier, ep)`` for sorting and labelling a row."""
+    case = document["case"]
+    # Raw attempts nest routing and EPLB under sub-objects; other documents keep them flat.
+    is_raw = document["format"] == contracts.RAW_FORMAT
+    routing = case["shape"]["routing"] if is_raw else case["routing"]
+    eplb = case["eplb"]["enabled"] if is_raw else case["eplb"]
+    sku = document["identity"]["case_factors"]["sku"]
+    suite, phase, tier = case["suite"], case["phase"], case["required_publication"]
+    # The EP degree is read from "ep_size", then "ep", and defaults to 0 when neither exists.
+    ep = case.get("ep_size", case.get("ep", 0))
+    return sku, suite, routing, phase, eplb, tier, ep
+
+
+def _headline(document: dict) -> tuple[int | str, float | str, float | str]:
+    """Pick the headline row of a raw attempt: ``(tokens_per_rank, roundtrip p50, roundtrip p99)``.
+
+    The row with 64 tokens per rank is preferred; without one, the middle row is used.
+    Documents in any other format yield three dashes.
+    """
+    if document["format"] == contracts.RAW_FORMAT:
+        rows = document["measurement"]["rows"]
+        # Computed up front, exactly as a default argument to next() would be.
+        fallback = rows[len(rows) // 2]
+        chosen = next((row for row in rows if row["tokens_per_rank"] == 64), fallback)
+        percentiles = chosen["components"]["roundtrip"]["percentiles_us"]
+        return chosen["tokens_per_rank"], percentiles["p50"], percentiles["p99"]
+    return "-", "-", "-"
+
+
+def render(documents: list[dict], markdown: bool) -> str:
+    """Format the outcomes, sorted by case identity, as a markdown table or a plain-text listing.
+
+    The markdown form reports p50 and p99 round-trip latency and ends with a note when there
+    are no documents; the plain-text form reports p99 only.
+    """
+    documents = sorted(documents, key=_identity)
+    if not markdown:
+        text = ["CollectiveX EP results", "======================"]
+        for document in documents:
+            sku, suite, routing, phase, eplb, tier, ep = _identity(document)
+            backend = document["case"]["backend"]
+            token, _, p99 = _headline(document)
+            text.append(
+                f"  {sku:<10} {backend:<16} {suite:<13} {phase:<7} "
+                f"{routing}{'+eplb' if eplb else ''} {tier} ep{ep} "
+                f"{document['outcome']['status']} T={token} roundtrip_p99_us={p99}"
+            )
+        return "\n".join(text)
+    table = [
+        "## CollectiveX EP results",
+        "",
+        "| sku | backend | suite | phase | routing | tier | ep | outcome | T* | p50 us | p99 us |",
+        "|---|---|---|---|---|---|--:|---|--:|--:|--:|",
+    ]
+    for document in documents:
+        sku, suite, routing, phase, eplb, tier, ep = _identity(document)
+        backend = document["case"]["backend"]
+        token, p50, p99 = _headline(document)
+        table.append(
+            f"| {sku} | `{backend}` | {suite} | {phase} | "
+            f"{routing}{'+eplb' if eplb else ''} | {tier} | {ep} | "
+            f"{document['outcome']['status']} | {token} | {p50} | {p99} |"
+        )
+    if not documents:
+        table.append("\n> No valid native v1 outcome documents found.")
+    return "\n".join(table)
+
+
+def main() -> int:
+    """Command-line entry point: print the summary and return the process exit status.
+
+    With ``--markdown`` the status is always 0. Otherwise it is 0 only when at least one raw
+    attempt reports a ``success`` outcome, and 1 when none does.
+    """
+    parser = argparse.ArgumentParser(description="Summarize CollectiveX native v1 outcomes")
+    parser.add_argument("--results-dir", default="results")
+    for option in ("--runner", "--ts"):
+        parser.add_argument(option)
+    parser.add_argument("--markdown", action="store_true")
+    args = parser.parse_args()
+    documents = load_results(args.results_dir, args.runner, args.ts)
+    print(render(documents, args.markdown))
+    if args.markdown:
+        return 0
+    for document in documents:
+        is_raw = document["format"] == contracts.RAW_FORMAT
+        if is_raw and document["outcome"]["status"] == "success":
+            return 0
+    return 1
+
+
+# Script entry point: the value returned by main() becomes the process exit status.
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/experimental/CollectiveX/sweep_matrix.py b/experimental/CollectiveX/sweep_matrix.py
new file mode 100644
index 0000000000..17aa80c94a
--- /dev/null
+++ b/experimental/CollectiveX/sweep_matrix.py
@@ -0,0 +1,974 @@
+#!/usr/bin/env python3
+"""Resolve CollectiveX v1 suites and extract validated execution shards.
+
+The promoted v1 profile is intentionally narrow: normal-mode BF16,
+layout-and-dispatch-v1, tuned resources, and unquantized BF16 combine. Those
+constants are runtime defaults, not matrix axes. The matrix contains only the
+dimensions that vary between scheduled cases.
+"""
+from __future__ import annotations
+
+import argparse
+import datetime as dt
+import hashlib
+import itertools
+import json
+import os
+from pathlib import Path
+import re
+import sys
+from typing import Any
+
+HERE = Path(__file__).resolve().parent
+sys.path.insert(0, str(HERE))
+sys.path.insert(0, str(HERE / "tests"))
+
+try:  # Shard extraction on GPU runners is intentionally stdlib-only.
+    import yaml  # type: ignore
+except ModuleNotFoundError:  # pragma: no cover - exercised by the workflow environment
+    yaml = None
+
+import capability as cap  # noqa: E402
+import contracts  # noqa: E402
+import ep_harness  # noqa: E402
+import identity  # noqa: E402
+
+
+# Timing profile string "<timed iters>:<trials>:<warmup iters>", assembled from harness constants.
+EP_TIMING_PROFILE = (
+    f"{ep_harness.TIMED_ITERS_PER_TRIAL}:{ep_harness.TRIALS_PER_POINT}:"
+    f"{ep_harness.WARMUP_ITERS_PER_TRIAL}"
+)
+# Shallow copy of the v1 case profile; passed as ``profile`` when deriving case IDs.
+V1_PROFILE = dict(identity.V1_CASE_PROFILE)
+# The workload every v1 case must match, as (workload name, hidden, topk, experts).
+V1_WORKLOAD = ("deepseek-v3-v1", 7168, 8, 256)
+# Frozen per-suite contract: the publication tier, the allowed (phase, routing, eplb)
+# coordinates, and the token ladder for each phase. Extracted cases are checked against it.
+V1_SUITE_CONTRACTS = {
+    "ep-core-v1": {
+        "publication": "official",
+        "coordinates": {("decode", "uniform", False), ("prefill", "uniform", False)},
+        "ladders": {
+            "decode": tuple(ep_harness.DECODE_LADDER),
+            "prefill": (256, 512),
+        },
+    },
+    "ep-routing-v1": {
+        "publication": "comparable-experimental",
+        "coordinates": {
+            ("decode", "zipf", False), ("decode", "zipf", True),
+            ("prefill", "zipf", False), ("prefill", "zipf", True),
+        },
+        "ladders": {"decode": (128,), "prefill": (512,)},
+    },
+}
+# Shape of workload and suite names (applied with fullmatch): a lowercase letter or digit,
+# followed by lowercase letters, digits, dots or hyphens.
+IDENTIFIER = re.compile(r"[a-z0-9][a-z0-9.-]*")
+# Every key a suite entry may carry.
+SUITE_FIELDS = {
+    "ep_degrees", "eplb", "phases", "platforms", "required_publication", "routings", "token_points",
+    "token_points_decode", "token_points_prefill", "workloads",
+}
+# The subset of SUITE_FIELDS that every suite entry must define.
+SUITE_REQUIRED = {
+    "phases", "platforms", "required_publication", "routings", "workloads",
+}
+
+
+class MatrixError(ValueError):
+    """A matrix or shard-control document violates the execution contract.
+
+    Raised, for example, when an extracted case differs from the frozen v1 suite/workload
+    catalog. Configuration-file problems are reported through ``SystemExit`` instead.
+    """
+
+
+if yaml is not None:
+    class _UniqueKeyLoader(yaml.SafeLoader):
+        """SafeLoader variant whose mappings reject duplicate keys instead of keeping the last."""
+
+    def _unique_mapping(loader: Any, node: Any, deep: bool = False) -> dict[Any, Any]:
+        """Build a dict from a YAML mapping node, refusing duplicate or unhashable keys.
+
+        Args:
+            loader: The loader constructing the document.
+            node: The mapping node; ``node.value`` holds its (key node, value node) pairs.
+            deep: Forwarded to ``construct_object`` for both keys and values.
+
+        Raises:
+            SystemExit: A key repeats within the mapping, or a key cannot be hashed (for
+                example a sequence or mapping used as a key). The message carries the
+                1-based line of the offending key.
+        """
+        result: dict[Any, Any] = {}
+        for key_node, value_node in node.value:
+            key = loader.construct_object(key_node, deep=deep)
+            line = key_node.start_mark.line + 1
+            try:
+                seen = key in result
+            except TypeError:
+                # This constructor replaces the stock mapping constructor, so it has to
+                # reject unhashable keys itself; otherwise a raw TypeError traceback escapes
+                # instead of a configuration error.
+                raise SystemExit(f"unhashable YAML key at line {line}") from None
+            if seen:
+                raise SystemExit(f"duplicate YAML key {key!r} at line {line}")
+            result[key] = loader.construct_object(value_node, deep=deep)
+        return result
+
+    _UniqueKeyLoader.add_constructor(
+        yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, _unique_mapping
+    )
+
+
+def _load(name: str) -> dict[str, Any]:
+    """Parse ``configs/<name>`` with the duplicate-key-rejecting loader.
+
+    Returns:
+        The document's top-level mapping.
+
+    Raises:
+        SystemExit: PyYAML is not installed, the file is not valid YAML, or its top level is
+            not a mapping.
+    """
+    if yaml is None:
+        raise SystemExit("matrix generation requires PyYAML; shard extraction does not")
+    try:
+        with (HERE / "configs" / name).open() as stream:
+            parsed = yaml.load(stream, Loader=_UniqueKeyLoader)
+    except yaml.YAMLError as exc:
+        raise SystemExit(f"configs/{name} is not valid YAML: {exc}") from exc
+    if isinstance(parsed, dict):
+        return parsed
+    raise SystemExit(f"configs/{name} must contain a YAML object")
+
+
+def _workload_registry(workloads: dict[str, Any]) -> dict[str, dict[str, Any]]:
+    """Flatten the ``synthetic`` and ``model_derived`` sections into one name -> config map.
+
+    A missing or empty section contributes nothing. On a name clash the ``model_derived``
+    entry wins because it is merged last.
+    """
+    merged: dict[str, dict[str, Any]] = {}
+    for section in ("synthetic", "model_derived"):
+        entries = workloads.get(section) or {}
+        for name, cfg in entries.items():
+            merged[name] = cfg
+    return merged
+
+
+def _fields(value: Any, path: str, allowed: set[str], required: set[str]) -> dict[str, Any]:
+    """Check that ``value`` is a string-keyed object with a permitted, complete key set.
+
+    Args:
+        value: The candidate object.
+        path: Human-readable location used as the prefix of error messages.
+        allowed: Every key that may appear.
+        required: Keys that must appear.
+
+    Returns:
+        ``value`` itself, unchanged.
+
+    Raises:
+        SystemExit: ``value`` is not a dict, has a non-string key, or has unknown or missing keys.
+    """
+    if not isinstance(value, dict):
+        raise SystemExit(f"{path} must be an object")
+    if not all(isinstance(key, str) for key in value):
+        raise SystemExit(f"{path} field names must be strings")
+    present = set(value)
+    unknown = present - allowed
+    missing = required - present
+    if unknown or missing:
+        raise SystemExit(f"{path} fields: unknown={sorted(unknown)}, missing={sorted(missing)}")
+    return value
+
+
+def _list(value: Any, path: str, item_type: type, allowed: set[Any] | None = None) -> list[Any]:
+    """Check that ``value`` is a non-empty list of unique items of exactly ``item_type``.
+
+    The type test is exact (``type(item) is item_type``), so a ``bool`` does not pass as an
+    ``int``. When ``allowed`` is given, every item must also belong to it.
+
+    Returns:
+        ``value`` itself, unchanged.
+
+    Raises:
+        SystemExit: Any of the conditions above fails.
+    """
+    valid = (
+        isinstance(value, list)
+        and bool(value)
+        and all(type(item) is item_type for item in value)
+        and len(set(value)) == len(value)
+        and (allowed is None or all(item in allowed for item in value))
+    )
+    if not valid:
+        raise SystemExit(f"{path} must be a non-empty unique list of valid {item_type.__name__}s")
+    return value
+
+
+def validate_config_documents(
+    suites_document: dict[str, Any], workloads: dict[str, Any]
+) -> None:
+    """Reject configuration that is ambiguous, unused, or outside the v1 grid.
+
+    Checks run in a fixed order and stop at the first violation.
+
+    Args:
+        suites_document: Parsed ``configs/suites.yaml``.
+        workloads: Parsed ``configs/workloads.yaml``.
+
+    Raises:
+        SystemExit: With a message describing the first violation found.
+    """
+    # Both documents have a closed set of top-level keys.
+    _fields(
+        suites_document, "configs/suites.yaml",
+        {"schema_version", "suites"}, {"schema_version", "suites"},
+    )
+    _fields(
+        workloads, "configs/workloads.yaml",
+        {"schema_version", "synthetic", "model_derived"}, {"schema_version"},
+    )
+    # Exact type test: a YAML ``true`` must not pass as the integer 1.
+    if type(suites_document["schema_version"]) is not int or suites_document["schema_version"] != 1:
+        raise SystemExit("configs/suites.yaml schema_version must be integer 1")
+    if type(workloads["schema_version"]) is not int or workloads["schema_version"] != 1:
+        raise SystemExit("configs/workloads.yaml schema_version must be integer 1")
+    # One registry spans both sections, so a name may not repeat across them either.
+    registry: dict[str, dict[str, Any]] = {}
+    # The two sections differ only in the name of the expert-count field.
+    for section, expert_field in (
+        ("synthetic", "experts"),
+        ("model_derived", "routed_experts"),
+    ):
+        entries = workloads.get(section, {})
+        if not isinstance(entries, dict):
+            raise SystemExit(f"workloads.{section} must be an object")
+        for name, value in entries.items():
+            if not isinstance(name, str) or not IDENTIFIER.fullmatch(name) or name in registry:
+                raise SystemExit(f"workloads.{section} has invalid or duplicate name {name!r}")
+            fields = {"hidden", "topk", expert_field, "verified_against"}
+            # ``verified_against`` is the only optional workload field.
+            config = _fields(value, f"workload {name}", fields, fields - {"verified_against"})
+            # dimensions is [hidden, topk, expert count].
+            dimensions = [config[key] for key in ("hidden", "topk", expert_field)]
+            if any(type(item) is not int or item <= 0 for item in dimensions):
+                raise SystemExit(f"workload {name} dimensions must be positive integers")
+            if dimensions[1] > dimensions[2]:
+                raise SystemExit(f"workload {name}.topk exceeds its expert count")
+            source = config.get("verified_against")
+            if source is not None and (not isinstance(source, str) or not source.strip()):
+                raise SystemExit(f"workload {name}.verified_against must be a non-empty string")
+            registry[name] = config
+    if not registry:
+        raise SystemExit("configs/workloads.yaml must define at least one workload")
+
+    suites = suites_document["suites"]
+    if not isinstance(suites, dict) or not suites:
+        raise SystemExit("configs/suites.yaml suites must be a non-empty object")
+    # Workload names used by any suite, compared with the registry at the end.
+    referenced: set[str] = set()
+    for name, value in suites.items():
+        if not isinstance(name, str) or not IDENTIFIER.fullmatch(name):
+            raise SystemExit(f"invalid suite name {name!r}")
+        suite = _fields(value, f"suite {name}", SUITE_FIELDS, SUITE_REQUIRED)
+        suite_workloads = _list(suite["workloads"], f"suite {name}.workloads", str)
+        unknown = sorted(set(suite_workloads) - set(registry))
+        if unknown:
+            raise SystemExit(f"suite {name}: unknown workloads {unknown}")
+        referenced.update(suite_workloads)
+        platforms = _list(
+            suite["platforms"], f"suite {name}.platforms", str, set(cap.PLATFORMS)
+        )
+        phases = _list(suite["phases"], f"suite {name}.phases", str, {"decode", "prefill"})
+        routings = _list(suite["routings"], f"suite {name}.routings", str, {"uniform", "zipf"})
+        # EPLB defaults to disabled when the suite omits the field.
+        eplb = _list(suite.get("eplb", [False]), f"suite {name}.eplb", bool)
+        # A suite that enables EPLB must list Zipf as its only routing.
+        if True in eplb and routings != ["zipf"]:
+            raise SystemExit(f"suite {name}: EPLB is only valid for Zipf routing")
+        if suite["required_publication"] not in {"official", "comparable-experimental"}:
+            raise SystemExit(f"suite {name}.required_publication is invalid")
+        # Official suites may only use workloads that record a verification source.
+        if suite["required_publication"] == "official":
+            unverified = [item for item in suite_workloads if not registry[item].get("verified_against")]
+            if unverified:
+                raise SystemExit(f"suite {name}: official workloads need verified_against: {unverified}")
+        if "ep_degrees" in suite:
+            degrees = _list(suite["ep_degrees"], f"suite {name}.ep_degrees", int)
+            if any(degree <= 0 for degree in degrees):
+                raise SystemExit(f"suite {name}.ep_degrees must be positive")
+            # Each requested degree must be one the capability table lists for every platform.
+            for platform in platforms:
+                if not set(degrees).issubset(cap.PLATFORMS[platform]["ep_degrees"]):
+                    raise SystemExit(f"suite {name}: invalid EP degree for {platform}")
+        # A per-phase ladder for a phase the suite does not run can never be selected.
+        for phase in {"decode", "prefill"} - set(phases):
+            if f"token_points_{phase}" in suite:
+                raise SystemExit(f"suite {name}.token_points_{phase} is unreachable")
+        # The shared ladder is dead when every listed phase has its own override.
+        if "token_points" in suite and all(
+            f"token_points_{phase}" in suite for phase in phases
+        ):
+            raise SystemExit(f"suite {name}.token_points is unreachable")
+        # Called only for its validation; the rendered ladder string is discarded.
+        for phase in phases:
+            _ladder(suite, phase)
+    # Every registered workload must be used by at least one suite.
+    unused = sorted(set(registry) - referenced)
+    if unused:
+        raise SystemExit(f"unreferenced workloads: {unused}")
+
+
+def _dims(workloads: dict[str, Any], name: str) -> tuple[int, int, int]:
+    """Return ``(hidden, topk, experts)`` for the workload called ``name``.
+
+    The expert count is read from ``experts`` and falls back to ``routed_experts``. Missing
+    fields come back as ``None``; an unknown ``name`` raises ``KeyError``.
+    """
+    entry = _workload_registry(workloads)[name]
+    hidden = entry.get("hidden")
+    topk = entry.get("topk")
+    experts = entry.get("experts", entry.get("routed_experts"))
+    return hidden, topk, experts  # type: ignore[return-value]
+
+
+def _ladder(suite: dict[str, Any], phase: str) -> str:
+    """Render the token ladder of ``suite`` for ``phase`` as a space-separated string.
+
+    The phase-specific ``token_points_<phase>`` list is preferred, then the shared
+    ``token_points`` list, then the harness default ladder for the phase.
+
+    Raises:
+        SystemExit: The ladder is not a non-empty, strictly increasing list of positive
+            integers (booleans are rejected).
+    """
+    points = suite.get(f"token_points_{phase}", suite.get("token_points"))
+    if points is None:
+        points = ep_harness.DECODE_LADDER if phase == "decode" else ep_harness.PREFILL_LADDER
+    well_formed = (
+        isinstance(points, list)
+        and bool(points)
+        and all(
+            not isinstance(point, bool) and isinstance(point, int) and point > 0
+            for point in points
+        )
+        # Equal to its own sorted, de-duplicated form means strictly increasing.
+        and points == sorted(set(points))
+    )
+    if not well_formed:
+        raise SystemExit(f"invalid {phase} token ladder: {points!r}")
+    return " ".join(str(point) for point in points)
+
+
+def _v1_requested_ladder(case: dict[str, Any]) -> str:
+    """Bind extracted controls to the frozen v1 suite and workload catalog.
+
+    Returns:
+        The catalog ladder for the case's suite and phase, space-separated.
+
+    Raises:
+        MatrixError: The suite is unknown, its (phase, routing, eplb) coordinate or
+            publication tier is not the catalogued one, or the workload dimensions differ
+            from ``V1_WORKLOAD``.
+    """
+    contract = V1_SUITE_CONTRACTS.get(case.get("suite"))
+    coordinate = (case.get("phase"), case.get("routing"), case.get("eplb"))
+    workload = (case.get("workload"), case.get("hidden"), case.get("topk"), case.get("experts"))
+    conforms = (
+        contract is not None
+        and coordinate in contract["coordinates"]
+        and case.get("required_publication") == contract["publication"]
+        and workload == V1_WORKLOAD
+    )
+    if not conforms:
+        raise MatrixError("case differs from the frozen v1 suite/workload catalog")
+    return " ".join(str(point) for point in contract["ladders"][case["phase"]])
+
+
+def _expected_disposition(
+    sku: str, case: dict[str, Any]
+) -> tuple[str, str | None, str | None]:
+    """Classify a case as runnable or unsupported on ``sku``.
+
+    Returns:
+        ``("runnable", None, None)`` when the capability resolver accepts the case, otherwise
+        ``("unsupported", "backend-platform-unsupported", detail)`` with the resolver's detail.
+
+    Raises:
+        MatrixError: The case is outside the frozen v1 catalog, or its ladder is not the
+            catalog ladder.
+    """
+    expected = _v1_requested_ladder(case)
+    supported, detail = cap.resolve(
+        sku, case["backend"], nodes=case["nodes"],
+        routing=case["routing"], eplb=case["eplb"],
+    )
+    # The ladder must match either way; only the wording depends on the capability result.
+    if case["ladder"] != expected:
+        raise MatrixError(
+            "case ladder differs from the frozen v1 suite catalog" if supported
+            else "unsupported case ladder differs from the frozen v1 suite catalog"
+        )
+    if supported:
+        return "runnable", None, None
+    return "unsupported", "backend-platform-unsupported", detail
+
+
+def _case_id(sku: str, case: dict[str, Any]) -> str:
+    """Return ``identity.case_id`` for ``sku`` and ``case`` under the fixed ``V1_PROFILE``."""
+    return identity.case_id(sku=sku, profile=V1_PROFILE, case=case)
+
+
+def _semantic_points(sku: str, case: dict[str, Any]) -> list[str]:
+    """Serialize one canonical JSON string per ladder point of ``case``.
+
+    Each string holds ``sku``, the point as ``tokens_per_rank``, and every case field except
+    the labelling ones (canonical, case_id, ladder, required_publication, suite, workload).
+    Keys are sorted and separators are compact, so equal content gives equal text.
+    """
+    labelling = {"canonical", "case_id", "ladder", "required_publication", "suite", "workload"}
+    shared = {key: value for key, value in case.items() if key not in labelling}
+    points: list[str] = []
+    for token in case["ladder"].split():
+        payload: dict[str, Any] = {"sku": sku, "tokens_per_rank": int(token)}
+        # Case fields are applied last, so they win over the two keys set above.
+        payload.update(shared)
+        points.append(json.dumps(payload, sort_keys=True, separators=(",", ":")))
+    return points
+
+
+def _select_backends(backend: str, backends: str) -> list[str]:
+    """Resolve the ``--backend`` / ``--backends`` selection into validated backend names.
+
+    Args:
+        backend: A single backend name; when both arguments are empty, ``"deepep"`` is used.
+        backends: ``"all"`` for every sweep backend, or a comma-separated list of names.
+
+    Returns:
+        The selected names, in the order given (or in catalog order for ``"all"``).
+
+    Raises:
+        SystemExit: Both arguments are given, the selection is empty, or it contains unknown
+            or duplicate names.
+    """
+    available = list(cap.SWEEP_BACKENDS)
+    if backend and backends:
+        raise SystemExit("--backend and --backends are mutually exclusive")
+    if backends:
+        names = available if backends == "all" else [
+            value.strip() for value in backends.split(",") if value.strip()
+        ]
+    else:
+        names = [backend or "deepep"]
+    if not names:
+        # A separator-only value such as "," parses to nothing. Fail loudly, as the suite
+        # selection does, rather than returning an empty selection.
+        raise SystemExit("backend selection must be non-empty")
+    unknown = sorted(set(names) - set(available))
+    if unknown:
+        raise SystemExit(f"unknown backend values {unknown}; have {available}")
+    if len(names) != len(set(names)):
+        raise SystemExit("backend selection contains duplicates")
+    return names
+
+
+def resolve_matrix(
+    suites: str = "all",
+    backend: str = "",
+    backends: str = "",
+    only_sku: str = "",
+    min_nodes: int = 0,
+    max_nodes: int = 0,
+    max_cases: int = 128,
+) -> dict[str, Any]:
+    """Resolve suite configuration into allocation-sized workflow shards."""
+    if max_cases <= 0:
+        raise SystemExit("--max-cases must be positive")
+    if min_nodes < 0 or max_nodes < 0 or (min_nodes and max_nodes and min_nodes > max_nodes):
+        raise SystemExit("invalid node bounds")
+    if only_sku and only_sku not in cap.PLATFORMS:
+        raise SystemExit(f"unknown --only-sku {only_sku!r}; have {sorted(cap.PLATFORMS)}")
+
+    workloads = _load("workloads.yaml")
+    suites_document = _load("suites.yaml")
+    validate_config_documents(suites_document, workloads)
+    registry = suites_document["suites"]
+    names = list(registry) if suites == "all" else [
+        value.strip() for value in suites.split(",") if value.strip()
+    ]
+    if not names or len(names) != len(set(names)):
+        raise SystemExit("suite selection must be non-empty and unique")
+    unknown = sorted(set(names) - set(registry))
+    if unknown:
+        raise SystemExit(f"unknown suites {unknown}; have {sorted(registry)}")
+    targets = _select_backends(backend, backends)
+
+    shards: dict[tuple[str, str, int], list[dict[str, Any]]] = {}
+    requested_cases: list[dict[str, Any]] = []
+    scheduled: set[str] = set()
+    for suite_name in names:
+        suite = registry[suite_name]
+        phases = suite["phases"]
+        routings = suite["routings"]
+        eplb_values = suite.get("eplb", [False])
+        for platform_name in suite["platforms"]:
+            platform = cap.PLATFORMS[platform_name]
+            if only_sku and platform_name != only_sku:
+                continue
+            gpus_per_node = int(platform["gpus_per_node"])
+            scale_up_domain = int(platform["scale_up_domain"])
+            ep_degrees = suite.get("ep_degrees") or platform["ep_degrees"]
+            for workload, ep, phase, routing, eplb, target in itertools.product(
+                suite["workloads"], ep_degrees, phases, routings, eplb_values, targets
+            ):
+                if ep not in platform["ep_degrees"]:
+                    raise SystemExit(
+                        f"suite {suite_name}: {platform_name} EP{ep} is not registered"
+                    )
+                nodes_int = (ep + gpus_per_node - 1) // gpus_per_node
+                if min_nodes and nodes_int < min_nodes:
+                    continue
+                if max_nodes and nodes_int > max_nodes:
+                    continue
+                ok, capability_detail = cap.resolve(
+                    platform_name,
+                    target,
+                    nodes=nodes_int,
+                    routing=routing,
+                    eplb=bool(eplb),
+                )
+                hidden, topk, experts = _dims(workloads, workload)
+                nodes = nodes_int
+
+                def add_case(
+                    case_ladder: str,
+                    disposition: str,
+                    reason: str | None,
+                    detail: str | None,
+                ) -> None:
+                    case: dict[str, Any] = {
+                        "suite": suite_name,
+                        "workload": workload,
+                        "required_publication": suite["required_publication"],
+                        "backend": target,
+                        "routing": routing,
+                        "phase": phase,
+                        "ep": ep,
+                        "eplb": eplb,
+                        "hidden": hidden,
+                        "topk": topk,
+                        "experts": experts,
+                        "samples_per_point": ep_harness.TIMED_SAMPLES_PER_POINT,
+                        "warmup_semantics": ep_harness.WARMUP_SEMANTICS,
+                        "ladder": case_ladder,
+                        "timing": EP_TIMING_PROFILE,
+                        "canonical": True,
+                        "nodes": nodes,
+                        "gpus_per_node": gpus_per_node,
+                        "scale_up_domain": scale_up_domain,
+                    }
+                    for signature in _semantic_points(platform_name, case):
+                        if signature in scheduled:
+                            raise SystemExit(
+                                f"suite {suite_name}: duplicate semantic point for {platform_name}"
+                            )
+                        scheduled.add(signature)
+                    case["case_id"] = _case_id(platform_name, case)
+                    requested_cases.append(
+                        {
+                            "sku": platform_name,
+                            "case": case,
+                            "disposition": disposition,
+                            "reason": reason,
+                            "detail": detail,
+                        }
+                    )
+                    if disposition == "runnable":
+                        shards.setdefault((platform_name, target, nodes), []).append(case)
+
+                requested_ladder = _ladder(suite, phase)
+                if not ok:
+                    add_case(
+                        requested_ladder,
+                        "unsupported",
+                        "backend-platform-unsupported",
+                        capability_detail,
+                    )
+                    continue
+                add_case(requested_ladder, "runnable", None, None)
+
+    shards_by_sku: dict[str, list[dict[str, Any]]] = {}
+    for (sku, target, nodes), cases in sorted(shards.items()):
+        chunk_size = max_cases
+        for offset in range(0, len(cases), chunk_size):
+            chunk = cases[offset:offset + chunk_size]
+            part = offset // chunk_size
+            shard_id = f"{sku}-{target}-n{nodes}"
+            if len(cases) > chunk_size:
+                shard_id += f"-p{part}"
+            shards_by_sku.setdefault(sku, []).append({
+                "id": shard_id,
+                "sku": sku,
+                "backend": target,
+                "launcher": cap.PLATFORMS[sku]["launcher"],
+                "gpus_per_node": cap.PLATFORMS[sku]["gpus_per_node"],
+                "scale_up_domain": cap.PLATFORMS[sku]["scale_up_domain"],
+                "nodes": nodes,
+                "n": len(chunk),
+                "case_ids": [case["case_id"] for case in chunk],
+            })
+    include = [
+        shards_by_sku[sku][round_index]
+        for round_index in range(max(map(len, shards_by_sku.values()), default=0))
+        for sku in sorted(shards_by_sku)
+        if round_index < len(shards_by_sku[sku])
+    ]
+    return {
+        "format": "collectivex.matrix.v1",
+        "schema_version": 1,
+        "requested_cases": requested_cases,
+        "include": include,
+    }
+
+
+def _strict_json_load(path: Path) -> Any:
+    def reject_constant(value: str) -> None:
+        raise MatrixError(f"non-finite JSON number {value}")
+
+    def reject_duplicates(pairs: list[tuple[str, Any]]) -> dict[str, Any]:
+        result: dict[str, Any] = {}
+        for key, value in pairs:
+            if key in result:
+                raise MatrixError(f"duplicate JSON key {key!r}")
+            result[key] = value
+        return result
+
+    if not path.is_file():
+        raise MatrixError(f"matrix does not exist: {path}")
+    if path.stat().st_size == 0:
+        raise MatrixError(f"matrix is empty: {path}")
+    try:
+        with path.open() as fh:
+            return json.load(
+                fh, parse_constant=reject_constant, object_pairs_hook=reject_duplicates
+            )
+    except (OSError, json.JSONDecodeError) as exc:
+        raise MatrixError(f"matrix is not valid JSON: {exc}") from exc
+
+
+def _positive_int(value: Any, field: str) -> int:
+    if type(value) is not int:
+        raise MatrixError(f"{field} must be a positive integer")
+    if value <= 0:
+        raise MatrixError(f"{field} must be a positive integer")
+    return value
+
+
+def validate_shard_control(
+    shard: dict[str, Any],
+    *,
+    sku: str,
+    backend: str,
+    nodes: int,
+    require_runnable: bool = True,
+) -> None:
+    """Validate one shard against the workflow cell that requested it.
+
+    Args:
+        shard: parsed shard control object to check.
+        sku: platform the requesting cell expects; must be in ``cap.PLATFORMS``.
+        backend: backend the requesting cell expects; must be in
+            ``cap.SWEEP_BACKENDS``.
+        nodes: node count the requesting cell expects.
+        require_runnable: when true, every case must resolve as runnable via
+            ``_expected_disposition``; when false, only ``_v1_requested_ladder``
+            is applied to each case.
+
+    Raises:
+        MatrixError: on the first violation found. Checks run in the order
+            written, so the reported error depends on statement order.
+    """
+    if not isinstance(shard, dict):
+        raise MatrixError("shard must be a JSON object")
+    if sku not in cap.PLATFORMS or backend not in cap.SWEEP_BACKENDS:
+        raise MatrixError("shard platform/backend is not registered")
+    # The shard envelope must carry exactly these keys at schema version 1.
+    top_fields = {"schema_version", "id", "sku", "backend", "nodes", "n", "cases"}
+    if (
+        set(shard) != top_fields
+        or type(shard.get("schema_version")) is not int
+        or shard["schema_version"] != 1
+    ):
+        raise MatrixError("shard fields or schema version differ from v1 contract")
+    if not isinstance(shard.get("id"), str) or not IDENTIFIER.fullmatch(shard["id"]):
+        raise MatrixError("shard has invalid id")
+    # The shard's own coordinates must equal what the caller expects.
+    for field, expected in (("sku", sku), ("backend", backend)):
+        if shard.get(field) != expected:
+            raise MatrixError(
+                f"shard {field} mismatch: expected {expected!r}, got {shard.get(field)!r}"
+            )
+    if _positive_int(shard.get("nodes"), "shard.nodes") != nodes:
+        raise MatrixError(
+            f"shard nodes mismatch: expected {nodes}, got {shard.get('nodes')!r}"
+        )
+    cases = shard.get("cases")
+    if not isinstance(cases, list) or not cases:
+        raise MatrixError("shard must contain at least one case")
+    if _positive_int(shard.get("n"), "shard.n") != len(cases):
+        raise MatrixError("shard.n does not match the number of cases")
+    seen: set[str] = set()
+    # Exact key set every case object must have (no missing or extra keys).
+    required = {
+        "case_id", "suite", "workload", "required_publication", "backend", "routing",
+        "phase", "ep", "eplb", "hidden", "topk", "experts", "samples_per_point",
+        "warmup_semantics", "ladder", "timing", "canonical", "nodes",
+        "gpus_per_node", "scale_up_domain",
+    }
+    for index, case in enumerate(cases):
+        if not isinstance(case, dict):
+            raise MatrixError(f"case {index} must be a JSON object")
+        fields = set(case)
+        if fields != required:
+            raise MatrixError(
+                f"case {index} fields differ from v1 contract: "
+                f"missing={sorted(required - fields)}, extra={sorted(fields - required)}"
+            )
+        case_id = case["case_id"]
+        if not identity.is_typed_id(case_id, "case"):
+            raise MatrixError(f"case {index} has invalid case_id")
+        if case_id in seen:
+            raise MatrixError(f"duplicate case_id {case_id}")
+        seen.add(case_id)
+        # String-typed fields are checked first; later checks rely on them being str.
+        for field in ("suite", "workload", "required_publication", "backend", "routing", "phase",
+                      "warmup_semantics", "ladder", "timing"):
+            if not isinstance(case[field], str) or not case[field]:
+                raise MatrixError(f"case {index}.{field} must be a non-empty string")
+        for field in ("suite", "workload", "required_publication", "backend", "routing", "phase"):
+            if not IDENTIFIER.fullmatch(case[field]):
+                raise MatrixError(f"case {index}.{field} is not a safe identifier")
+        if case["required_publication"] not in {"official", "comparable-experimental"}:
+            raise MatrixError(f"case {index} has invalid publication requirement")
+        # The identifier is recomputed over every field except case_id itself.
+        case_identity = {key: value for key, value in case.items() if key != "case_id"}
+        if case_id != _case_id(sku, case_identity):
+            raise MatrixError(f"case {index} case_id does not match its contents")
+        if case["backend"] != backend:
+            raise MatrixError(f"case {index} backend does not match shard")
+        if _positive_int(case["nodes"], f"case {index}.nodes") != nodes:
+            raise MatrixError(f"case {index} nodes does not match shard")
+        ep = _positive_int(case["ep"], f"case {index}.ep")
+        gpus_per_node = _positive_int(
+            case["gpus_per_node"], f"case {index}.gpus_per_node"
+        )
+        # Topology fields must agree with the registered platform entry.
+        platform = cap.PLATFORMS[sku]
+        if (
+            gpus_per_node != platform["gpus_per_node"]
+            or case["scale_up_domain"] != platform["scale_up_domain"]
+            or ep not in platform["ep_degrees"]
+        ):
+            raise MatrixError(f"case {index} differs from the platform registry")
+        # Only exact fills are accepted: ep must equal nodes * gpus_per_node.
+        if ep != nodes * gpus_per_node:
+            raise MatrixError(f"case {index} ep does not equal nodes * gpus_per_node")
+        # Timing-related fields are pinned to the harness constants.
+        if case["samples_per_point"] != ep_harness.TIMED_SAMPLES_PER_POINT:
+            raise MatrixError(f"case {index} violates fixed-512-v1")
+        if case["timing"] != EP_TIMING_PROFILE:
+            raise MatrixError(f"case {index} has invalid timing profile")
+        if case["warmup_semantics"] != ep_harness.WARMUP_SEMANTICS:
+            raise MatrixError(f"case {index} has invalid warmup semantics")
+        if case["phase"] not in {"decode", "prefill"}:
+            raise MatrixError(f"case {index} has invalid phase")
+        if case["routing"] not in {"uniform", "zipf"}:
+            raise MatrixError(f"case {index} has invalid routing")
+        # EPLB is only accepted together with zipf routing.
+        if not isinstance(case["eplb"], bool) or (case["eplb"] and case["routing"] != "zipf"):
+            raise MatrixError(f"case {index} has invalid EPLB setting")
+        if not isinstance(case["canonical"], bool) or not case["canonical"]:
+            raise MatrixError(f"case {index} must use a canonical workload")
+        for field in ("ep", "nodes", "gpus_per_node", "hidden", "topk", "experts",
+                      "samples_per_point", "scale_up_domain"):
+            # bool is a subclass of int, so it is excluded explicitly.
+            if isinstance(case[field], bool) or not isinstance(case[field], int):
+                raise MatrixError(f"case {index}.{field} must be an integer")
+            _positive_int(case[field], f"case {index}.{field}")
+        if ep > _positive_int(case["scale_up_domain"], f"case {index}.scale_up_domain"):
+            raise MatrixError(f"case {index} exceeds its scale-up domain")
+        try:
+            ladder = [int(value) for value in case["ladder"].split()]
+        except (AttributeError, ValueError) as exc:
+            raise MatrixError(f"case {index} has invalid token ladder") from exc
+        # The ladder must be strictly increasing positive integers, written in
+        # canonical form (single spaces, no padding or alternate spellings).
+        if (not ladder or any(value <= 0 for value in ladder)
+                or ladder != sorted(set(ladder))
+                or case["ladder"] != " ".join(map(str, ladder))):
+            raise MatrixError(f"case {index} has invalid token ladder")
+        if require_runnable:
+            disposition, reason, _ = _expected_disposition(sku, case)
+            if disposition != "runnable":
+                raise MatrixError(f"case {index} violates capability registry: {reason}")
+        else:
+            # Called for its validation side effect only; the result is discarded.
+            _v1_requested_ladder(case)
+
+
+def validate_matrix_document(document: Any) -> dict[str, Any]:
+    """Validate the complete requested grid and its runnable shard partition."""
+    if not isinstance(document, dict) or set(document) != {
+        "format", "schema_version", "requested_cases", "include"
+    }:
+        raise MatrixError("matrix fields differ from the v1 contract")
+    if (
+        document["format"] != "collectivex.matrix.v1"
+        or type(document["schema_version"]) is not int
+        or document["schema_version"] != 1
+    ):
+        raise MatrixError("matrix format/schema differs from v1")
+    requested = document["requested_cases"]
+    include = document["include"]
+    if not isinstance(requested, list) or not requested:
+        raise MatrixError("matrix.requested_cases must be non-empty")
+    if not isinstance(include, list):
+        raise MatrixError("matrix.include must be an array")
+
+    cases_by_id: dict[str, dict[str, Any]] = {}
+    runnable_ids: set[str] = set()
+    semantic_points: set[str] = set()
+    for index, value in enumerate(requested):
+        path = f"matrix.requested_cases[{index}]"
+        if not isinstance(value, dict) or set(value) != {
+            "sku", "case", "disposition", "reason", "detail"
+        }:
+            raise MatrixError(f"{path} fields differ from the v1 contract")
+        sku = value["sku"]
+        case = value["case"]
+        disposition = value["disposition"]
+        if sku not in cap.PLATFORMS:
+            raise MatrixError(f"{path}.sku is unknown")
+        if disposition not in {"runnable", "unsupported"}:
+            raise MatrixError(f"{path}.disposition is invalid")
+        if disposition == "runnable":
+            if value["reason"] is not None or value["detail"] is not None:
+                raise MatrixError(f"{path} runnable cases cannot have a reason")
+        else:
+            if (
+                not isinstance(value["reason"], str)
+                or not IDENTIFIER.fullmatch(value["reason"])
+                or not isinstance(value["detail"], str)
+                or not value["detail"]
+            ):
+                raise MatrixError(f"{path} unsupported cases need a public reason and detail")
+        if not isinstance(case, dict):
+            raise MatrixError(f"{path}.case must be an object")
+        backend = case.get("backend")
+        nodes = case.get("nodes")
+        if not isinstance(backend, str) or type(nodes) is not int:
+            raise MatrixError(f"{path}.case backend/nodes are invalid")
+        validate_shard_control(
+            {
+                "schema_version": 1,
+                "id": "requested-case",
+                "sku": sku,
+                "backend": backend,
+                "nodes": nodes,
+                "n": 1,
+                "cases": [case],
+            },
+            sku=sku,
+            backend=backend,
+            nodes=nodes,
+            require_runnable=disposition == "runnable",
+        )
+        case_id = case["case_id"]
+        if case_id in cases_by_id:
+            raise MatrixError(f"duplicate requested case_id {case_id}")
+        for signature in _semantic_points(sku, case):
+            if signature in semantic_points:
+                raise MatrixError(f"{path} duplicates a semantic token point")
+            semantic_points.add(signature)
+        cases_by_id[case_id] = value
+        expected = _expected_disposition(sku, case)
+        if (disposition, value["reason"], value["detail"]) != expected:
+            raise MatrixError(f"{path} disposition differs from the frozen v1 catalog")
+        if disposition == "runnable":
+            runnable_ids.add(case_id)
+
+    shard_ids: set[str] = set()
+    assigned: list[str] = []
+    for index, shard in enumerate(include):
+        path = f"matrix.include[{index}]"
+        expected = {
+            "id", "sku", "backend", "launcher", "gpus_per_node", "scale_up_domain",
+            "nodes", "n", "case_ids",
+        }
+        if not isinstance(shard, dict) or set(shard) != expected:
+            raise MatrixError(f"{path} fields differ from the v1 contract")
+        shard_id = shard["id"]
+        if not isinstance(shard_id, str) or not IDENTIFIER.fullmatch(shard_id):
+            raise MatrixError(f"{path}.id is invalid")
+        if shard_id in shard_ids:
+            raise MatrixError(f"duplicate shard id {shard_id}")
+        shard_ids.add(shard_id)
+        sku = shard["sku"]
+        if sku not in cap.PLATFORMS:
+            raise MatrixError(f"{path}.sku is unknown")
+        platform = cap.PLATFORMS[sku]
+        for field in ("launcher", "gpus_per_node", "scale_up_domain"):
+            if shard[field] != platform[field]:
+                raise MatrixError(f"{path}.{field} differs from the platform registry")
+        case_ids = shard["case_ids"]
+        if not isinstance(case_ids, list) or not case_ids or len(case_ids) != len(set(case_ids)):
+            raise MatrixError(f"{path}.case_ids must be a non-empty unique array")
+        if _positive_int(shard["n"], f"{path}.n") != len(case_ids):
+            raise MatrixError(f"{path}.n differs from case_ids")
+        nodes = _positive_int(shard["nodes"], f"{path}.nodes")
+        for case_id in case_ids:
+            wrapper = cases_by_id.get(case_id)
+            if wrapper is None or wrapper["disposition"] != "runnable":
+                raise MatrixError(f"{path} references a missing or unsupported case")
+            case = wrapper["case"]
+            if (
+                wrapper["sku"] != sku
+                or case["backend"] != shard["backend"]
+                or case["nodes"] != nodes
+            ):
+                raise MatrixError(f"{path} case does not match shard coordinates")
+            assigned.append(case_id)
+    if len(assigned) != len(set(assigned)):
+        raise MatrixError("a runnable case is assigned to more than one shard")
+    if set(assigned) != runnable_ids:
+        raise MatrixError("runnable requested cases and shard assignments differ")
+    return document
+
+
+def extract_shard(
+    matrix_path: str | os.PathLike[str],
+    shard_id: str,
+    output_path: str | os.PathLike[str],
+    *,
+    sku: str,
+    backend: str,
+    nodes: int,
+) -> dict[str, Any]:
+    """Extract one strictly matched shard control file, writing it atomically."""
+    document = validate_matrix_document(_strict_json_load(Path(matrix_path)))
+    include = document["include"]
+    matches = [item for item in include if isinstance(item, dict) and item.get("id") == shard_id]
+    if len(matches) != 1:
+        raise MatrixError(f"expected exactly one shard {shard_id!r}, found {len(matches)}")
+    source = matches[0]
+    requested = {
+        item["case"]["case_id"]: item
+        for item in document["requested_cases"]
+    }
+    cases = [requested[case_id]["case"] for case_id in source["case_ids"]]
+    control = {
+        "schema_version": 1,
+        "id": source.get("id"),
+        "sku": source.get("sku"),
+        "backend": source.get("backend"),
+        "nodes": source.get("nodes"),
+        "n": source.get("n"),
+        "cases": cases,
+    }
+    validate_shard_control(control, sku=sku, backend=backend, nodes=nodes)
+    output = Path(output_path)
+    output.parent.mkdir(parents=True, exist_ok=True)
+    temporary = output.with_name(f".{output.name}.tmp-{os.getpid()}")
+    try:
+        with temporary.open("w") as fh:
+            json.dump(control, fh, sort_keys=True, separators=(",", ":"))
+            fh.write("\n")
+        os.replace(temporary, output)
+    finally:
+        temporary.unlink(missing_ok=True)
+    return control
+
+
+def emit_unsupported(
+    matrix_path: str | os.PathLike[str], output_dir: str | os.PathLike[str]
+) -> list[Path]:
+    """Materialize one strict terminal outcome for each unsupported requested case."""
+    source = Path(matrix_path)
+    document = validate_matrix_document(_strict_json_load(source))
+    control_sha256 = hashlib.sha256(source.read_bytes()).hexdigest()
+    generated_at = dt.datetime.now(dt.timezone.utc).isoformat()
+    git_run = {
+        "run_id": os.environ.get("GITHUB_RUN_ID"),
+        "run_attempt": os.environ.get("GITHUB_RUN_ATTEMPT"),
+        "ref": os.environ.get("GITHUB_REF_NAME") or os.environ.get("GITHUB_REF"),
+        "source_sha": os.environ.get("COLLECTIVEX_SOURCE_SHA") or os.environ.get("GITHUB_SHA"),
+        "repo": os.environ.get("GITHUB_REPOSITORY"),
+        "job": os.environ.get("GITHUB_JOB"),
+        "artifact": os.environ.get("COLLECTIVEX_ARTIFACT_NAME"),
+    }
+    allocation_factors = {
+        "artifact": git_run["artifact"],
+        "execution_id": os.environ.get("COLLECTIVEX_EXECUTION_ID"),
+        "job": git_run["job"],
+        "repo": git_run["repo"],
+        "run_attempt": git_run["run_attempt"],
+        "run_id": git_run["run_id"],
+        "runner": "capability-resolver",
+        "source_sha": git_run["source_sha"],
+    }
+    destination = Path(output_dir)
+    destination.mkdir(parents=True, exist_ok=True)
+    written: list[Path] = []
+    for wrapper in document["requested_cases"]:
+        if wrapper["disposition"] != "unsupported":
+            continue
+        scheduled = wrapper["case"]
+        case = {key: value for key, value in scheduled.items() if key != "case_id"}
+        case_factors = {"case": case, "profile": V1_PROFILE, "sku": wrapper["sku"]}
+        case_id = identity.digest("case", case_factors)
+        if case_id != scheduled["case_id"]:
+            raise MatrixError(f"unsupported case identity differs for {scheduled['case_id']}")
+        attempt_ordinal = 1
+        record = contracts.make_terminal_document(
+            allocation_factors=allocation_factors,
+            attempt_ordinal=attempt_ordinal,
+            case=case,
+            case_factors=case_factors,
+            control_sha256=control_sha256,
+            failure_mode="capability",
+            generated_at=generated_at,
+            git_run=git_run,
+            reason=wrapper["reason"],
+            return_code=5,
+            source="matrix-capability-resolver",
+            status="unsupported",
+            expected_case_id=case_id,
+        )
+        path = destination / f"unsupported_{case_id}.json"
+        temporary = path.with_name(f".{path.name}.tmp-{os.getpid()}")
+        try:
+            with temporary.open("x") as handle:
+                json.dump(record, handle, allow_nan=False, sort_keys=True, separators=(",", ":"))
+                handle.write("\n")
+                handle.flush()
+                os.fsync(handle.fileno())
+            os.replace(temporary, path)
+        finally:
+            temporary.unlink(missing_ok=True)
+        written.append(path)
+    return written
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser(description="CollectiveX v1 matrix resolver")
+    parser.add_argument("--suites", default="all", help="'all' or comma-list of suites")
+    parser.add_argument("--backend", default="", help="select one EP backend")
+    parser.add_argument("--backends", default="", help="'all' or comma-list of EP backends")
+    parser.add_argument("--only-sku", default="")
+    parser.add_argument("--min-nodes", type=int, default=0)
+    parser.add_argument("--max-nodes", type=int, default=0)
+    parser.add_argument("--max-cases", type=int, default=128)
+    parser.add_argument("--extract-from", default="", metavar="MATRIX")
+    parser.add_argument("--validate-control", default="", metavar="SHARD")
+    parser.add_argument("--emit-unsupported-from", default="", metavar="MATRIX")
+    parser.add_argument("--out-dir", default="")
+    parser.add_argument("--shard-id", default="")
+    parser.add_argument("--expect-sku", default="")
+    parser.add_argument("--expect-backend", default="")
+    parser.add_argument("--expect-nodes", type=int, default=0)
+    parser.add_argument("--out", default="")
+    args = parser.parse_args()
+
+    if args.emit_unsupported_from:
+        if not args.out_dir:
+            parser.error("unsupported outcome emission requires --out-dir")
+        try:
+            written = emit_unsupported(args.emit_unsupported_from, args.out_dir)
+        except MatrixError as exc:
+            parser.error(str(exc))
+        print(f"emitted {len(written)} unsupported terminal outcomes", file=sys.stderr)
+        return 0
+
+    if args.validate_control:
+        if not all((args.expect_sku, args.expect_backend, args.expect_nodes)):
+            parser.error(
+                "control validation requires --expect-sku, --expect-backend, and --expect-nodes"
+            )
+        try:
+            control = _strict_json_load(Path(args.validate_control))
+            validate_shard_control(
+                control,
+                sku=args.expect_sku,
+                backend=args.expect_backend,
+                nodes=args.expect_nodes,
+            )
+        except MatrixError as exc:
+            parser.error(str(exc))
+        print(f"validated {control.get('id')}: {control['n']} cases", file=sys.stderr)
+        return 0
+
+    if args.extract_from:
+        if not all((args.shard_id, args.expect_sku, args.expect_backend, args.expect_nodes, args.out)):
+            parser.error(
+                "shard extraction requires --shard-id, --expect-sku, --expect-backend, "
+                "--expect-nodes, and --out"
+            )
+        try:
+            control = extract_shard(
+                args.extract_from,
+                args.shard_id,
+                args.out,
+                sku=args.expect_sku,
+                backend=args.expect_backend,
+                nodes=args.expect_nodes,
+            )
+        except MatrixError as exc:
+            parser.error(str(exc))
+        print(f"extracted {control['id']}: {control['n']} cases", file=sys.stderr)
+        print(json.dumps(control, separators=(",", ":")))
+        return 0
+
+    matrix = resolve_matrix(
+        suites=args.suites,
+        backend=args.backend,
+        backends=args.backends,
+        only_sku=args.only_sku,
+        min_nodes=args.min_nodes,
+        max_nodes=args.max_nodes,
+        max_cases=args.max_cases,
+    )
+    try:
+        validate_matrix_document(matrix)
+    except MatrixError as exc:
+        parser.error(str(exc))
+    if args.out:
+        with open(args.out, "w") as fh:
+            json.dump(matrix, fh, sort_keys=True, separators=(",", ":"))
+            fh.write("\n")
+    runnable = sum(
+        item["disposition"] == "runnable" for item in matrix["requested_cases"]
+    )
+    unsupported = len(matrix["requested_cases"]) - runnable
+    print(
+        f"resolved {len(matrix['include'])} shard-cells, "
+        f"{runnable} runnable and {unsupported} unsupported cases",
+        file=sys.stderr,
+    )
+    print(json.dumps(matrix))
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/experimental/CollectiveX/tests/ep_deepep.py b/experimental/CollectiveX/tests/ep_deepep.py
new file mode 100644
index 0000000000..3109e7c771
--- /dev/null
+++ b/experimental/CollectiveX/tests/ep_deepep.py
@@ -0,0 +1,176 @@
+#!/usr/bin/env python3
+"""CollectiveX DeepEP adapter for the v1 BF16 normal-mode workload."""
+from __future__ import annotations
+
+import inspect
+import os
+import sys
+import types
+
+import torch
+import torch.distributed as dist
+import contracts
+
+try:
+    import deep_ep
+    from deep_ep import Buffer  # type: ignore
+except Exception as exc:  # pragma: no cover - requires the benchmark image
+    print(f"ERROR: deep_ep import failed: {exc!r}", file=sys.stderr)
+    raise
+
+
+def _deepep_version() -> str:
+    """Return the installed ``deep_ep`` distribution version, else ``deep_ep.__version__`` or "unknown"."""
+    try:
+        from importlib.metadata import version as distribution_version
+        return distribution_version("deep_ep")
+    except Exception:
+        return getattr(deep_ep, "__version__", "unknown")
+
+
+def _mnnvl_buffer_configuration() -> tuple[dict[str, bool], str]:
+    """Resolve the explicit DeepEP MNNVL API contract selected by ``CX_ALLOW_MNNVL``.
+
+    Returns the pair produced by ``contracts.resolve_deepep_mnnvl``. Raises RuntimeError for an
+    invalid flag value, an uninspectable ``Buffer.__init__`` signature, or (opt-in path only)
+    a ``contracts.ContractError``.
+    """
+    flag = os.environ.get("CX_ALLOW_MNNVL")
+    if flag is not None and flag not in ("", "0", "1"):
+        raise RuntimeError("CX_ALLOW_MNNVL must be unset, 0, or 1")
+    commit = os.environ.get("DEEPEP_COMMIT")
+    if flag != "1":  # unset, empty, or "0": the Buffer signature is never inspected
+        return contracts.resolve_deepep_mnnvl(requested=False, signature_parameters=(), deepep_commit=commit)
+    try:
+        init_parameters = inspect.signature(Buffer.__init__).parameters
+    except (TypeError, ValueError) as exc:
+        raise RuntimeError("cannot inspect DeepEP Buffer MNNVL API") from exc
+    try:
+        return contracts.resolve_deepep_mnnvl(
+            requested=True, signature_parameters=init_parameters, deepep_commit=commit)
+    except contracts.ContractError as exc:
+        raise RuntimeError(str(exc)) from exc
+
+
+class DeepEPBackend:
+    """CollectiveX adapter driving ``deep_ep.Buffer`` for the BF16 normal-mode workload."""
+
+    name = "deepep"
+    combine_needs_redispatch = False
+    # DeepEP reduces activations and top-k weights independently. The activation
+    # tensor must therefore carry the complete local weighted expert sum.
+    combine_weight_semantics = "unweighted-rank-sum"
+
+    def __init__(self, args, rank, world_size, local_rank, device):
+        """Allocate the Buffer on the WORLD group, apply and verify its SM count, record provenance.
+
+        Raises RuntimeError when ``Buffer.set_num_sms`` fails or the applied count differs.
+        ``local_rank`` is accepted but not used here.
+        """
+        self.args = args
+        self.rank = rank
+        self.world_size = world_size
+        self.device = device
+        self.mode = "normal"
+        self.group = dist.group.WORLD
+
+        device_sms = torch.cuda.get_device_properties(device).multi_processor_count
+        num_nvl_bytes = 4 * 1024 * 1024 * 1024
+        mnnvl_kwargs, mnnvl_comm = _mnnvl_buffer_configuration()
+        self.buffer = Buffer(self.group, num_nvl_bytes, 0, **mnnvl_kwargs)
+
+        # Buffer.num_sms wins when the class exposes it; args.num_sms is only the fallback.
+        num_sms = int(getattr(Buffer, "num_sms", args.num_sms))
+        try:
+            Buffer.set_num_sms(num_sms)
+        except Exception as exc:  # pragma: no cover - version dependent
+            raise RuntimeError(f"DeepEP did not apply requested num_sms={num_sms}: {exc!r}") from exc
+        applied_num_sms = int(getattr(Buffer, "num_sms", num_sms))
+        if applied_num_sms != num_sms:
+            raise RuntimeError(f"DeepEP num_sms mismatch: requested={num_sms} applied={applied_num_sms}")
+
+        version = _deepep_version()
+        self.backend_provenance = {
+            "deepep_version": version,
+            "deepep_commit": os.environ.get("DEEPEP_COMMIT") or f"pkg-{version}",
+            "backend_lineage": "deepep-v1",
+            "mode": "normal",
+            "dispatch_dtype": "bf16",
+            "combine_dtype": "bf16",
+            "resource_mode": "tuned",
+            "requested_num_sms": num_sms,
+            "num_sms": applied_num_sms,
+            "device_sms": device_sms,
+            "sm_fraction": applied_num_sms / device_sms,
+            "tuned_source": "deepep-default-num_sms",
+            "num_nvl_bytes": num_nvl_bytes,
+            "allow_mnnvl": bool(mnnvl_kwargs),
+            "mnnvl_comm": mnnvl_comm,
+        }
+
+    def buffer_cap(self, args):
+        """Always return None."""
+        return None
+
+    def make_problem(self, T, idx, weights, x):
+        """Bundle one case's inputs, casting routing ids to int64 and weights to float32."""
+        return types.SimpleNamespace(
+            T=T, x=x, topk_idx=idx.to(torch.int64), topk_weights=weights.to(torch.float32))
+
+    def dispatch(self, p):
+        """Compute the dispatch layout for ``p``, run ``Buffer.dispatch``, return the receive state."""
+        layout = self.buffer.get_dispatch_layout(p.topk_idx, self.args.experts)
+        num_tokens_per_rank, _, num_tokens_per_expert, is_token_in_rank, _ = layout
+        recv_x, recv_topk_idx, recv_topk_weights, recv_counts, handle, _ = self.buffer.dispatch(
+            p.x,
+            topk_idx=p.topk_idx,
+            topk_weights=p.topk_weights,
+            num_tokens_per_rank=num_tokens_per_rank,
+            is_token_in_rank=is_token_in_rank,
+            num_tokens_per_expert=num_tokens_per_expert,
+        )
+        return types.SimpleNamespace(
+            recv_x=recv_x, recv_topk_idx=recv_topk_idx, recv_topk_weights=recv_topk_weights,
+            recv_counts=recv_counts, handle=handle)
+
+    def stage(self, p, h):
+        """Use the received activations unchanged as the combine input."""
+        h.combine_input = h.recv_x
+
+    def combine(self, p, h):
+        """Combine the staged input through the dispatch handle; return only the activations."""
+        combined_x, _, _ = self.buffer.combine(h.combine_input, h.handle)
+        return combined_x
+
+    def inspect_dispatch(self, p, h):
+        """Describe the dispatch result: payload, expert ids, weights, and per-expert counts."""
+        valid = h.recv_topk_idx >= 0
+        # Valid ids are shifted by this rank's expert offset; negative ids stay as they are.
+        expert_offset = self.rank * (self.args.experts // self.world_size)
+        expert_ids = torch.where(valid, h.recv_topk_idx + expert_offset, h.recv_topk_idx)
+        return types.SimpleNamespace(
+            payload=h.recv_x,
+            expert_ids=expert_ids,
+            weights=h.recv_topk_weights.masked_fill(~valid, 0),
+            local_expert_counts=torch.tensor(h.recv_counts, device=self.device, dtype=torch.int64),
+            ordering_contract="source-rank-major-stable-v1",
+        )
+
+    def combine_transformed(self, p, h, transformed):
+        """Combine ``transformed`` after casting it to the dtype of the received activations."""
+        combined, _, _ = self.buffer.combine(transformed.to(h.recv_x.dtype), h.handle)
+        return combined
+
+    def recv_tokens(self, h):
+        """Return the number of received rows (first dimension of ``recv_x``)."""
+        return int(h.recv_x.shape[0])
+
+    def finalize(self, rc):
+        """Best-effort process-group teardown; always returns ``rc`` unchanged."""
+        try:
+            dist.barrier()
+            dist.destroy_process_group()
+        except Exception as exc:
+            # Teardown must not mask the benchmark's own exit code, but the failure is reported.
+            print(f"WARNING: deepep process-group teardown failed: {exc!r}", file=sys.stderr)
+        return rc
diff --git a/experimental/CollectiveX/tests/ep_deepep_hybrid.py b/experimental/CollectiveX/tests/ep_deepep_hybrid.py
new file mode 100644
index 0000000000..6514e93c51
--- /dev/null
+++ b/experimental/CollectiveX/tests/ep_deepep_hybrid.py
@@ -0,0 +1,378 @@
+#!/usr/bin/env python3
+"""CollectiveX EP backend adapter — DeepEP `hybrid-ep` branch (NVIDIA TMA-based HybridEPBuffer).
+
+The hybrid-ep branch (https://github.com/deepseek-ai/DeepEP/tree/hybrid-ep) is NVIDIA's TMA +
+warp-pipeline implementation of expert-parallel all-to-all, exposing `deep_ep.HybridEPBuffer`
+(distinct from the mainline `deep_ep.Buffer`). HybridEP is NVIDIA's MoE backend built for NVL72
+rack-scale (Megatron `moe_flex_dispatcher_backend="hybridep"`). This adapter drives the single-
+NVLink-domain path (`num_of_hybrid_ep_ranks_per_nvlink_domain == world_size`, <=8 ranks). That domain
+is ONE node on x86, while GB200/GB300 MNNVL can expose multiple trays as one NVLink domain. The v1
+matrix therefore exercises the same path at EP8 across two GB trays, subject to the normal three-run
+qualification gate. The container build is done by runtime/run_in_container.sh
+`cx_build_deepep_hybrid` (CUDA-13 CCCL include path, without the V2 NVSHMEM overlay).
+
+API (pinned on B300, branch e0a5b1d):
+  HybridEPBuffer(group, hidden_dim, max_num_of_tokens_per_rank, num_local_experts, use_fp8=False, ...)
+  .dispatch(hidden, topk_idx=, topk_weights=, num_of_experts=) -> (recv_hidden, recv_probs, None, handle)
+  .combine(hidden, handle=) -> [T, hidden]
+
+CORRECTNESS: identity expert (no expert compute), combine WITHOUT probs -> each source token is
+reconstructed as x * (distinct ranks among its top_k experts) — verified: an 8-rank uniform top_k=8
+round trip gives relerr(combined, x) = 4.28, matching E[distinct ranks] ~ 5.26 exactly. So this uses
+the same per-rank-sum combine contract (no gate re-weight). BF16 tolerance is 5e-2.
+
+STATUS: bf16 / normal / layout-and-dispatch-v1. The v1 scope is one detected NVLink domain at up to
+eight ranks; fp8 and the cross-RACK (>1 NVL72, IBGDA/RDMA) path remain out of scope.
+"""
+from __future__ import annotations
+
+import hashlib
+import importlib
+import json
+import os
+from pathlib import Path
+import re
+import shutil
+import sys
+import tempfile
+import types
+
+import torch
+import torch.distributed as dist
+import contracts
+
+try:
+    import deep_ep
+    HybridEPBuffer = deep_ep.HybridEPBuffer
+except Exception as exc:  # pragma: no cover - needs the hybrid-ep build
+    print("ERROR: deep_ep.HybridEPBuffer import failed — the hybrid-ep branch must be built at job "
+          "setup (cx_build_deepep_hybrid). "
+          f"{exc!r}", file=sys.stderr)
+    raise
+
+
+def _deepep_hybrid_version() -> str:  # non-empty DEEPEP_COMMIT, else deep_ep.__version__, else "hybrid-ep"
+    return os.environ.get("DEEPEP_COMMIT") or getattr(deep_ep, "__version__", "hybrid-ep")
+
+
+def _hybrid_build_evidence() -> list[dict[str, str]]:
+    """Build one content-manifest record per compiled DeepEP extension, sorted by (role, name).
+
+    Raises RuntimeError when an imported extension module exposes no ``__file__``.
+    """
+    extension_roles = {"deep_ep_cpp": "deepep-extension", "hybrid_ep_cpp": "deepep-hybrid-extension"}
+    evidence = []
+    for extension_name, extension_role in extension_roles.items():
+        library_path = getattr(importlib.import_module(extension_name), "__file__", None)
+        if not library_path:
+            raise RuntimeError(f"{extension_name} has no loaded extension path")
+        manifest_files = [(os.path.basename(library_path), library_path)]
+        evidence.append(contracts.content_manifest_evidence(
+            role=extension_role, name=extension_name, files=manifest_files))
+    evidence.sort(key=lambda record: (record["role"], record["name"]))
+    return evidence
+
+
+HYBRID_CONFIG_FIELDS = (  # config attributes read, in this order, by _hybrid_realized_config
+    "hidden_dim", "max_num_of_tokens_per_rank", "num_of_experts_per_rank",
+    "num_of_ranks_per_node", "num_of_nodes", "pad_multiple",
+    "num_of_tokens_per_chunk_preprocessing_api",
+    "num_of_threads_per_block_preprocessing_api", "num_of_blocks_preprocessing_api",
+    "num_of_blocks_permute", "num_of_blocks_unpermute", "token_data_type",
+    "num_of_stages_dispatch_api", "num_of_stages_permute_block_dispatch_api",
+    "num_of_in_flight_s2g_dispatch_api",
+    "num_of_in_flight_s2g_permute_block_dispatch_api",
+    "num_of_additional_in_flight_s2g_dispatch_api",
+    "num_of_tokens_per_chunk_dispatch_api", "num_of_blocks_dispatch_api",
+    "forward_dispatch_api", "device_side_sync_dispatch_api",
+    "num_of_stages_g2s_combine_api", "num_of_stages_s2g_combine_api",
+    "num_of_tokens_per_chunk_combine_api", "num_of_tokens_per_group_combine_api",
+    "num_of_blocks_combine_api", "num_of_additional_in_flight_s2g_combine_api",
+    "backward_combine_api", "device_side_sync_combine_api",
+)
+
+
+def _hybrid_realized_config(config) -> dict[str, str | int | bool]:
+    """Project the Python-visible, post-autotune HybridEP config to JSON scalars.
+
+    Raises RuntimeError for a missing field, an unknown token type, or a non-integral value.
+    """
+    projected = {}
+    for name in HYBRID_CONFIG_FIELDS:
+        try:
+            raw = getattr(config, name)
+        except AttributeError as exc:
+            raise RuntimeError(f"HybridEP realized config omits {name}") from exc
+        if name == "token_data_type":
+            label = getattr(raw, "name", None)
+            if label not in {"UINT8", "UINT16"}:  # fall back to the value's string form
+                label = {"uint8_t": "UINT8", "uint16_t": "UINT16"}.get(str(raw))
+            if label is None:
+                raise RuntimeError("HybridEP realized token_data_type is invalid")
+            projected[name] = label
+        else:
+            try:  # genuine bools pass through; everything else must coerce to int
+                projected[name] = raw if type(raw) is bool else int(raw)
+            except (TypeError, ValueError) as exc:
+                raise RuntimeError(f"HybridEP realized config {name} is not integral") from exc
+    return projected
+
+
+def _sha256_with_size(path: Path) -> tuple[str, int]:
+    """Stream ``path`` in 1 MiB reads; return its SHA-256 hex digest and byte count."""
+    hasher, total_bytes = hashlib.sha256(), 0
+    with path.open("rb") as stream:
+        while block := stream.read(1 << 20):
+            hasher.update(block)
+            total_bytes += len(block)
+    return hasher.hexdigest(), total_bytes
+
+
+def _hybrid_jit_evidence(root: Path) -> list[dict[str, str | int]]:
+    """Hash final JIT libraries without exposing rank-specific cache paths.
+
+    Only ``*.so`` entries directly under ``root`` are considered, in name order; exactly three
+    are required. Raises RuntimeError for a missing directory, a symlinked or non-regular
+    library, an invalid kernel key (the file stem), an empty library, or a wrong count.
+    """
+    if not root.is_dir():
+        raise RuntimeError("DeepEP Hybrid produced no JIT cache directory")
+    # Entries without a .so suffix are ignored rather than rejected.
+    libraries = [entry for entry in root.iterdir() if entry.suffix == ".so"]
+    libraries.sort(key=lambda entry: entry.name)
+    evidence = []
+    for library in libraries:
+        if library.is_symlink() or not library.is_file():
+            raise RuntimeError("DeepEP Hybrid JIT artifact is not a regular file")
+        if re.fullmatch(r"[A-Za-z0-9][A-Za-z0-9_.+-]{0,511}", library.stem) is None:
+            raise RuntimeError("DeepEP Hybrid JIT kernel key is invalid")
+        # Hash only after the file type and the name were accepted.
+        sha256_hex, byte_count = _sha256_with_size(library)
+        if byte_count <= 0:
+            raise RuntimeError("DeepEP Hybrid JIT artifact is empty")
+        evidence.append({"bytes": byte_count, "kernel_key": library.stem, "sha256": sha256_hex})
+    if len(evidence) != 3:
+        raise RuntimeError(f"DeepEP Hybrid expected 3 final JIT libraries, found {len(evidence)}")
+    return evidence
+
+
+def _require_cross_rank_equal(value, label: str) -> None:
+    """All-gather ``value`` and raise RuntimeError unless every rank's canonical JSON matches."""
+    per_rank = [None for _ in range(dist.get_world_size())]
+    dist.all_gather_object(per_rank, value)
+    if len({json.dumps(entry, sort_keys=True, separators=(",", ":")) for entry in per_rank}) != 1:
+        raise RuntimeError(f"DeepEP Hybrid {label} differs across ranks")
+
+
+class DeepEPHybridBackend:
+    """CollectiveX adapter around ``deep_ep.HybridEPBuffer`` (single NVLink domain, BF16)."""
+
+    name = "deepep-hybrid"
+    # HybridEPBuffer.combine consumes the recv payload + the dispatch handle (no re-dispatch needed
+    # before a timed combine); the harness times dispatch and combine separately (like ep_deepep).
+    combine_needs_redispatch = False
+    combine_weight_semantics = "unweighted-rank-sum"
+
+    def __init__(self, args, rank, world_size, local_rank, device):
+        """Build the buffer inside a fresh per-rank JIT cache and seed the provenance record.
+
+        Raises RuntimeError if construction fails or the buffer's ``local_rank`` differs from
+        ``rank``; in both cases the private cache and HYBRID_EP_CACHE_DIR are rolled back first.
+        """
+        self.args = args
+        self.rank = rank
+        self.world_size = world_size
+        self.device = device
+        self.mode = "normal"
+        self.group = dist.group.WORLD
+        self.tolerance = 5e-2
+        self.top_k = int(args.topk)
+        self.num_experts = int(args.experts)
+        self.hidden = int(args.hidden)
+        self.local_experts = max(1, self.num_experts // world_size)
+        # Token cap (per rank) for the symmetric buffer; the sweep is capped here (buffer_cap).
+        self.max_tokens = 4096
+        dev_sms = torch.cuda.get_device_properties(device).multi_processor_count
+        ver = _deepep_hybrid_version()
+        loaded_libraries = _hybrid_build_evidence()
+        _require_cross_rank_equal(loaded_libraries, "loaded extension identities")
+
+        # HybridEP's compiler uses a process-specific child of HYBRID_EP_CACHE_DIR. Give every
+        # rank a fresh private base so stale kernels cannot enter this attempt's evidence.
+        self._previous_jit_cache_dir = os.environ.get("HYBRID_EP_CACHE_DIR")
+        self._jit_cache_dir = tempfile.mkdtemp(prefix=f"collectivex-hybrid-r{rank}-")
+        os.environ["HYBRID_EP_CACHE_DIR"] = self._jit_cache_dir
+        self._jit_root = (
+            Path(self._jit_cache_dir) / ".deepep" / "hybrid_ep" / "jit"
+            / f"proc-{os.getpid()}"
+        )
+        self._realized_config = None
+        self._deferred_semantic_snapshot = None
+        self._deferred_jit_diagnostics = None
+
+        # Construct the HybridEPBuffer treating all ranks as ONE NVLink domain (default
+        # num_of_hybrid_ep_ranks_per_nvlink_domain == world_size). On x86 that domain is one node; on a
+        # GB200/GB300 NVL72 the MNNVL fabric makes 2 trays one NVLink domain, so EP8 (8 ranks) is covered
+        # by this same path (validated transport=mnnvl). SM counts default.
+        try:
+            self.buffer = HybridEPBuffer(
+                self.group, hidden_dim=self.hidden,
+                max_num_of_tokens_per_rank=self.max_tokens,
+                num_local_experts=self.local_experts, use_fp8=False)
+        except Exception as exc:
+            self._release_jit_cache()
+            raise RuntimeError(
+                f"HybridEPBuffer construction failed (hidden={self.hidden} max_tokens={self.max_tokens} "
+                f"local_experts={self.local_experts} world={world_size}): {exc!r}") from exc
+        update_template_config = self.buffer.update_template_config
+
+        def tracked_update_template_config(*call_args, **call_kwargs):
+            # Record each realized config and reject one that differs from the previous call.
+            config = update_template_config(*call_args, **call_kwargs)
+            realized = _hybrid_realized_config(config)
+            if self._realized_config is not None and realized != self._realized_config:
+                raise RuntimeError("DeepEP Hybrid realized autotune config changed within one case")
+            self._realized_config = realized
+            return config
+
+        self.buffer.update_template_config = tracked_update_template_config
+        self.domain_rank = int(self.buffer.local_rank)
+        if self.domain_rank != rank:
+            # finalize() is unreachable once __init__ raises, so roll the cache back here.
+            self._release_jit_cache()
+            raise RuntimeError(
+                "HybridEPBuffer rank within the single NVLink domain differs from global rank: "
+                f"domain={self.domain_rank} global={rank}"
+            )
+        if rank == 0:
+            print(f"[deepep-hybrid] HybridEPBuffer constructed (single NVLink domain, world={world_size}, "
+                  f"local_experts={self.local_experts}, hidden={self.hidden})", file=sys.stderr)
+
+        self.backend_provenance = {
+            "deepep_commit": ver, "branch": "hybrid-ep",
+            "deepep_tree": os.environ.get("DEEPEP_TREE"),
+            "backend_lineage": "deepep-hybrid",
+            "loaded_libraries": loaded_libraries,
+            "impl": "deep_ep.HybridEPBuffer (NVIDIA TMA + warp-pipeline)",
+            "mode": "normal", "transport": "nvlink-domain",  # one node (x86) or one NVL72 MNNVL domain (gb300 EP8)
+            "resource_mode": "tuned",
+            "num_sms": None, "device_sms": dev_sms,
+            "tuned_source": "deepep-hybrid-configurer-autotune-v1",
+            "realized_config": None, "jit_kernel_keys": [], "jit_shared_objects": [],
+            "max_num_tokens": self.max_tokens, "top_k": self.top_k,
+            "num_experts": self.num_experts, "local_experts": self.local_experts,
+            "routing_factor": "ranks",
+        }
+
+    def _release_jit_cache(self):
+        """Delete this rank's private JIT cache and restore the previous HYBRID_EP_CACHE_DIR."""
+        shutil.rmtree(self._jit_cache_dir, ignore_errors=True)
+        if self._previous_jit_cache_dir is None:
+            os.environ.pop("HYBRID_EP_CACHE_DIR", None)
+        else:
+            os.environ["HYBRID_EP_CACHE_DIR"] = self._previous_jit_cache_dir
+
+    def buffer_cap(self, args):
+        """Return the per-rank token cap the buffer was sized for."""
+        return self.max_tokens
+
+    def make_problem(self, T, idx, weights, x):
+        """Bundle one case's inputs, casting routing ids to int64 and weights to float32."""
+        return types.SimpleNamespace(
+            T=int(T), x=x, topk_idx=idx.to(torch.int64), topk_weights=weights.to(torch.float32))
+
+    def dispatch(self, p):
+        """Run ``HybridEPBuffer.dispatch`` and keep the payload, probabilities, and handle."""
+        recv, recv_probs, _scales, handle = self.buffer.dispatch(
+            p.x, topk_idx=p.topk_idx, topk_weights=p.topk_weights, num_of_experts=self.num_experts)
+        return types.SimpleNamespace(
+            recv=recv, recv_payload=recv, recv_probs=recv_probs, handle=handle, combine_input=None)
+
+    def stage(self, p, h):
+        # Identity expert: the recv hidden IS the "expert output". combine reduces it per source token.
+        h.combine_input = h.recv_payload
+        return None
+
+    def combine(self, p, h):
+        # combine(hidden, handle=) -> [T, H] per-source-token reduction (no gate re-weight: "ranks").
+        comb = self.buffer.combine(h.combine_input, handle=h.handle)
+        return comb[0] if isinstance(comb, (tuple, list)) else comb
+
+    def capture_deferred_provenance(self):
+        """Snapshot the realized config and JIT artifacts into ``backend_provenance``.
+
+        Raises RuntimeError if no config was recorded, ranks disagree on config/kernel keys,
+        or a repeated capture differs from the previous one.
+        """
+        torch.cuda.synchronize()
+        dist.barrier()
+        if self._realized_config is None:
+            raise RuntimeError("DeepEP Hybrid autotune config was not materialized")
+        local_artifacts = _hybrid_jit_evidence(self._jit_root)
+        semantic = {
+            "jit_kernel_keys": [item["kernel_key"] for item in local_artifacts],
+            "realized_config": dict(self._realized_config),
+        }
+        # NVCC may embed each rank's timestamped source basename in its ELF, so raw .so hashes are
+        # diagnostics rather than a cross-rank identity. Stable kernel keys encode every codegen
+        # input, including HybridEpConfigInstance fields that the Python binding does not expose.
+        _require_cross_rank_equal(semantic, "realized config/JIT kernel keys")
+        gathered_artifacts = [None] * dist.get_world_size()
+        dist.all_gather_object(gathered_artifacts, local_artifacts)
+        diagnostics = []
+        for artifact_index, kernel_key in enumerate(semantic["jit_kernel_keys"]):
+            rank_artifacts = [
+                {"bytes": per_rank[artifact_index]["bytes"], "rank": artifact_rank,
+                 "sha256": per_rank[artifact_index]["sha256"]}
+                for artifact_rank, per_rank in enumerate(gathered_artifacts)
+            ]
+            diagnostics.append({"kernel_key": kernel_key, "rank_artifacts": rank_artifacts})
+        if self._deferred_semantic_snapshot is not None and semantic != self._deferred_semantic_snapshot:
+            raise RuntimeError("DeepEP Hybrid config/JIT kernel set changed after measurement")
+        if self._deferred_jit_diagnostics is not None and diagnostics != self._deferred_jit_diagnostics:
+            raise RuntimeError("DeepEP Hybrid rank-local JIT artifacts changed after measurement")
+        self._deferred_semantic_snapshot = semantic
+        self._deferred_jit_diagnostics = diagnostics
+        self.backend_provenance.update(semantic)
+        self.backend_provenance["jit_shared_objects"] = diagnostics
+
+    def inspect_dispatch(self, p, h):
+        """Rebuild per-row expert ids and weights from the handle's routing map."""
+        count = self.recv_tokens(h)
+        routing_map = h.handle[4][:count]
+        rows, local_expert_ids = routing_map.nonzero(as_tuple=True)
+        positions = routing_map.to(torch.int64).cumsum(dim=1)[rows, local_expert_ids] - 1
+        probability_columns = self.domain_rank * self.local_experts + local_expert_ids
+        if h.recv_probs.shape[1] < (self.domain_rank + 1) * self.local_experts:
+            raise RuntimeError("HybridEPBuffer probability tensor omits this NVLink-domain rank")
+        expert_ids = torch.full((count, self.top_k), -1, dtype=torch.int64, device=self.device)
+        weights = torch.zeros((count, self.top_k), dtype=torch.float32, device=self.device)
+        expert_ids[rows, positions] = local_expert_ids + self.rank * self.local_experts
+        weights[rows, positions] = h.recv_probs[:count][rows, probability_columns]
+        return types.SimpleNamespace(
+            payload=h.recv_payload[:count],
+            expert_ids=expert_ids,
+            weights=weights,
+            local_expert_counts=routing_map.sum(dim=0, dtype=torch.int64),
+            ordering_contract="global-source-filter-stable-v1",
+        )
+
+    def combine_transformed(self, p, h, transformed):
+        """Combine ``transformed`` after casting it to the received payload's dtype."""
+        combined = self.buffer.combine(transformed.to(h.recv_payload.dtype), handle=h.handle)
+        return combined[0] if isinstance(combined, (tuple, list)) else combined
+
+    def recv_tokens(self, h):
+        """Return the received-token count stored at ``handle[3]`` as a Python int."""
+        return int(h.handle[3].item())
+
+    def finalize(self, rc):
+        """Best-effort teardown: process group, JIT cache, environment; returns ``rc``."""
+        try:
+            dist.barrier()
+            dist.destroy_process_group()
+        except Exception as exc:
+            # Teardown must not mask the benchmark's exit code, but the failure is reported.
+            print(f"WARNING: deepep-hybrid process-group teardown failed: {exc!r}", file=sys.stderr)
+        self._release_jit_cache()
+        return rc
diff --git a/experimental/CollectiveX/tests/ep_deepep_v2.py b/experimental/CollectiveX/tests/ep_deepep_v2.py
new file mode 100644
index 0000000000..a11185effb
--- /dev/null
+++ b/experimental/CollectiveX/tests/ep_deepep_v2.py
@@ -0,0 +1,507 @@
+#!/usr/bin/env python3
+"""DeepEP PR #605 adapter with PR #630's pure scale-up initialization fix."""
+
+from __future__ import annotations
+
+import ctypes
+import hashlib
+import importlib.metadata
+import inspect
+import json
+import os
+import re
+import sys
+import types
+from pathlib import Path
+
+import torch
+import torch.distributed as dist
+import contracts
+import ep_harness
+
+try:
+    import deep_ep
+    from deep_ep import ElasticBuffer  # type: ignore
+except Exception as exc:  # pragma: no cover - requires the benchmark image
+    print(f"ERROR: DeepEP V2 import failed: {exc!r}", file=sys.stderr)
+    raise
+
+
+DEEPEP_V2_PR = 605  # upstream DeepEP pull request named in the module docstring
+DEEPEP_V2_FIX_PR = 630  # PR with the pure scale-up initialization fix (module docstring)
+DEEPEP_V2_COMMIT = "fa8a9b16898204afd347c663b89e65ef87dc6ce6"
+DEEPEP_V2_TREE = "29809e75c5874e6609dac4804e7b651d5226959f"
+DEEPEP_V2_FMT_COMMIT = "a4c7e17133ee9cb6a2f45545f6e974dd3c393efa"
+DEEPEP_V2_VERSION = "2.0.0"
+DEEPEP_V2_DISTRIBUTION = "2.0.0+fa8a9b1"
+DEEPEP_V2_JIT_RANDOM_SEED = "collectivex-deepep-v2-fa8a9b1"  # compared with the same-named env var in _require_runtime
+TORCH_VERSION = "2.10.0+cu130"
+NCCL_VERSION = "2.30.4"
+NVSHMEM_VERSION = "3.3.9"
+DEEPEP_V2_JIT_KERNELS = contracts.DEEPEP_V2_JIT_KERNELS  # compared with the discovered kernel names in _jit_artifact_evidence
+
+
+def _sha256(path: str) -> str:
+    hasher = hashlib.sha256()  # fed in 1 MiB reads rather than from one whole-file read
+    with open(path, "rb") as stream:
+        while block := stream.read(1 << 20):
+            hasher.update(block)
+    return hasher.hexdigest()
+
+
+def _api_sha256() -> str:
+    """Fingerprint the ElasticBuffer ``__init__``/``dispatch``/``combine`` signatures (SHA-256 hex)."""
+    signatures = {
+        f"ElasticBuffer.{method}": str(inspect.signature(getattr(ElasticBuffer, method)))
+        for method in ("__init__", "dispatch", "combine")
+    }
+    # Sorted keys and compact separators keep the JSON text stable before hashing.
+    canonical = json.dumps(signatures, sort_keys=True, separators=(",", ":"))
+    return hashlib.sha256(canonical.encode()).hexdigest()
+
+
+def _loaded_library_paths() -> set[str]:
+    """Real paths of the DeepEP extension and of each NCCL/NVSHMEM-host library in /proc/self/maps."""
+    extension = getattr(getattr(deep_ep, "_C", None), "__file__", None)
+    if not extension or not os.path.isfile(extension):
+        raise RuntimeError("DeepEP V2 extension library is not loaded")
+    paths = {os.path.realpath(extension)}
+    try:
+        with open("/proc/self/maps", encoding="utf-8") as handle:
+            for line in handle:
+                for path in line.rstrip("\n").split(None, 5)[5:]:  # 6th maps field, if any; may hold spaces
+                    if re.search(r"libnccl\.so|libnvshmem_host\.so", os.path.basename(path)) and os.path.isfile(path):
+                        paths.add(os.path.realpath(path))
+    except OSError as exc:  # pragma: no cover - benchmark runtime is Linux
+        raise RuntimeError("cannot inspect loaded communication libraries") from exc
+    return paths
+
+
+def _loaded_nccl_version() -> str:
+    """Query the single loaded NCCL library for its version, formatted via ``ep_harness``."""
+    nccl_paths = [p for p in _loaded_library_paths() if "libnccl.so" in os.path.basename(p)]
+    if len(nccl_paths) != 1:
+        raise RuntimeError("expected exactly one loaded NCCL library")
+    raw_version = ctypes.c_int()
+    # ncclGetVersion fills the integer passed by reference; a non-zero return is treated as failure.
+    status = ctypes.CDLL(nccl_paths[0]).ncclGetVersion(ctypes.byref(raw_version))
+    if status != 0:
+        raise RuntimeError("loaded NCCL version query failed")
+    return ep_harness.format_collective_version(raw_version.value)
+
+
+def _loaded_library_evidence() -> list[dict[str, str]]:
+    """Return content identities, never private library paths.
+
+    Each record carries ``role``, ``name``, and ``sha256``; records are sorted on those three keys.
+    Raises RuntimeError unless exactly one NCCL and one NVSHMEM-host library are loaded.
+    """
+    markers = {"nccl": "libnccl.so", "nvshmem": "libnvshmem_host.so"}
+
+    def classify(library_path: str) -> str:
+        basename = os.path.basename(library_path)
+        return next((kind for kind, marker in markers.items() if marker in basename), "deepep-extension")
+
+    paths = _loaded_library_paths()
+    problems = []
+    for kind, marker in markers.items():
+        found = sum(marker in os.path.basename(candidate) for candidate in paths)
+        if found != 1:
+            problems.append(f"{kind}={found}")
+    if problems:
+        raise RuntimeError("expected one loaded library for each dependency: " + ", ".join(problems))
+    records = []
+    for library_path in paths:
+        kind = classify(library_path)
+        shown = "deep_ep._C" if kind == "deepep-extension" else os.path.basename(library_path)
+        records.append({"role": kind, "name": shown, "sha256": _sha256(library_path)})
+    return sorted(records, key=lambda record: (record["role"], record["name"], record["sha256"]))
+
+
+def _jit_artifact_evidence() -> list[dict[str, str]]:
+    """Hash every kernel directory under ``$EP_JIT_CACHE_DIR/cache``, sorted by cache key.
+
+    Each directory must be named ``kernel.<name>.<32 hex>`` and hold exactly non-empty regular
+    ``kernel.cu``, ``kernel.cubin``, and ``kernel.sass`` files. Raises RuntimeError on any
+    violation, or when the kernel names / entry count differ from DEEPEP_V2_JIT_KERNELS.
+    """
+    cache_root = Path(os.environ["EP_JIT_CACHE_DIR"]) / "cache"
+    if cache_root.is_symlink() or not cache_root.is_dir():
+        raise RuntimeError("DeepEP V2 produced no JIT cache evidence")
+    expected_files = ("kernel.cu", "kernel.cubin", "kernel.sass")
+    name_pattern = re.compile(r"kernel\.([A-Za-z0-9_+-]+)\.([0-9a-f]{32})")
+    evidence, seen_kernels = [], set()
+    for entry in sorted(cache_root.iterdir(), key=lambda item: item.name):
+        parsed = name_pattern.fullmatch(entry.name)
+        if entry.is_symlink() or not entry.is_dir() or parsed is None:
+            raise RuntimeError("DeepEP V2 JIT cache contains an invalid entry")
+        if {child.name for child in entry.iterdir()} != set(expected_files):
+            raise RuntimeError("DeepEP V2 JIT kernel evidence is incomplete")
+        files = source, cubin, sass = tuple(entry / filename for filename in expected_files)
+        if not all(not item.is_symlink() and item.is_file() for item in files):
+            raise RuntimeError("DeepEP V2 JIT evidence is not a regular file")
+        if min(item.stat().st_size for item in files) <= 0:
+            raise RuntimeError("DeepEP V2 JIT evidence is empty")
+        seen_kernels.add(parsed.group(1))
+        evidence.append({
+            "cache_key": entry.name,
+            "source_sha256": _sha256(str(source)),
+            "sass_sha256": _sha256(str(sass)),
+            "cubin_sha256": _sha256(str(cubin)),
+        })
+    if len(evidence) != len(DEEPEP_V2_JIT_KERNELS) or seen_kernels != DEEPEP_V2_JIT_KERNELS:
+        raise RuntimeError("DeepEP V2 JIT kernel set differs from the v1 contract")
+    return sorted(evidence, key=lambda record: record["cache_key"])
+
+
+def _jit_cache_key(
+    args, world_size: int, max_tokens: int, allow_hybrid_mode: bool, realized: dict[str, int | bool],
+) -> str:
+    """Key generated kernels by codegen inputs, not routing data or case identity.
+
+    Returns ``"jitcfg-v3-"`` followed by the SHA-256 hex digest of the payload serialized as
+    JSON with sorted keys and compact separators.
+    """
+    payload = {
+        "contract": "deepep-v2-jit-config-v3",
+        "runner": args.runner,
+        "world_size": world_size,
+        "hidden": args.hidden,
+        "topk": args.topk,
+        "physical_experts": args.experts,
+        "tuning_experts": getattr(args, "num_logical_experts", args.experts),
+        "max_tokens": max_tokens,
+        "dispatch_dtype": "bf16",
+        "combine_dtype": "bf16",
+        "input_layout": "bf16-no-sf",
+        "expert_alignment": 1,
+        "do_cpu_sync": True,
+        "cached_mode": False,
+        "do_expand": False,
+        "use_expanded_layout": False,
+        "allow_hybrid_mode": allow_hybrid_mode,
+        "allow_multiple_reduction": True,
+        "prefer_overlap_with_compute": True,
+        "deterministic": False,
+    }
+    # Realized values are merged last, so they win over a fixed field with the same name.
+    payload.update(realized)
+    canonical = json.dumps(payload, sort_keys=True, separators=(",", ":"))
+    return "jitcfg-v3-" + hashlib.sha256(canonical.encode()).hexdigest()
+
+
+def _require_cross_rank_equal(value, label: str) -> None:
+    """Raise unless every rank contributed an identical ``value``.
+
+    Values are gathered from all ranks and compared through their compact,
+    key-sorted JSON encoding, so dict key order does not matter.
+
+    Raises:
+        RuntimeError: If the gathered values do not collapse to exactly one encoding.
+    """
+    per_rank = [None for _ in range(dist.get_world_size())]
+    dist.all_gather_object(per_rank, value)
+    distinct = set()
+    for item in per_rank:
+        distinct.add(json.dumps(item, sort_keys=True, separators=(",", ":")))
+    if len(distinct) == 1:
+        return
+    raise RuntimeError(f"DeepEP V2 {label} differs across ranks")
+
+
+def _configure_gin_mode(args, world_size: int) -> bool:
+    """Decide whether the run spans more than one scale-up domain and set the GIN switch.
+
+    The domain size is the first truthy value among ``args.scale_up_domain``,
+    ``args.gpus_per_node`` and ``world_size``. When the world is larger than that
+    domain, ``EP_DISABLE_GIN`` is removed from the environment; otherwise it is
+    set to ``"1"``.
+
+    Returns:
+        Whether ``world_size`` exceeds the scale-up domain.
+    """
+    domain = getattr(args, "scale_up_domain", None)
+    if not domain:
+        domain = getattr(args, "gpus_per_node", None)
+    if not domain:
+        domain = world_size
+    spans_domains = world_size > int(domain)
+    if not spans_domains:
+        os.environ["EP_DISABLE_GIN"] = "1"
+        return spans_domains
+    os.environ.pop("EP_DISABLE_GIN", None)
+    return spans_domains
+
+
+def _lsa_topology_is_valid(
+    gin_enabled: bool, world_size: int, config: dict[str, int | bool]
+) -> bool:
+    """Accept any topology with GIN on; without GIN require one domain holding every rank.
+
+    With GIN disabled the realized ``config`` must report a single RDMA rank and a
+    single scale-out rank, ``world_size`` NVLink and scale-up ranks, and
+    ``is_scaleup_nvlink`` set to exactly ``True``.
+    """
+    if gin_enabled:
+        return gin_enabled
+    required_counts = (
+        ("physical_rdma_ranks", 1),
+        ("physical_nvlink_ranks", world_size),
+        ("logical_scaleout_ranks", 1),
+        ("logical_scaleup_ranks", world_size),
+    )
+    counts_match = all(config[key] == wanted for key, wanted in required_counts)
+    return counts_match and config["is_scaleup_nvlink"] is True
+
+
+def _require_runtime() -> tuple[str, str]:
+    """Refuse to run unless the pinned DeepEP V2 environment and packages are in place.
+
+    Every deviation is collected first so that one error lists all of them.
+
+    Returns:
+        ``(torch_version, nccl_runtime_version)``.
+
+    Raises:
+        RuntimeError: If a pinned environment variable, a package version, the
+            ``ElasticBuffer`` class, or the loaded NCCL version is not as required,
+            or if ``EP_SUPPRESS_NCCL_CHECK`` is set to a non-empty value.
+    """
+    problems = []
+
+    # Environment variables that must carry exactly these values.
+    pinned_env = {
+        "DEEPEP_V2_PR": str(DEEPEP_V2_PR),
+        "DEEPEP_V2_FIX_PR": str(DEEPEP_V2_FIX_PR),
+        "DEEPEP_V2_COMMIT": DEEPEP_V2_COMMIT,
+        "DEEPEP_V2_TREE": DEEPEP_V2_TREE,
+        "DEEPEP_V2_FMT_COMMIT": DEEPEP_V2_FMT_COMMIT,
+        "DEEPEP_V2_JIT_RANDOM_SEED": DEEPEP_V2_JIT_RANDOM_SEED,
+        "EP_JIT_DUMP_SASS": "1",
+    }
+    for variable, wanted in pinned_env.items():
+        found = os.environ.get(variable)
+        if found != wanted:
+            problems.append(f"{variable}={found!r}, expected {wanted!r}")
+
+    # Installed versions, checked in a fixed order so the message is stable.
+    torch_version = str(torch.__version__)
+    nccl_package_version = importlib.metadata.version("nvidia-nccl-cu13")
+    nvshmem_package_version = importlib.metadata.version("nvidia-nvshmem-cu12")
+    package_checks = (
+        ("deep_ep", str(getattr(deep_ep, "__version__", "")), DEEPEP_V2_VERSION),
+        (
+            "deep_ep distribution",
+            importlib.metadata.version("deep_ep"),
+            DEEPEP_V2_DISTRIBUTION,
+        ),
+        ("torch", torch_version, TORCH_VERSION),
+        ("nvidia-nccl-cu13", nccl_package_version, NCCL_VERSION),
+        ("nvidia-nvshmem-cu12", nvshmem_package_version, NVSHMEM_VERSION),
+    )
+    for label, found, wanted in package_checks:
+        if found != wanted:
+            problems.append(f"{label}={found!r}, expected {wanted!r}")
+
+    api_present = inspect.isclass(ElasticBuffer) and ElasticBuffer.__name__ == "ElasticBuffer"
+    if not api_present:
+        problems.append("deep_ep.ElasticBuffer is absent")
+    if os.environ.get("EP_SUPPRESS_NCCL_CHECK"):
+        problems.append("EP_SUPPRESS_NCCL_CHECK must be unset")
+
+    # The NCCL library actually loaded must match the pinned version as well.
+    nccl_runtime_version = _loaded_nccl_version()
+    if nccl_runtime_version != NCCL_VERSION:
+        problems.append(
+            f"loaded NCCL={nccl_runtime_version!r}, expected {NCCL_VERSION!r}"
+        )
+
+    if problems:
+        raise RuntimeError("invalid DeepEP V2 runtime: " + "; ".join(problems))
+    return torch_version, nccl_runtime_version
+
+
+class DeepEPV2Backend:
+    """Benchmark adapter that drives ``deep_ep.ElasticBuffer`` in normal mode.
+
+    Construction validates the runtime, creates the buffer, checks that the
+    realized tuning and topology agree on every rank, and assembles
+    ``backend_provenance``. The remaining methods implement the dispatch, stage,
+    combine, inspection and teardown hooks called by the harness.
+    """
+
+    name = "deepep-v2"
+    combine_needs_redispatch = False
+    combine_weight_semantics = "unweighted-rank-sum"
+
+    def __init__(self, args, rank, world_size, local_rank, device):
+        """Build the buffer and record the provenance of the realized configuration.
+
+        Raises:
+            RuntimeError: If the runtime is not the pinned one, if a cross-rank
+                check finds a difference, or if a no-GIN run does not sit inside
+                one LSA domain.
+            KeyError: If ``EP_JIT_CACHE_DIR`` is not set in the environment.
+        """
+        self.args = args
+        self.rank = rank
+        self.world_size = world_size
+        self.device = device
+        self.mode = "normal"
+        self.group = dist.group.WORLD
+        # Reject a mismatched runtime before any buffer is created.
+        torch_version, nccl_runtime_version = _require_runtime()
+        # Size the buffer for the largest token count in either the measured ladder
+        # or this phase's conditioning ladder.
+        ladder, _ = ep_harness.token_ladder(args.tokens_ladder, args.phase, None)
+        conditioning = ep_harness.CONDITIONING_LADDERS[args.phase]
+        self.max_tokens = max([*ladder, *conditioning])
+        jit_root = Path(os.environ["EP_JIT_CACHE_DIR"])
+        # Also sets or clears EP_DISABLE_GIN in the environment.
+        allow_hybrid_mode = _configure_gin_mode(args, world_size)
+        gin_enabled = allow_hybrid_mode
+        communication_backend = "nccl-gin" if gin_enabled else "nccl-device-lsa"
+        # Last JIT artifact listing seen by capture_deferred_provenance(); None until then.
+        self._deferred_jit_snapshot = None
+        self.buffer = ElasticBuffer(
+            self.group,
+            num_max_tokens_per_rank=self.max_tokens,
+            hidden=args.hidden,
+            num_topk=args.topk,
+            use_fp8_dispatch=False,
+            deterministic=False,
+            allow_hybrid_mode=allow_hybrid_mode,
+            allow_multiple_reduction=True,
+            prefer_overlap_with_compute=True,
+            num_gpu_timeout_secs=100,
+            explicitly_destroy=True,
+        )
+        # SM/QP counts come from the buffer's own estimate, keyed on the logical
+        # expert count when the caller provides one.
+        tuning_num_experts = int(getattr(args, "num_logical_experts", args.experts))
+        self.num_sms = int(
+            self.buffer.get_theoretical_num_sms(tuning_num_experts, args.topk)
+        )
+        self.num_qps = int(self.buffer.get_theoretical_num_qps(self.num_sms))
+        properties = torch.cuda.get_device_properties(device)
+        device_sms = int(properties.multi_processor_count)
+        # Realized values that feed the JIT cache key; they must match on every rank.
+        jit_config = {
+            "num_sms": self.num_sms,
+            "num_qps": self.num_qps,
+            "allocated_qps": int(self.buffer.num_allocated_qps),
+            "logical_scaleout_ranks": int(self.buffer.num_scaleout_ranks),
+            "logical_scaleup_ranks": int(self.buffer.num_scaleup_ranks),
+            "physical_rdma_ranks": int(self.buffer.num_rdma_ranks),
+            "physical_nvlink_ranks": int(self.buffer.num_nvlink_ranks),
+            "is_scaleup_nvlink": self.buffer.num_scaleup_ranks == self.buffer.num_nvlink_ranks,
+            "device_arch_major": int(properties.major),
+            "device_arch_minor": int(properties.minor),
+            "device_sms": device_sms,
+            "device_smem_bytes": int(properties.shared_memory_per_block_optin),
+            # NOTE(review): presumably clock_rate is in kHz, making this the cycle
+            # count for the 100 s timeout passed to the buffer above — confirm.
+            "gpu_timeout_cycles": 100 * int(properties.clock_rate) * 1000,
+        }
+        _require_cross_rank_equal(jit_config, "JIT configuration")
+        if not _lsa_topology_is_valid(gin_enabled, world_size, jit_config):
+            raise RuntimeError("DeepEP V2 no-GIN run is outside one realized LSA domain")
+        self.jit_cache_key = _jit_cache_key(
+            args, world_size, self.max_tokens, allow_hybrid_mode, jit_config
+        )
+        # Re-point the JIT cache at a subdirectory named after this configuration.
+        os.environ["EP_JIT_CACHE_DIR"] = str(jit_root / self.jit_cache_key)
+        realized_config = {
+            "jit_cache_key": self.jit_cache_key,
+            "num_max_tokens_per_rank": self.max_tokens,
+            **jit_config,
+        }
+        _require_cross_rank_equal(realized_config, "realized tuning/topology")
+        # Reported as "deepep-managed" unless the handle exposes a falsy `managed`
+        # attribute; a missing handle or attribute also reports "deepep-managed".
+        comm = getattr(self.buffer, "nccl_comm_handle", None)
+        communicator = (
+            "deepep-managed" if getattr(comm, "managed", True) else "pytorch-reused"
+        )
+
+        loaded_libraries = _loaded_library_evidence()
+        _require_cross_rank_equal(loaded_libraries, "loaded libraries")
+        self.backend_provenance = {
+            "deepep_version": DEEPEP_V2_VERSION,
+            "deepep_distribution_version": importlib.metadata.version("deep_ep"),
+            "deepep_commit": DEEPEP_V2_COMMIT,
+            "deepep_tree": DEEPEP_V2_TREE,
+            "deepep_pr": DEEPEP_V2_PR,
+            "deepep_fix_pr": DEEPEP_V2_FIX_PR,
+            "fmt_commit": DEEPEP_V2_FMT_COMMIT,
+            "api": "deep_ep.ElasticBuffer",
+            "api_signature_sha256": _api_sha256(),
+            "communication_backend": communication_backend,
+            "gin_enabled": gin_enabled,
+            "nccl_communicator": communicator,
+            "torch_version": torch_version,
+            "torch_git_version": str(torch.version.git_version),
+            "cuda_version": str(torch.version.cuda),
+            "nccl_package_version": importlib.metadata.version("nvidia-nccl-cu13"),
+            "nccl_version": nccl_runtime_version,
+            "nvshmem_package_version": importlib.metadata.version("nvidia-nvshmem-cu12"),
+            "loaded_libraries": loaded_libraries,
+            "jit_cache_key": self.jit_cache_key,
+            # Filled in later by capture_deferred_provenance().
+            "jit_cubins": [],
+            "jit_random_seed": DEEPEP_V2_JIT_RANDOM_SEED,
+            "num_experts": int(args.experts),
+            "mode": "normal",
+            "dispatch_dtype": "bf16",
+            "combine_dtype": "bf16",
+            "deterministic": False,
+            "resource_mode": "tuned",
+            "requested_num_sms": self.num_sms,
+            "tuning_num_experts": tuning_num_experts,
+            "num_sms": self.num_sms,
+            "num_qps": self.num_qps,
+            "allocated_qps": int(self.buffer.num_allocated_qps),
+            "device_sms": device_sms,
+            "sm_fraction": self.num_sms / device_sms,
+            "tuned_source": "deepep-v2-analytical-sm-qp-logical-experts-v1",
+            "num_max_tokens_per_rank": self.max_tokens,
+            "allow_hybrid_mode": bool(self.buffer.allow_hybrid_mode),
+            "allow_multiple_reduction": bool(self.buffer.allow_multiple_reduction),
+            "prefer_overlap_with_compute": bool(
+                self.buffer.prefer_overlap_with_compute
+            ),
+            "logical_scaleout_ranks": int(self.buffer.num_scaleout_ranks),
+            "logical_scaleup_ranks": int(self.buffer.num_scaleup_ranks),
+            "physical_rdma_ranks": int(self.buffer.num_rdma_ranks),
+            "physical_nvlink_ranks": int(self.buffer.num_nvlink_ranks),
+        }
+
+    def buffer_cap(self, args):
+        """Return the per-rank token capacity the buffer was created with (``args`` is unused)."""
+        return self.max_tokens
+
+    def make_problem(self, T, idx, weights, x):
+        """Bundle one case's inputs, casting indices to ``deep_ep.topk_idx_t`` and weights to float32."""
+        return types.SimpleNamespace(
+            T=T,
+            x=x,
+            topk_idx=idx.to(deep_ep.topk_idx_t),
+            topk_weights=weights.to(torch.float32),
+        )
+
+    def dispatch(self, p):
+        """Run one buffer dispatch for problem ``p`` and return its outputs as a namespace.
+
+        The fifth value returned by ``buffer.dispatch`` is discarded.
+        """
+        recv_x, recv_topk_idx, recv_topk_weights, handle, _ = self.buffer.dispatch(
+            p.x,
+            topk_idx=p.topk_idx,
+            topk_weights=p.topk_weights,
+            num_experts=self.args.experts,
+            num_max_tokens_per_rank=self.max_tokens,
+            expert_alignment=1,
+            num_sms=self.num_sms,
+            num_qps=self.num_qps,
+            async_with_compute_stream=False,
+            do_handle_copy=True,
+            do_cpu_sync=True,
+            do_expand=False,
+        )
+        return types.SimpleNamespace(
+            recv_x=recv_x,
+            recv_topk_idx=recv_topk_idx,
+            recv_topk_weights=recv_topk_weights,
+            handle=handle,
+        )
+
+    def stage(self, p, h):
+        """Use the received activations unchanged as the combine input."""
+        h.combine_input = h.recv_x
+
+    def combine(self, p, h):
+        """Combine the staged input back to its source ranks and return the combined tensor."""
+        combined_x, _, _ = self.buffer.combine(
+            h.combine_input,
+            handle=h.handle,
+            num_sms=self.num_sms,
+            num_qps=self.num_qps,
+            async_with_compute_stream=False,
+        )
+        return combined_x
+
+    def capture_deferred_provenance(self):
+        """Record the JIT artifact evidence once the kernels have been generated.
+
+        Raises:
+            RuntimeError: If the evidence differs across ranks, or differs from the
+                listing captured by an earlier call.
+        """
+        # destroy() uses this same barrier. Materialize its JIT kernel before hashing the
+        # implementation so the first and later routing cases see identical evidence.
+        self.buffer.barrier(use_comm_stream=True, with_cpu_sync=True)
+        torch.cuda.synchronize()
+        jit_cubins = _jit_artifact_evidence()
+        _require_cross_rank_equal(jit_cubins, "JIT CUBINs")
+        if (
+            self._deferred_jit_snapshot is not None
+            and jit_cubins != self._deferred_jit_snapshot
+        ):
+            raise RuntimeError("DeepEP V2 JIT CUBIN set changed after measurement")
+        self._deferred_jit_snapshot = jit_cubins
+        self.backend_provenance["jit_cubins"] = jit_cubins
+
+    def inspect_dispatch(self, p, h):
+        """Expose what this rank received in the normalized form the oracle consumes.
+
+        Only the first ``recv_tokens(h)`` rows are reported. Negative entries in the
+        received index tensor are kept as-is and their weights are zeroed; the other
+        entries are shifted by this rank's expert offset.
+        """
+        count = self.recv_tokens(h)
+        local_idx = h.recv_topk_idx[:count]
+        valid = local_idx >= 0
+        # Offset valid local indices by rank * experts-per-rank; leave negatives untouched.
+        expert_ids = torch.where(
+            valid,
+            local_idx + self.rank * (self.args.experts // self.world_size),
+            local_idx,
+        )
+        local = local_idx[valid].to(torch.int64)
+        return types.SimpleNamespace(
+            payload=h.recv_x[:count],
+            expert_ids=expert_ids,
+            weights=h.recv_topk_weights[:count].masked_fill(~valid, 0),
+            local_expert_counts=torch.bincount(
+                local, minlength=self.args.experts // self.world_size
+            ),
+            ordering_contract="elastic-source-metadata-v1",
+        )
+
+    def combine_transformed(self, p, h, transformed):
+        """Combine caller-supplied expert outputs instead of the raw received activations.
+
+        ``transformed`` is copied into the leading rows of a zero tensor shaped like
+        ``h.recv_x``; the remaining rows stay zero.
+        """
+        combine_input = torch.zeros_like(h.recv_x)
+        combine_input[: transformed.shape[0]].copy_(transformed.to(combine_input.dtype))
+        combined, _, _ = self.buffer.combine(
+            combine_input,
+            handle=h.handle,
+            num_sms=self.num_sms,
+            num_qps=self.num_qps,
+            async_with_compute_stream=False,
+        )
+        return combined
+
+    def recv_tokens(self, h):
+        """Return the number of tokens this rank received in the dispatch behind ``h``."""
+        # NOTE(review): presumably a prefix sum whose last entry is the total — confirm.
+        return int(h.handle.psum_num_recv_tokens_per_scaleup_rank[-1].item())
+
+    def finalize(self, rc):
+        """Destroy the buffer and the process group between barriers.
+
+        Returns:
+            ``rc`` when teardown succeeds, or ``1`` when any teardown step raises.
+            The exception itself is swallowed.
+        """
+        try:
+            dist.barrier()
+            self.buffer.destroy()
+            dist.barrier()
+            dist.destroy_process_group()
+        except Exception:
+            return 1
+        return rc
diff --git a/experimental/CollectiveX/tests/ep_harness.py b/experimental/CollectiveX/tests/ep_harness.py
new file mode 100644
index 0000000000..ca9dee8fcf
--- /dev/null
+++ b/experimental/CollectiveX/tests/ep_harness.py
@@ -0,0 +1,1362 @@
+#!/usr/bin/env python3
+"""CollectiveX — shared EP (expert-parallel) dispatch/combine benchmark harness.
+
+Backend-agnostic core. The per-backend adapters (`ep_deepep.py`, `ep_mori.py`)
+implement a small duck-typed protocol; this module owns the source-tokens-per-rank
+sweep, the timing, the correctness gate, and the provenance-tagged JSON doc.
+
+Fair-comparison contract (see docs/methodology.md):
+  * **Deterministic shared routing trace** (`routing.py`): the per-token expert IDs +
+    gate weights are generated once from a fixed seed over the *global* batch and are
+    identical on every SKU; each rank materializes its slice. So every platform runs
+    the *same* problem (no per-rank/per-platform RNG in the adapters).
+  * **Explicit measurement contract**: layout-and-dispatch-v1 includes routing-layout
+    generation in dispatch timing. Combine excludes staging.
+    Isolated sum is derived independently at each percentile and is not a measured chained op.
+  * **Correct collective percentile**: each iteration's latency is reduced MAX across
+    ranks first (a collective finishes with its slowest rank), THEN percentiled —
+    `median_i(max_r)`, not `max_r(median_i)`.
+  * **One line = one fixed config**; only T varies. Both `tokens_per_rank` and
+    `global_tokens = T * ep_size` are recorded as explicit chart coordinates.
+
+stdlib-only at module top (torch is passed in by the entrypoint; `routing` is imported
+lazily inside run_sweep) so this file `py_compile`s without torch.
+
+Backend protocol:
+    name, mode, combine_needs_redispatch, backend_provenance(dict)
+    buffer_cap(args) -> int|None
+    make_problem(T, idx, weights, x) -> problem   # materialize this rank's trace slice
+    dispatch(problem) -> handle                   # pure dispatch comm (timed)
+    stage(problem, handle)                        # untimed expert-output placement
+    combine(problem, handle) -> tensor            # pure combine comm (timed)
+    inspect_dispatch(problem, handle) -> view     # normalized payload/expert/weight metadata
+    combine_transformed(problem, handle, tensor) -> tensor
+    recv_tokens(handle) -> int                    # realized tokens received this rank
+    finalize(rc) -> int|NoReturn
+"""
+from __future__ import annotations
+
+import argparse
+import datetime as _dt
+import hashlib
+import json
+import math
+import os
+
+import contracts
+import identity
+import workload as workload_contract
+
+# Raw v1 result emitted by one benchmark case. Publication uses a separate contract.
+SCHEMA_VERSION = 1
+
+# Every comparison-grade EP point uses the same literal timing profile on every SKU/backend.
+# Eight timed iterations keep each MoRI burst well below its sustained-iteration wedge, 64 trials
+# provide 512 observations per operation, and 32 warmups meet Blackwell's measured clock-ramp floor.
+SAMPLING_CONTRACT = identity.V1_CASE_PROFILE["sampling_contract"]
+# Equals TIMED_ITERS_PER_TRIAL * TRIALS_PER_POINT (8 * 64).
+TIMED_SAMPLES_PER_POINT = 512
+TIMED_ITERS_PER_TRIAL = 8
+TRIALS_PER_POINT = 64
+WARMUP_ITERS_PER_TRIAL = 32
+WARMUP_SEMANTICS = "full-roundtrip-before-each-component-trial-point-v1"
+# Default value of the --seed option.
+ROUTING_SEED = 67
+ROUTING_GENERATOR = workload_contract.GENERATOR_VERSION
+ACTIVATION_PROFILE = "canonical-counter-source-v3"
+ACTIVATION_GENERATOR = workload_contract.ACTIVATION_GENERATOR
+PLACEMENT = "packed"
+COMPONENT_ORDER_CONTRACT = "roundtrip-dispatch-activation-only-combine-v2"
+
+# Phase-default sweeps — token-size regimes, NOT distinct kernels (both run normal
+# mode; "decode"/"prefill" name the small/large-token regime). Powers of two for a
+# clean log x-axis; clamped to the backend buffer ceiling (MoRI's registerable heap).
+DECODE_LADDER = [1, 2, 4, 8, 16, 32, 64, 128]
+PREFILL_LADDER = [128, 256, 512, 1024, 2048, 4096]
+# Per-phase conditioning ladders, copied into fresh lists from the contracts module.
+CONDITIONING_LADDERS = {
+    phase: list(ladder) for phase, ladder in contracts.V1_CONDITIONING_LADDERS.items()
+}
+CONDITIONING_ROUNDS_PER_SHAPE = contracts.V1_CONDITIONING_ROUNDS_PER_SHAPE
+CONDITIONING_CONTRACT = identity.V1_CASE_PROFILE["conditioning_contract"]
+ORACLE_CONTRACT = identity.V1_CASE_PROFILE["oracle_contract"]
+# Tolerances reported in the expert-oracle result as "rtol" and "atol".
+ORACLE_RTOL = 5e-2
+ORACLE_ATOL = 2e-2
+
+BF16_BYTES = 2
+EPLB_REDUNDANT_EXPERTS = 32
+EPLB_REFERENCE_TOKENS_PER_RANK = 2048
+EPLB_PLANNER = "greedy-rank-major-v1"
+# Fixed v1 profile values. add_common_args() installs them as parser defaults, so they
+# appear on the parsed namespace without being CLI options.
+V1_PROFILE = {
+    "dispatch_dtype": "bf16",
+    "combine_dtype": "bf16",
+    "combine_quant_mode": "none",
+    "mode": "normal",
+    "measurement_contract": "layout-and-dispatch-v1",
+    "resource_mode": "tuned",
+    "placement": PLACEMENT,
+    "activation_profile": ACTIVATION_PROFILE,
+    "activation_generator": ACTIVATION_GENERATOR,
+    "routing_generator": ROUTING_GENERATOR,
+    "component_order_contract": COMPONENT_ORDER_CONTRACT,
+    "conditioning_contract": CONDITIONING_CONTRACT,
+    "eplb_reference_tokens_per_rank": EPLB_REFERENCE_TOKENS_PER_RANK,
+    "eplb_redundant_experts": EPLB_REDUNDANT_EXPERTS,
+    "eplb_planner": EPLB_PLANNER,
+    # DeepEP/UCCL use this only as the fallback when their tuned default is not exported.
+    "num_sms": 24,
+}
+
+def format_collective_version(raw) -> str:
+    """Render a collective-library version as a dotted string.
+
+    Integers are decoded as a packed version code: values below 10000 use the
+    ``major*1000 + minor*100 + patch`` layout, larger values use
+    ``major*10000 + minor*100 + patch``. Tuples and lists are joined with dots.
+    ``None`` and the empty string become ``"unknown"``; anything else is
+    passed through ``str``.
+    """
+    if isinstance(raw, int):
+        patch = raw % 100
+        if raw >= 10_000:
+            major, minor = raw // 10_000, raw // 100 % 100
+        else:
+            major, minor = raw // 1000, raw // 100 % 10
+        return f"{major}.{minor}.{patch}"
+    if isinstance(raw, (tuple, list)):
+        return ".".join(str(part) for part in raw)
+    if raw in (None, ""):
+        return "unknown"
+    return str(raw)
+
+
+def add_common_args(ap: argparse.ArgumentParser) -> None:
+    """Register the per-case CLI inputs on ``ap``.
+
+    The fixed v1 profile is installed through ``set_defaults`` only, so its values
+    reach the parsed namespace without being selectable on the command line.
+    """
+    ap.set_defaults(**V1_PROFILE)
+    add = ap.add_argument
+
+    # Workload shape.
+    add("--phase", choices=["decode", "prefill"], default="decode",
+        help="token-size regime: decode (small T) / prefill (large T) — picks the default ladder")
+    add("--tokens-ladder", default="",
+        help="space/comma-separated source-tokens-per-rank sweep; blank = phase default")
+    add("--hidden", default=7168, type=int)
+    add("--topk", default=8, type=int)
+    add("--experts", default=256, type=int, help="TOTAL experts (fixed across EP degrees)")
+    add("--routing", choices=["uniform", "zipf"], default="uniform")
+    # EPLB (Expert-Parallel Load Balancer) replicates hot experts onto redundant physical
+    # slots and places them so per-rank load equalizes. It is a pure routing-trace
+    # transform (tests/eplb.py); experts becomes num_logical+redundant. The remedy for
+    # `zipf` skew.
+    add("--eplb", action="store_true",
+        help="apply EPLB expert replication/placement to the routing trace")
+    # Canonical workloads read pre-generated trace bytes rather than the seeded runtime
+    # generator, so a checksum match proves two machines ran the SAME workload. The
+    # directory holds <workload_id>.npz/.manifest.json files (make_workloads.py).
+    add("--workload-dir", default="",
+        help="dir of canonical workload traces; empty = seeded runtime generation (dev)")
+
+    # Case identity labels, all free-form strings that default to empty.
+    for flag in ("--case-id", "--suite", "--workload-name", "--required-publication"):
+        add(flag, default="")
+    add("--seed", default=ROUTING_SEED, type=int)
+
+    # Sampling profile. Warmup is 32 because B300/Blackwell needs ~30 untimed iters to
+    # reach steady-state GPU clocks and establish NVLink/NVSHMEM connections: at warmup=8
+    # its dispatch read ~1787us (cold), at warmup>=30 it settles to ~85us (faster than
+    # H100, reproducible within ~2.5%). H100/MI355X reach steady state much sooner; the
+    # extra iters are harmless.
+    sampling_options = (
+        ("--warmup", WARMUP_ITERS_PER_TRIAL,
+         f"untimed full roundtrips before each trial/point; fixed by "
+         f"{SAMPLING_CONTRACT} to {WARMUP_ITERS_PER_TRIAL}"),
+        ("--iters", TIMED_ITERS_PER_TRIAL,
+         f"timed iterations per trial; fixed by {SAMPLING_CONTRACT} to "
+         f"{TIMED_ITERS_PER_TRIAL}"),
+        ("--trials", TRIALS_PER_POINT,
+         f"timed trials; fixed by {SAMPLING_CONTRACT} to {TRIALS_PER_POINT}"),
+    )
+    for flag, fixed_value, help_text in sampling_options:
+        add(flag, default=fixed_value, type=int, help=help_text)
+
+    # Provenance and output.
+    add("--runner", required=True)
+    add("--topology-class", required=True)
+    add("--transport", default="")
+    # gpus-per-node=0 means one node containing the whole EP group.
+    add("--gpus-per-node", default=0, type=int)
+    add("--scale-up-domain", default=0, type=int, help="0 = gpus_per_node*ep (one domain)")
+    add("--timestamp")
+    add("--out", required=True)
+
+
+def token_ladder(spec: str, phase: str, cap: int | None) -> tuple[list[int], list[int]]:
+    """Resolve the token sweep and split it at the buffer ceiling.
+
+    A non-blank ``spec`` (comma- and/or space-separated integers) wins; otherwise the
+    default ladder for ``phase`` is used. Non-positive entries are discarded and the
+    rest are deduplicated and sorted ascending.
+
+    Returns:
+        ``(ladder, dropped)``. With ``cap`` set, ``ladder`` keeps the points that are
+        ``<= cap`` and ``dropped`` reports the ones above it. Without a cap nothing
+        is dropped.
+    """
+    if spec and spec.strip():
+        requested = [int(token) for token in spec.replace(",", " ").split() if token]
+    elif phase == "decode":
+        requested = DECODE_LADDER
+    else:
+        requested = PREFILL_LADDER
+    points = sorted({count for count in requested if count > 0})
+    if cap is None:
+        return points, []
+    kept, dropped = [], []
+    for count in points:
+        (kept if count <= cap else dropped).append(count)
+    return kept, dropped
+
+
+def sampling_contract_error(iters: int, trials: int, warmup: int) -> str | None:
+    """Explain how the given timing profile deviates from the fixed cross-SKU one.
+
+    Returns:
+        ``None`` when ``iters``, ``trials`` and ``warmup`` all equal the contract
+        values, otherwise a user-facing message naming the required and observed
+        triples and the resulting timed-sample count.
+    """
+    matches = (
+        iters == TIMED_ITERS_PER_TRIAL
+        and trials == TRIALS_PER_POINT
+        and warmup == WARMUP_ITERS_PER_TRIAL
+    )
+    if matches:
+        return None
+    # A sample count is only meaningful when both factors are positive.
+    samples = iters * trials if iters > 0 and trials > 0 else 'invalid'
+    required = (
+        f"{TIMED_ITERS_PER_TRIAL}:{TRIALS_PER_POINT}:{WARMUP_ITERS_PER_TRIAL}"
+    )
+    return (
+        f"{SAMPLING_CONTRACT} requires exactly iters:trials:warmup="
+        f"{required} on every SKU/backend; got "
+        f"{iters}:{trials}:{warmup} "
+        f"({samples} timed samples)"
+    )
+
+
+def _stats_vec(xs: list[int]) -> dict:
+    """Summarize a per-rank count vector without dumping the full vector.
+
+    Args:
+        xs: One count per rank (source tokens or load).
+
+    Returns:
+        A dict with ``min``, ``mean`` (rounded to 3 places), ``max``, ``cv``
+        (population coefficient of variation, rounded to 4 places, ``0.0`` when the
+        mean is not positive), ``empty_ranks`` (entries equal to zero), ``total``
+        and ``ranks`` (the number of entries). An empty vector yields zeros
+        throughout, including ``ranks``.
+    """
+    ranks = len(xs)
+    # Divide by 1 for an empty vector so the mean and variance stay defined; the
+    # reported rank count still uses the true length.
+    divisor = ranks or 1
+    mean = sum(xs) / divisor
+    var = sum((x - mean) ** 2 for x in xs) / divisor
+    cv = (var ** 0.5 / mean) if mean > 0 else 0.0
+    return {"min": min(xs) if xs else 0, "mean": round(mean, 3),
+            "max": max(xs) if xs else 0, "cv": round(cv, 4),
+            "empty_ranks": sum(1 for x in xs if x == 0), "total": sum(xs), "ranks": ranks}
+
+
+def percentile(xs: list[float], q: float) -> float:
+    """Return the nearest-rank ``q``-th percentile of ``xs`` (NaN for an empty list).
+
+    The rank is ``ceil(q / 100 * len(xs))``, clamped so that the smallest element is
+    returned for very small ``q`` and the largest for very large ``q``.
+    """
+    if not xs:
+        return float("nan")
+    ordered = sorted(xs)
+    last = len(ordered) - 1
+    position = math.ceil(q / 100.0 * len(ordered)) - 1
+    if position > last:
+        position = last
+    if position < 0:
+        position = 0
+    return ordered[position]
+
+
+def _sha256_json(value) -> str:
+    """Return the SHA-256 hex digest of the canonical JSON encoding of ``value``.
+
+    Canonical means key-sorted, compact separators, non-ASCII characters kept
+    verbatim and NaN/Infinity rejected.
+    """
+    canonical = json.dumps(
+        value,
+        sort_keys=True,
+        separators=(",", ":"),
+        ensure_ascii=False,
+        allow_nan=False,
+    )
+    hasher = hashlib.sha256(canonical.encode())
+    return hasher.hexdigest()
+
+
+def _series_provenance(provenance: dict) -> dict:
+    """Retain stable semantic build identity while keeping raw binaries diagnostic."""
+    # Pure delegation: the selection rules live in contracts.series_provenance.
+    return contracts.series_provenance(provenance)
+
+
+def _write_bytes_atomic(path: str, payload: bytes) -> tuple[str, int]:
+    """Write ``payload`` to ``path`` through a temporary file and an atomic rename.
+
+    The parent directory is created when missing. The bytes are flushed and fsynced
+    to a sibling ``<path>.tmp-<pid>`` file before it replaces ``path``; the
+    temporary file is removed if anything fails along the way.
+
+    Returns:
+        ``(sha256_hex, byte_count)`` of the payload.
+    """
+    parent = os.path.dirname(os.path.abspath(path))
+    os.makedirs(parent, exist_ok=True)
+    scratch = f"{path}.tmp-{os.getpid()}"
+    try:
+        with open(scratch, "wb") as stream:
+            stream.write(payload)
+            stream.flush()
+            os.fsync(stream.fileno())
+        os.replace(scratch, path)
+    finally:
+        # After a successful replace the scratch file is already gone.
+        try:
+            os.unlink(scratch)
+        except FileNotFoundError:
+            pass
+    digest = hashlib.sha256(payload).hexdigest()
+    return digest, len(payload)
+
+
+def _write_json_atomic(path: str, value) -> tuple[str, int]:
+    """Serialize ``value`` as indented JSON plus a trailing newline and write it atomically.
+
+    NaN/Infinity are rejected and non-ASCII characters are written verbatim.
+
+    Returns:
+        ``(sha256_hex, byte_count)`` of the bytes written.
+    """
+    text = json.dumps(value, allow_nan=False, ensure_ascii=False, indent=2)
+    encoded = f"{text}\n".encode()
+    return _write_bytes_atomic(path, encoded)
+
+
+def time_us(torch, fn, warmup: int, iters: int, pre=None) -> list[float]:
+    """Measure per-iteration CUDA-event latencies (µs) on THIS rank.
+
+    Without ``pre`` each sample times ``fn()``. With ``pre``, every iteration first
+    runs ``pre()`` UNTIMED and synchronizes so none of its GPU work leaks into the
+    measurement, then times ``fn(pre_result)``. That is how combine is isolated when
+    it consumes dispatch state and needs a fresh untimed dispatch+stage per sample.
+
+    Returns:
+        The raw series of ``iters`` latencies; the caller reduces across ranks per
+        iteration before taking percentiles.
+    """
+    has_prelude = pre is not None
+
+    def prepare():
+        # Run the untimed prelude (if any) and hand back the zero-argument timed call.
+        if not has_prelude:
+            return fn
+        state = pre()
+        torch.cuda.synchronize()
+        return lambda: fn(state)
+
+    def measure():
+        timed_call = prepare()
+        start = torch.cuda.Event(enable_timing=True)
+        stop = torch.cuda.Event(enable_timing=True)
+        start.record()
+        timed_call()
+        stop.record()
+        torch.cuda.synchronize()
+        return start.elapsed_time(stop) * 1000.0  # ms -> us
+
+    for _ in range(max(0, warmup)):
+        prepare()()
+        # Synchronize after EVERY warmup iteration rather than once after the loop. The
+        # measured-roundtrip fn interleaves dispatch+combine on a backend's persistent comm
+        # buffer, so un-synced back-to-back iterations would let iteration N+1's dispatch
+        # race iteration N's combine (CUDA abort on a rank -> NCCL-watchdog SIGABRT).
+        # Warmup is small, so this is cheap; timed samples already synchronize.
+        torch.cuda.synchronize()
+
+    latencies = []
+    for _ in range(iters):
+        latencies.append(measure())
+    return latencies
+
+
+def kernel_generation(backend) -> str:
+    """Report the backend's kernel family.
+
+    A truthy ``kernel_generation`` attribute on the adapter takes precedence;
+    otherwise the family is looked up from ``backend.name``, with ``"n-a"`` for
+    names that have no entry.
+    """
+    explicit = getattr(backend, "kernel_generation", None)
+    if not explicit:
+        family_by_name = {
+            "deepep": "v1",
+            "deepep-v2": "v2-elastic-buffer",
+            "deepep-hybrid": "hybrid",
+        }
+        return family_by_name.get(backend.name, "n-a")
+    return explicit
+
+
+def _reduce_vec(torch, dist, device, vals, op):
+    """All-reduce ``vals`` element-wise across ranks with ``op`` and return Python floats.
+
+    The values travel as a float64 tensor on ``device``.
+    """
+    buffer = torch.tensor(vals, dtype=torch.float64, device=device)
+    dist.all_reduce(buffer, op=op)
+    return list(map(float, buffer.tolist()))
+
+
+def _reduce_int(torch, dist, device, v: int, op) -> int:
+    """All-reduce a single integer across ranks with ``op`` and return the result.
+
+    The value travels as a one-element int64 tensor on ``device``.
+    """
+    cell = torch.tensor([int(v)], dtype=torch.int64, device=device)
+    dist.all_reduce(cell, op=op)
+    reduced = cell.item()
+    return int(reduced)
+
+
+def _same_hash_across_ranks(torch, dist, device, digest: str) -> bool:
+    """Report whether every rank holds the same 64-hex-character digest.
+
+    The digest is split into eight 32-bit words. The element-wise MIN and MAX
+    across ranks coincide only when all ranks supplied identical words.
+    """
+    words = []
+    for start in range(0, 64, 8):
+        words.append(int(digest[start:start + 8], 16))
+    minimum = torch.tensor(words, dtype=torch.int64, device=device)
+    maximum = minimum.clone()
+    dist.all_reduce(minimum, op=dist.ReduceOp.MIN)
+    dist.all_reduce(maximum, op=dist.ReduceOp.MAX)
+    return bool(torch.equal(minimum, maximum))
+
+
+def _tensor_sha256(*tensors) -> str:
+    """Return the SHA-256 hex digest of the tensors' raw bytes, taken in argument order.
+
+    Each tensor is detached, made contiguous and moved to the CPU before its bytes
+    are read through NumPy.
+    """
+    hasher = hashlib.sha256()
+    for tensor in tensors:
+        host_copy = tensor.detach().contiguous().cpu()
+        hasher.update(host_copy.numpy().tobytes())
+    return hasher.hexdigest()
+
+
+def _normalized_expert_metadata(torch, expert_ids, weights):
+    """Order every row by ascending expert ID, pushing negative sentinels to the end.
+
+    Returns:
+        ``(ids, weights)``: int64 IDs in sorted order with every negative entry
+        written as ``-1``, and float32 weights permuted the same way with the
+        sentinel positions zeroed.
+    """
+    ids64 = expert_ids.to(torch.int64)
+    # Sentinels get a large key so the stable sort places them after every real ID.
+    sentinel_key = torch.full_like(expert_ids, 1 << 30)
+    sort_keys = torch.where(expert_ids >= 0, ids64, sentinel_key)
+    permutation = torch.argsort(sort_keys, dim=1, stable=True)
+
+    ordered_ids = ids64.gather(1, permutation)
+    ordered_weights = weights.to(torch.float32).gather(1, permutation)
+    is_real = ordered_ids >= 0
+
+    normalized_ids = torch.where(is_real, ordered_ids, torch.full_like(ordered_ids, -1))
+    normalized_weights = ordered_weights.masked_fill(~is_real, 0)
+    return normalized_ids, normalized_weights
+
+
+def _expert_transform(torch, payload, expert_ids, weights, combine_weight_semantics):
+    """Apply the gate-weighted sum of per-expert affine maps to each received row.
+
+    Every expert ID selects a deterministic scale and two offsets. Slots with a
+    negative ID contribute nothing because their gate is zeroed. The result is
+    cast back to ``payload``'s dtype.
+
+    Raises:
+        ValueError: If ``combine_weight_semantics`` is not ``"unweighted-rank-sum"``.
+    """
+    if combine_weight_semantics != "unweighted-rank-sum":
+        raise ValueError("v1 requires unweighted rank-sum combine")
+
+    is_real = expert_ids >= 0
+    expert = expert_ids.clamp(min=0).to(torch.int64)
+    gate = weights.to(torch.float32).masked_fill(~is_real, 0)
+
+    def gated_row_sum(coefficient):
+        # Per-row sum over the top-k slots, kept as a column for broadcasting.
+        return (gate * coefficient).sum(dim=1, keepdim=True)
+
+    scale_sum = gated_row_sum(((expert * 17 + 5) % 31 + 1).to(torch.float32) / 32)
+    offset_a_sum = gated_row_sum((((expert * 29 + 7) % 37) - 18).to(torch.float32) / 64)
+    offset_b_sum = gated_row_sum((((expert * 43 + 11) % 41) - 20).to(torch.float32) / 128)
+
+    column_index = torch.arange(payload.shape[1], device=payload.device, dtype=torch.int64)
+    column_pattern = (((column_index * 13) % 17) - 8).to(torch.float32) / 8
+
+    result = (
+        payload.float() * scale_sum + offset_a_sum + offset_b_sum * column_pattern.unsqueeze(0)
+    )
+    return result.to(payload.dtype)
+
+
+def _expected_transformed_combine(torch, problem):
+    """Compute, from the source side only, sum_i gate_i * expert_i(x) for each token.
+
+    The result is a float32 tensor shaped like ``problem.x``, accumulated one
+    top-k slot at a time.
+    """
+    source = problem.x.float()
+    slot_experts = problem.topk_idx.to(torch.int64)
+    slot_gates = problem.topk_weights.to(torch.float32)
+    column_index = torch.arange(problem.x.shape[1], device=problem.x.device, dtype=torch.int64)
+    column_pattern = ((((column_index * 13) % 17) - 8).to(torch.float32) / 8).unsqueeze(0)
+
+    total = torch.zeros_like(problem.x, dtype=torch.float32)
+    for slot in range(slot_experts.shape[1]):
+        expert = slot_experts[:, slot]
+        scale = (((expert * 17 + 5) % 31 + 1).to(torch.float32) / 32).unsqueeze(1)
+        offset_a = ((((expert * 29 + 7) % 37) - 18).to(torch.float32) / 64).unsqueeze(1)
+        offset_b = ((((expert * 43 + 11) % 41) - 20).to(torch.float32) / 128).unsqueeze(1)
+        expert_output = source * scale + offset_a + offset_b * column_pattern
+        total.add_(slot_gates[:, slot].unsqueeze(1) * expert_output)
+    return total
+
+
+def _run_expert_oracle(
+    torch,  # torch module, injected by the caller
+    routing,  # helper module: decode_source_ids / activations_for_source_ids
+    backend,  # adapter under test: dispatch / inspect_dispatch / stage / combine / combine_transformed
+    problem,  # carries x, topk_idx, topk_weights; recv_tokens is assigned on it here
+    global_idx,  # global expert-id table, row-indexed by decoded source id
+    global_weights,  # gate weights, row-aligned with global_idx
+    rank: int,  # this process's rank; experts with id // experts_per_rank == rank count as local
+    experts_per_rank: int,
+    seed: int,  # forwarded to the routing helpers that decode ids and regenerate payloads
+):
+    """Run one untimed dispatch -> transform -> combine round trip and return per-check evidence."""
+    handle = backend.dispatch(problem)
+    torch.cuda.synchronize()  # let the dispatch finish before its output is inspected
+    try:
+        view = backend.inspect_dispatch(problem, handle)
+        source_ids = routing.decode_source_ids(view.payload, seed)
+    except Exception as inspection_error:
+        try:  # still run the stage/combine half of the round trip before reporting
+            problem.recv_tokens = backend.recv_tokens(handle)
+            backend.stage(problem, handle)
+            backend.combine(problem, handle)
+            torch.cuda.synchronize()
+        except Exception as cleanup_error:
+            raise inspection_error from cleanup_error  # re-raise the original, chained to the cleanup error
+        return {  # inspection failed but cleanup succeeded: report a failed oracle instead of raising
+            "contract": ORACLE_CONTRACT,
+            "passed": False,
+            "ordering_contract": "adapter-inspection-failed",
+            "order_sha256": None,
+            "dispatch_sha256": None,
+            "combine_weight_semantics": getattr(
+                backend, "combine_weight_semantics", "undeclared"
+            ),
+            "receive_count": 0,  # nothing was decoded on this path
+            "atol": ORACLE_ATOL,
+            "max_absolute_error": None,
+            "max_elementwise_relative_error": None,
+            "max_relative_error": None,
+            "max_weight_error": None,
+            "rtol": ORACLE_RTOL,
+            "checks": {
+                "combine_values": False,
+                "counts": False,
+                "metadata": False,
+                "multiplicity": False,
+                "payload": False,
+                "source_set": False,
+                "weights": False,
+            },
+        }
+
+    receive_count = int(view.payload.shape[0])  # number of rows in the received payload
+    shape_ok = (
+        view.payload.ndim == 2
+        and view.expert_ids.shape == (receive_count, problem.topk_idx.shape[1])
+        and view.weights.shape == view.expert_ids.shape
+    )
+    source_range = bool(  # every decoded id must index a row of global_idx
+        receive_count == 0
+        or ((source_ids >= 0) & (source_ids < global_idx.shape[0])).all().item()
+    )
+    if source_range:
+        expected_idx = global_idx.to(problem.x.device).index_select(0, source_ids)
+        expected_weights = global_weights.to(problem.x.device).index_select(0, source_ids)
+        local = (expected_idx // experts_per_rank) == rank  # slots whose expert lives on this rank
+        expected_ids = torch.where(local, expected_idx, torch.full_like(expected_idx, -1))  # non-local -> -1
+        expected_weights = expected_weights.masked_fill(~local, 0)  # non-local weights -> 0
+        expected_payload = routing.activations_for_source_ids(
+            source_ids, problem.x.shape[1], seed, problem.x.dtype
+        )
+    else:  # ids out of range: placeholders only; payload/source_set already fail via source_range
+        expected_ids = torch.full_like(view.expert_ids, -1)
+        expected_weights = torch.zeros_like(view.weights)
+        expected_payload = torch.empty_like(view.payload)
+    actual_ids, actual_weights = _normalized_expert_metadata(
+        torch, view.expert_ids, view.weights
+    )
+    expected_ids, expected_weights = _normalized_expert_metadata(
+        torch, expected_ids, expected_weights
+    )
+    expected_sources = (  # indices of global rows with at least one expert on this rank
+        ((global_idx // experts_per_rank) == rank).any(dim=1).nonzero(as_tuple=True)[0]
+    ).to(problem.x.device)
+    source_set_ok = (
+        source_range
+        and source_ids.numel() == torch.unique(source_ids).numel()  # no duplicate source ids
+        and torch.equal(torch.sort(source_ids).values, expected_sources)
+    )
+    payload_ok = source_range and torch.equal(view.payload, expected_payload)  # exact match
+    metadata_ok = shape_ok and torch.equal(actual_ids, expected_ids)
+    max_weight_error = (
+        float((actual_weights - expected_weights).abs().max().item())
+        if actual_weights.numel()
+        else 0.0
+    )
+    weights_ok = max_weight_error == 0.0  # weights must match exactly, no tolerance
+    valid_expected = expected_ids >= 0
+    expected_local = expected_ids[valid_expected] - rank * experts_per_rank  # rank-local expert index
+    expected_counts = torch.bincount(expected_local, minlength=experts_per_rank)
+    counts_ok = torch.equal(
+        view.local_expert_counts.to(torch.int64), expected_counts.to(torch.int64)
+    )
+    multiplicity_ok = torch.equal(  # same number of valid expert slots per received row
+        (actual_ids >= 0).sum(dim=1), (expected_ids >= 0).sum(dim=1)
+    )
+    # Receive-slot assignment may use atomics and is not a semantic EP guarantee. Compare
+    # pre/post dispatch evidence in canonical source-token order without changing the native path.
+    canonical_order = torch.argsort(source_ids.to(torch.int64), stable=True)
+    canonical_sources = source_ids.to(torch.int64).index_select(0, canonical_order)
+    canonical_ids = actual_ids.to(torch.int64).index_select(0, canonical_order)
+    canonical_weights = actual_weights.index_select(0, canonical_order)
+    ordering_contract = f"canonical-source-id-v1/{view.ordering_contract}"
+    order_sha256 = _tensor_sha256(canonical_sources)
+    dispatch_sha256 = _tensor_sha256(
+        canonical_sources, canonical_ids, canonical_weights
+    )
+
+    problem.recv_tokens = receive_count  # record the received-row count on the problem
+    combine_weight_semantics = backend.combine_weight_semantics
+    transformed = _expert_transform(
+        torch, view.payload, actual_ids, actual_weights, combine_weight_semantics
+    )
+    combined = backend.combine_transformed(problem, handle, transformed)
+    torch.cuda.synchronize()
+    expected_combined = _expected_transformed_combine(torch, problem)
+    if combined.shape == expected_combined.shape and combined.numel():
+        absolute_error = (combined.float() - expected_combined).abs()
+        max_absolute_error = float(absolute_error.max().item())
+        max_relative_error = max_absolute_error / (  # relative to the largest expected magnitude
+            float(expected_combined.abs().max().item()) + 1e-6
+        )
+        max_elementwise_relative_error = float(  # per element, denominator floored at ORACLE_ATOL
+            (absolute_error / expected_combined.abs().clamp_min(ORACLE_ATOL)).max().item()
+        )
+        combine_values_ok = bool(torch.allclose(
+            combined.float(), expected_combined, rtol=ORACLE_RTOL, atol=ORACLE_ATOL
+        ))
+    elif combined.shape == expected_combined.shape:  # matching shapes but zero elements
+        max_absolute_error = 0.0
+        max_elementwise_relative_error = 0.0
+        max_relative_error = 0.0
+        combine_values_ok = True
+    else:  # shape mismatch: no error metric can be computed
+        max_absolute_error = None
+        max_elementwise_relative_error = None
+        max_relative_error = None
+        combine_values_ok = False
+    tolerance = float(getattr(backend, "tolerance", 5e-2))  # backend may override the 5e-2 default
+    checks = {
+        "combine_values": combine_values_ok,
+        "counts": counts_ok,
+        "metadata": metadata_ok,
+        "multiplicity": multiplicity_ok,
+        "payload": payload_ok,
+        "source_set": source_set_ok,
+        "weights": weights_ok,
+    }
+    return {
+        "contract": ORACLE_CONTRACT,
+        "passed": bool(
+            all(checks.values())
+            and ordering_contract  # NOTE(review): non-empty by construction (fixed prefix), so this never fails
+            and max_relative_error is not None
+            and max_relative_error < tolerance
+        ),
+        "atol": ORACLE_ATOL,
+        "combine_weight_semantics": combine_weight_semantics,
+        "ordering_contract": ordering_contract,
+        "order_sha256": order_sha256,
+        "dispatch_sha256": dispatch_sha256,
+        "receive_count": receive_count,
+        "max_absolute_error": max_absolute_error,
+        "max_elementwise_relative_error": max_elementwise_relative_error,
+        "max_relative_error": max_relative_error,
+        "max_weight_error": max_weight_error,
+        "rtol": ORACLE_RTOL,
+        "checks": checks,
+    }
+
+
+def _histogram(xs: list[float], nbins: int = 40) -> dict:
+    """Equal-width bin counts for xs: {"n": 0} if empty, a single count if all values coincide."""
+    if not xs:
+        return {"n": 0}
+    total, low, high = len(xs), min(xs), max(xs)
+    if high <= low:
+        return {"n": total, "min": low, "max": high, "bins": nbins, "counts": [total]}
+    width = high - low
+    tally = [0] * nbins
+    # The top edge (x == high) would index bin nbins, so clamp it into the last bin.
+    for slot in (min(nbins - 1, int((x - low) / width * nbins)) for x in xs):
+        tally[slot] += 1
+    return {"n": total, "min": round(low, 3), "max": round(high, 3), "bins": nbins, "counts": tally}
+
+
+def _derive_publication_status(v: dict) -> str:
+    """Classify a raw attempt; sound runs stay "diagnostic" until the isolated publisher validates them."""
+    if v["execution_status"] != "complete":
+        return "failed"
+    sound = (
+        v["semantic_correctness"] == "pass"
+        and v["measurement_conformance"] == "conformant"
+        and v["workload_identity"] != "inconsistent"
+    )
+    return "diagnostic" if sound else "invalid"
+
+
+def run_sweep(args, backend, torch, dist, device, rank: int, world_size: int) -> int:
+    """Drive the source-tokens-per-rank sweep for one fully-specified line."""
+    sampling_error = sampling_contract_error(args.iters, args.trials, args.warmup)
+    if sampling_error:
+        if rank == 0:
+            print(f"ERROR: {sampling_error}")
+        return 2
+    import routing  # torch-based; imported lazily so the module byte-compiles without torch
+    import eplb     # stdlib planner + torch remap (the EPLB transform)
+
+    ep_size = world_size
+    # EPLB (if on): run_ep.py already bumped args.experts to the PHYSICAL count and stashed the
+    # logical count, so experts_per_rank below is physical. The trace is built over LOGICAL
+    # experts then remapped to physical (build_trace), so the whole sweep runs over the
+    # balanced physical placement with no adapter change.
+    eplb_on = getattr(args, "eplb", False)
+    num_logical = getattr(args, "num_logical_experts", args.experts)
+    if args.experts % ep_size != 0:
+        if rank == 0:
+            print(f"ERROR: experts ({args.experts}) must divide ep_size ({ep_size})")
+        return 2
+    experts_per_rank = args.experts // ep_size
+    if getattr(backend, "combine_weight_semantics", None) != "unweighted-rank-sum":
+        if rank == 0:
+            print("ERROR: v1 requires activation-only unweighted combine")
+        return 2
+
+    cap = backend.buffer_cap(args)
+    conditioning_ladder = CONDITIONING_LADDERS[args.phase]
+    if cap is not None and cap < conditioning_ladder[-1]:
+        if rank == 0:
+            print(f"ERROR: {backend.name} buffer cap {cap} cannot run the v1 conditioning ladder")
+        return 2
+    ladder, dropped = token_ladder(args.tokens_ladder, args.phase, cap)
+    if rank == 0 and dropped:
+        print(f"NOTE: dropped tokens/rank {dropped} — exceed {backend.name} buffer cap {cap} "
+              f"(hidden={args.hidden}); not silently truncated.")
+    if not ladder:
+        if rank == 0:
+            print(f"ERROR: empty token ladder (phase={args.phase}, cap={cap})")
+        return 2
+    MAX, MIN, SUM = dist.ReduceOp.MAX, dist.ReduceOp.MIN, dist.ReduceOp.SUM
+
+    # EPLB plan (once): estimate logical load from the global logical trace at the largest
+    # ladder T (most samples), then replicate+place. Held fixed across all T (as real EPLB
+    # plans from an observed load estimate). build_trace builds the LOGICAL trace and remaps
+    # to physical when the plan is present; otherwise it's the identity (logical == physical).
+    eplb_plan = None
+    if eplb_on:
+        ref_idx, _ = routing.build_global_routing(
+            EPLB_REFERENCE_TOKENS_PER_RANK * ep_size,
+            num_logical,
+            args.topk,
+            args.routing,
+            args.seed,
+        )
+        load = torch.bincount(ref_idx.reshape(-1), minlength=num_logical).float().tolist()
+        eplb_plan = eplb.build_plan(load, args.experts, ep_size)
+        if rank == 0:
+            print(f"NOTE: EPLB {num_logical}->{args.experts} experts ({ep_size}x{experts_per_rank}); "
+                  f"per-rank load imbalance {eplb_plan['imbalance_before']:.2f}x -> "
+                  f"{eplb_plan['imbalance_after']:.2f}x; {eplb_plan['replicated_experts']} experts "
+                  f"replicated (hottest {eplb_plan['max_replicas']}x)")
+
+    canonical = bool(getattr(args, "workload_dir", ""))
+    loaded_workload_ids, loaded_checksums = [], {}
+    if canonical:
+        import workload as _wl
+
+    def build_trace(gt):
+        # canonical: load pre-serialized trace bytes (verified by checksum) so this run is
+        # provably the SAME workload as any other consuming the same files. else: seeded gen.
+        if canonical:
+            wid = _wl.compute_workload_id(
+                args.routing, args.hidden, args.topk, num_logical, ep_size, gt, args.seed
+            )
+            idx_np, w_np, man = _wl.load_workload(os.path.join(args.workload_dir, f"{wid}.npz"), verify=True)
+            idx_l = torch.from_numpy(idx_np).to(torch.int64)
+            w = torch.from_numpy(w_np).to(torch.float32)
+            if wid not in loaded_workload_ids:
+                loaded_workload_ids.append(wid)
+                loaded_checksums[wid] = man.get("checksums")
+        else:
+            idx_l, w = routing.build_global_routing(
+                gt, num_logical, args.topk, args.routing, args.seed
+            )
+        return (eplb.remap_idx(idx_l, eplb_plan) if eplb_plan is not None else idx_l), w
+
+    # Fabric/clock warm-up BEFORE any timed point (review: H200 had an anomalous cold
+    # first point and a 40% decode-vs-prefill mismatch at the shared T=128). Gradually
+    # ramp through the small ladder shapes untimed — warms clocks/fabric for everyone
+    # and is also cold-jump-safe for MoRI.
+    def warm_roundtrips(problem, count):
+        for _ in range(count):
+            handle = backend.dispatch(problem)
+            if not hasattr(problem, "recv_tokens"):
+                # Dynamic receive cardinality is stable for this fixed routing trace. Cache it
+                # during untimed conditioning so adapters never read a device scalar in timing.
+                problem.recv_tokens = backend.recv_tokens(handle)
+            backend.stage(problem, handle)
+            backend.combine(problem, handle)
+            torch.cuda.synchronize()
+
+    for wt in conditioning_ladder:
+        # Warm-only shapes need not have canonical manifests: they are never measured or emitted.
+        wi, ww = routing.build_global_routing(
+            wt * ep_size, num_logical, args.topk, args.routing, args.seed,
+        )
+        if eplb_plan is not None:
+            wi = eplb.remap_idx(wi, eplb_plan)
+        wsi, wsw = routing.rank_slice(wi, ww, rank, wt)
+        wx = routing.rank_activations(wt, args.hidden, args.seed, rank, device, torch.bfloat16)
+        wp = backend.make_problem(wt, wsi.to(device), wsw.to(device), wx)
+        warm_roundtrips(wp, CONDITIONING_ROUNDS_PER_SHAPE)
+    torch.cuda.synchronize()
+    dist.barrier()
+    # Setup may materialize deferred provenance such as DeepEP V2 JIT CUBINs.
+    # Resolve it after conditioning but before correctness or timed measurements.
+    capture_deferred_provenance = getattr(backend, "capture_deferred_provenance", None)
+    if capture_deferred_provenance is not None:
+        capture_deferred_provenance()
+    provenance_issues = contracts.backend_provenance_issues(
+        backend.name, backend.backend_provenance
+    )
+    if provenance_issues:
+        if rank == 0:
+            print(
+                f"ERROR: unpinned provenance {provenance_issues} "
+                f"in {backend.backend_provenance}"
+            )
+        return 4
+    elem_dispatch = BF16_BYTES
+
+    # ---- Pass 1: build each deterministic problem and run the expert oracle. ----
+    problems, gate, gts, global_traces, input_snapshots = {}, {}, {}, {}, {}
+    routing_hashes = set()
+    for T in ladder:
+        counts = [T] * ep_size
+        gt = T * ep_size
+        gts[T] = gt
+        idx_g, w_g = build_trace(gt)
+        rstats = routing.routing_stats(idx_g, args.experts, experts_per_rank, weights=w_g)
+        gpn = args.gpus_per_node or ep_size
+        rstats["locality"] = routing.routing_locality(idx_g, experts_per_rank, ep_size, max(1, T),
+                                                      gpn, args.scale_up_domain or None)
+        rstats["source_token_stats"] = _stats_vec(counts)
+        routing_hashes.add(rstats["routing_hash"])
+        my_off, my_cnt = rank * T, T
+        idx_s = idx_g[my_off:my_off + my_cnt].contiguous()
+        w_s = w_g[my_off:my_off + my_cnt].contiguous()
+        x = routing.rank_activations(my_cnt, args.hidden, args.seed, rank, device, torch.bfloat16)
+        problem = backend.make_problem(my_cnt, idx_s.to(device), w_s.to(device), x)
+        input_snapshots[T] = (
+            problem.x.clone(), problem.topk_idx.clone(), problem.topk_weights.clone()
+        )
+        oracle = _run_expert_oracle(
+            torch, routing, backend, problem, idx_g, w_g, rank, experts_per_rank,
+            args.seed,
+        )
+        before_x, before_idx, before_weights = input_snapshots[T]
+        pre_input_unchanged = (
+            torch.equal(problem.x, before_x)
+            and torch.equal(problem.topk_idx, before_idx)
+            and torch.equal(problem.topk_weights, before_weights)
+        )
+        problems[T] = problem
+        global_traces[T] = (idx_g, w_g)
+        gate[T] = {
+            "rstats": rstats,
+            "recv_local": oracle["receive_count"],
+            "max_rel": oracle["max_relative_error"] or 0.0,
+            "local_ok": int(oracle["passed"]),
+            "oracle_pre": oracle,
+            "pre_input_unchanged": pre_input_unchanged,
+        }
+
+    # ---- Pass 2: every backend uses the same ascending point order and conditioning ramp.
+    # Per-iteration cross-rank MAX samples are pooled across trials. ----
+    disp_pool = {T: [] for T in ladder}     # pooled per-iteration cross-rank MAX (dispatch)
+    comb_pool = {T: [] for T in ladder}     # ... combine
+    rt_pool = {T: [] for T in ladder}       # independently measured round trip
+    disp_trials = {T: [] for T in ladder}
+    comb_trials = {T: [] for T in ladder}
+    rt_trials = {T: [] for T in ladder}
+    order = list(ladder)
+    for _trial in range(args.trials):
+        for T in order:
+            problem = problems[T]
+            # Stateful paired APIs may expose only a measured round trip.
+            # Do not synthesize component latency from that measurement.
+            roundtrip_only = getattr(backend, "roundtrip_only", False)
+
+            def rt_once(p=problem):
+                hh = backend.dispatch(p)
+                backend.stage(p, hh)
+                return backend.combine(p, hh)
+
+            # Every available component starts after the same synchronized full-roundtrip warmup.
+            # Roundtrip is first on every backend because it is the comparison headline.
+            warm_roundtrips(problem, args.warmup)
+            rt_iters = time_us(torch, lambda p=problem: rt_once(p), 0, args.iters)
+            if roundtrip_only:
+                disp_iters = comb_iters = []
+            else:
+                warm_roundtrips(problem, args.warmup)
+                disp_iters = time_us(torch, lambda p=problem: backend.dispatch(p),
+                                     0, args.iters)
+
+                def prep(p=problem):
+                    hh = backend.dispatch(p)
+                    backend.stage(p, hh)
+                    return hh
+                warm_roundtrips(problem, args.warmup)
+                if backend.combine_needs_redispatch:
+                    comb_iters = time_us(torch, lambda hh, p=problem: backend.combine(p, hh),
+                                         0, args.iters, pre=prep)
+                else:
+                    hh = prep()
+                    torch.cuda.synchronize()
+                    comb_iters = time_us(torch, lambda p=problem, hx=hh: backend.combine(p, hx),
+                                         0, args.iters)
+            # per-iteration cross-rank MAX (the distributed-op latency per iter), pooled.
+            if disp_iters:
+                reduced_dispatch = _reduce_vec(torch, dist, device, disp_iters, MAX)
+                reduced_combine = _reduce_vec(torch, dist, device, comb_iters, MAX)
+                disp_trials[T].append(reduced_dispatch)
+                comb_trials[T].append(reduced_combine)
+                disp_pool[T] += reduced_dispatch
+                comb_pool[T] += reduced_combine
+            reduced_roundtrip = _reduce_vec(torch, dist, device, rt_iters, MAX)
+            rt_trials[T].append(reduced_roundtrip)
+            rt_pool[T] += reduced_roundtrip
+
+    # ---- Pass 3: prove timed inputs were immutable and repeat the full oracle. ----
+    for T in ladder:
+        problem = problems[T]
+        before_x, before_idx, before_weights = input_snapshots[T]
+        input_unchanged = gate[T]["pre_input_unchanged"] and (
+            torch.equal(problem.x, before_x)
+            and torch.equal(problem.topk_idx, before_idx)
+            and torch.equal(problem.topk_weights, before_weights)
+        )
+        idx_g, w_g = global_traces[T]
+        post = _run_expert_oracle(
+            torch, routing, backend, problem, idx_g, w_g, rank, experts_per_rank,
+            args.seed,
+        )
+        pre = gate[T]["oracle_pre"]
+        order_stable = (
+            pre["ordering_contract"] == post["ordering_contract"]
+            and pre["order_sha256"] == post["order_sha256"]
+            and pre["dispatch_sha256"] == post["dispatch_sha256"]
+        )
+        gate[T].update({
+            "input_unchanged": input_unchanged,
+            "local_ok": int(pre["passed"] and post["passed"] and input_unchanged and order_stable),
+            "max_rel": max(pre["max_relative_error"] or 0.0, post["max_relative_error"] or 0.0),
+            "oracle_post": post,
+            "order_stable": order_stable,
+        })
+
+    # ---- Pass 4: percentiles (p50/p90/p95/p99, nearest-rank) from pooled samples + bytes + row ----
+    def pcts(xs):
+        return ({"p50": percentile(xs, 50), "p90": percentile(xs, 90),
+                 "p95": percentile(xs, 95), "p99": percentile(xs, 99)} if xs else None)
+
+    def component(percentiles, count, *, derived=False):
+        if percentiles is None:
+            return {"availability": "unavailable", "origin": None,
+                    "percentiles_us": None, "sample_count": 0}
+        return {
+            "availability": "derived" if derived else "measured",
+            "origin": "derived-percentile-sum" if derived else "measured",
+            "percentiles_us": percentiles,
+            "sample_count": 0 if derived else count,
+        }
+    rows = []
+    all_anomalies = []
+    thr_rt = 3.0
+    for T in ladder:
+        gt = gts[T]
+        g = gate[T]
+        rstats = g["rstats"]
+        d, c, rt = disp_pool[T], comb_pool[T], rt_pool[T]
+        dp, cp, rtp = pcts(d), pcts(c), pcts(rt)
+        # isolated_sum = SUM of the isolated dispatch+combine percentiles. NOT a measured op
+        # (can't reveal shared sync / launch amortization / overlap) — do NOT use for throughput
+        # or SLO capacity. The MEASURED round trip (rtp) is the real chained latency.
+        isum = {key: dp[key] + cp[key] for key in dp} if dp and cp else None
+        recv_total = _reduce_int(torch, dist, device, g["recv_local"], SUM)
+        recv_max = _reduce_int(torch, dist, device, g["recv_local"], MAX)
+        recv_min = _reduce_int(torch, dist, device, g["recv_local"], MIN)
+        global_ok = _reduce_int(torch, dist, device, g["local_ok"], MIN)
+        max_rel = _reduce_vec(torch, dist, device, [g["max_rel"]], MAX)[0]
+        point_ok = bool(global_ok) and recv_total > 0
+        rank_evidence = [None] * world_size
+        dist.all_gather_object(
+            rank_evidence,
+            {
+                "input_unchanged": g["input_unchanged"],
+                "order_stable": g["order_stable"],
+                "post_timing": g["oracle_post"],
+                "pre_timing": g["oracle_pre"],
+                "rank": rank,
+            },
+        )
+        # Canonical LOGICAL payload byte contracts (from the routing trace, NOT backend recv
+        # tensors): token-rank = one copy per unique (token,dest-rank); token-expert = one copy
+        # per routed (token,expert). routed_copies = token-rank copies; gt*topk = token-expert.
+        token_rank_copies = rstats["routed_copies"]
+        H = args.hidden
+        throughput = {
+            percentile_name: gt / (latency_us * 1e-6)
+            for percentile_name, latency_us in rtp.items()
+        }
+        disp_bytes_l = token_rank_copies * H * elem_dispatch
+        comb_bytes_l = token_rank_copies * H * 2
+        # Contract-level anomalies are attached to the row and rolled into validity.
+        #   roundtrip_gt_isolated_sum: measured RT p99 >> Σ(isolated dispatch+combine) p99.
+        #   roundtrip_lt_component_floor: measured RT p50 < max(dispatch,combine) p50 — a chained
+        #     op can't finish faster than its slowest required component (sync semantics violated).
+        row_anoms = []
+        if isum and isum["p99"] > 0 and rtp["p99"] > thr_rt * isum["p99"]:
+            row_anoms.append({"type": "roundtrip_gt_isolated_sum", "T": T,
+                              "roundtrip_p99": round(rtp["p99"], 2), "isolated_sum_p99": round(isum["p99"], 2),
+                              "ratio": round(rtp["p99"] / isum["p99"], 2), "threshold": thr_rt})
+        floor = max(dp["p50"], cp["p50"]) if dp and cp else None
+        if floor and rtp["p50"] > 0 and rtp["p50"] < 0.95 * floor:
+            row_anoms.append({"type": "roundtrip_lt_component_floor", "T": T,
+                              "roundtrip_p50": round(rtp["p50"], 2), "component_floor_p50": round(floor, 2)})
+        all_anomalies.extend(row_anoms)
+        rows.append({
+            "anomalies": row_anoms,
+            "components": {
+                "combine": component(cp, len(c)),
+                "dispatch": component(dp, len(d)),
+                "isolated_sum": component(isum, 0, derived=True),
+                "roundtrip": component(rtp, len(rt)),
+            },
+            "correctness": {
+                "contract": ORACLE_CONTRACT,
+                "max_relative_error": max_rel,
+                "passed": point_ok,
+                "rank_evidence": rank_evidence,
+                "scope": "dispatch-metadata-and-transformed-combine",
+            },
+            "global_tokens": gt,
+            "logical_bytes": {
+                "combine": comb_bytes_l,
+                "dispatch": disp_bytes_l,
+                "roundtrip": disp_bytes_l + comb_bytes_l,
+            },
+            "receive": {
+                "max": recv_max,
+                "mean": recv_total / world_size,
+                "min": recv_min,
+                "total": recv_total,
+            },
+            "routing": {
+                "empty_expert_count": rstats["empty_expert_count"],
+                "empty_rank_count": rstats["empty_rank_count"],
+                "expert_assignment_rank_cv": rstats["expert_assignment_rank_cv"],
+                "expert_assignments_per_rank": rstats["expert_assignments_per_rank"],
+                "expert_load_cv": rstats["expert_load_cv"],
+                "expert_load_max": rstats["expert_load_max"],
+                "expert_load_mean": rstats["expert_load_mean"],
+                "expert_load_min": rstats["expert_load_min"],
+                "fanout_histogram": rstats["fanout_hist"],
+                "fanout_max": rstats["fanout_max"],
+                "fanout_mean": rstats["fanout_mean"],
+                "fanout_min": rstats["fanout_min"],
+                "hash": rstats["routing_hash"],
+                "hotspot_ratio": rstats["hotspot_ratio"],
+                "locality": rstats.get("locality"),
+                "payload_copies_per_rank": rstats["payload_copies_per_rank"],
+                "payload_rank_cv": rstats["payload_rank_cv"],
+                "routed_copies": rstats["routed_copies"],
+                "source_token_stats": rstats.get("source_token_stats"),
+            },
+            "sample_histograms": {
+                "dispatch": _histogram(d) if d else None,
+                "combine": _histogram(c) if c else None,
+                "roundtrip": _histogram(rt),
+            },
+            "token_rate_at_latency_percentile": throughput,
+            "tokens_per_rank": T,
+        })
+        if rank == 0:
+            component_log = (f"disp p50/p99={dp['p50']:7.1f}/{dp['p99']:7.1f} "
+                             f"comb {cp['p50']:6.1f}/{cp['p99']:6.1f} " if dp and cp
+                             else "components=unavailable ")
+            print(f"  T={T:<5} {component_log}"
+                  f"RT p50/p99={rtp['p50']:7.1f}/{rtp['p99']:7.1f}us n={len(rt)} fanout={rstats['fanout_mean']:.2f} "
+                  f"recv[min/mean/max]={recv_min}/{recv_total // world_size}/{recv_max} "
+                  f"correct={point_ok}")
+
+    # Cross-rank workload-identity proof: every rank must have built the SAME global routing
+    # (one hash per T here); confirm all ranks agree by hashing the per-T hash set and
+    # MIN/MAX-reducing it — a mismatch means NVIDIA and AMD did NOT run identical routing.
+    trace_sig = hashlib.sha256("|".join(sorted(routing_hashes)).encode()).hexdigest()
+    routing_consistent = _same_hash_across_ranks(torch, dist, device, trace_sig)
+
+    # Capture again after correctness and timing so no lazily generated kernel can escape
+    # the implementation identity recorded in the artifact.
+    if capture_deferred_provenance is not None:
+        capture_deferred_provenance()
+
+    if rank != 0:
+        return 0
+
+    # status=valid requires correctness AND a proven-identical routing trace across ranks.
+    all_ok = bool(rows) and all(r["correctness"]["passed"] for r in rows) and routing_consistent
+
+    # Adapters never self-label official; status is derived from these gates.
+    prov = backend.backend_provenance
+    provenance_complete = contracts.provenance_complete(
+        prov,
+        backend.name,
+        getattr(args, "git_run", None),
+        image_digest=getattr(args, "image_digest", None),
+        image_verified=getattr(args, "image_digest_verified", False),
+        squash_sha256=getattr(args, "squash_sha256", None),
+    )
+    resource_profile = contracts.project_resource_profile(prov)
+    resource_conformance = resource_profile["conformance_class"]
+    # record the canonical workload identity consumed (one trace per T -> set of ids/checksums).
+    if canonical and loaded_workload_ids:
+        args.workload_id = identity.workload_id(
+            {
+                "members": [
+                    {"checksums": loaded_checksums[member], "workload_id": member}
+                    for member in sorted(loaded_workload_ids)
+                ]
+            }
+        )
+        args.workload_members = sorted(loaded_workload_ids)
+        args.workload_checksums = loaded_checksums
+    canonical_workload = bool(getattr(args, "workload_id", None))
+    activation_identity = workload_contract.compute_activation_identity(args.seed, args.hidden)
+    # EPLB identity covers replica placement, not only counts.
+    eplb_mapping_hash = None
+    if eplb_plan is not None:
+        eplb_mapping_hash = eplb.mapping_hash(eplb_plan)
+    anomaly_free = len(all_anomalies) == 0
+    validity = {
+        "execution_status": "complete" if rows else "failed",
+        "semantic_correctness": (
+            "pass" if rows and all(r["correctness"]["passed"] for r in rows) else "fail"
+        ),
+        "workload_identity": "consistent-across-ranks" if routing_consistent else "inconsistent",
+        "workload_source": "canonical-serialized" if canonical_workload else "seeded-runtime",
+        "measurement_conformance": "conformant",   # run_ep gate rejects nonconformant pre-run
+        "sampling_conformance": "conformant",      # fixed-512-v1 gate rejects any other profile
+        "resource_conformance": resource_conformance,
+        "provenance_complete": provenance_complete,
+        # anomaly-free unless a contract-level timing anomaly fired (then diagnostic, see above).
+        "anomaly_free": anomaly_free,
+    }
+    publication_status = _derive_publication_status(validity)
+
+    shape = {  # FIXED line identity (no T, no per-backend resource knobs)
+        "hidden": args.hidden, "topk": args.topk, "experts": args.experts,
+        "experts_per_rank": experts_per_rank, "dispatch_dtype": "bf16",
+        "routing": args.routing, "eplb": bool(eplb_plan), "num_logical_experts": num_logical,
+        # V2 is reserved for the PR #605 ElasticBuffer adapter; package versions never imply it.
+        "kernel_gen": kernel_generation(backend),
+        "activation_profile": ACTIVATION_PROFILE,
+        "quant": {
+            "combine_input_dtype": "bf16",
+            "combine_accum_dtype": getattr(backend, "combine_accum_dtype", "fp32"),
+            "combine_output_dtype": "bf16", "combine_quant_mode": "none",
+            "scale_layout": None,
+        },
+    }
+    generated_at = args.timestamp or _dt.datetime.now().astimezone().isoformat()
+    realized_placement = getattr(args, "realized_placement", None)
+    nodes = (
+        realized_placement["nodes"]
+        if realized_placement is not None
+        else int(os.environ.get("SLURM_NNODES", "1"))
+    )
+    case_factors = {
+        "case": {
+            "backend": backend.name,
+            "canonical": canonical,
+            "eplb": bool(eplb_plan),
+            "ep": ep_size,
+            "experts": num_logical,
+            "gpus_per_node": args.gpus_per_node or ep_size,
+            "hidden": args.hidden,
+            "ladder": " ".join(map(str, ladder)),
+            "nodes": nodes,
+            "phase": args.phase,
+            "required_publication": args.required_publication or "diagnostic",
+            "routing": args.routing,
+            "samples_per_point": TIMED_SAMPLES_PER_POINT,
+            "scale_up_domain": args.scale_up_domain or (args.gpus_per_node or ep_size),
+            "suite": args.suite or "manual",
+            "timing": f"{args.iters}:{args.trials}:{args.warmup}",
+            "topk": args.topk,
+            "warmup_semantics": WARMUP_SEMANTICS,
+            "workload": args.workload_name or "manual",
+        },
+        "profile": identity.V1_CASE_PROFILE,
+        "sku": args.runner,
+    }
+    computed_case_id = identity.digest("case", case_factors)
+    if args.case_id and args.case_id != computed_case_id:
+        raise ValueError(
+            f"scheduled case ID does not match realized factors: {args.case_id} != {computed_case_id}"
+        )
+    case_identifier = args.case_id or computed_case_id
+    git_run = getattr(args, "git_run", None) or {}
+    allocation_factors = {
+        "artifact": git_run.get("artifact"),
+        "execution_id": getattr(args, "allocation_execution_id", None),
+        "job": git_run.get("job"),
+        "repo": git_run.get("repo"),
+        "run_attempt": git_run.get("run_attempt"),
+        "run_id": git_run.get("run_id"),
+        "runner": args.runner,
+        "source_sha": git_run.get("source_sha"),
+    }
+    allocation_identifier = identity.allocation_id(allocation_factors)
+    try:
+        attempt_ordinal = int(os.environ.get("CX_ATTEMPT_ID", "1"))
+    except ValueError:
+        attempt_ordinal = 0
+    if attempt_ordinal <= 0:
+        raise ValueError("CX_ATTEMPT_ID must be a positive integer")
+    attempt_identifier = identity.attempt_id(
+        allocation=allocation_identifier, case=case_identifier, ordinal=attempt_ordinal
+    )
+    runtime_fingerprint = getattr(args, "runtime_fingerprint", None) or {}
+    implementation_contract = {
+        "kernel_generation": kernel_generation(backend),
+        "name": backend.name,
+        "provenance": _series_provenance(backend.backend_provenance),
+        "resource_profile": resource_profile,
+    }
+    public_config = contracts.public_series_config(
+        kernel_generation=implementation_contract["kernel_generation"],
+        provenance=backend.backend_provenance,
+        resource_profile=resource_profile,
+        resource_mode=args.resource_mode,
+        device_product=getattr(args, "runtime_device_product", None),
+    )
+    series_factors = {
+        "backend": backend.name,
+        "implementation_contract_sha256": _sha256_json(implementation_contract),
+        "public_config_sha256": contracts.public_series_config_sha256(public_config),
+        "routing_control_sha256": contracts.routing_implementation_control_sha256(
+            implementation_contract
+        ),
+        "case_id": case_identifier,
+        "image_digest": getattr(args, "image_digest", None),
+        "runtime_fingerprint_sha256": _sha256_json(runtime_fingerprint),
+        "source_sha": git_run.get("source_sha"),
+        "squash_sha256": getattr(args, "squash_sha256", None),
+        "workload_id": getattr(args, "workload_id", None) or trace_sig,
+    }
+    series_identifier = identity.series_id(series_factors)
+
+    sample_points = []
+    for row in rows:
+        token_count = row["tokens_per_rank"]
+
+        def sampled_component(trials):
+            return {
+                "availability": "measured" if trials else "unavailable",
+                "sample_count": sum(len(trial) for trial in trials),
+                "trials": trials if trials else None,
+            }
+
+        sample_point = {
+            "components": {
+                "combine": sampled_component(comb_trials[token_count]),
+                "dispatch": sampled_component(disp_trials[token_count]),
+                "roundtrip": sampled_component(rt_trials[token_count]),
+            },
+            "tokens_per_rank": token_count,
+        }
+        sample_sha256 = _sha256_json(sample_point)
+        point_identifier = identity.point_id(
+            series=series_identifier, tokens_per_rank=token_count
+        )
+        evidence_identifier = identity.evidence_id(
+            point=point_identifier,
+            allocation=allocation_identifier,
+            attempt=attempt_identifier,
+            sample_sha256=sample_sha256,
+        )
+        sample_point.update(
+            {
+                "evidence_id": evidence_identifier,
+                "point_id": point_identifier,
+                "sample_sha256": sample_sha256,
+            }
+        )
+        sample_points.append(sample_point)
+        row.update({
+            "evidence_id": evidence_identifier,
+            "point_id": point_identifier,
+            "sample_sha256": sample_sha256,
+        })
+
+    samples_path = args.out[:-5] + ".samples.json" if args.out.endswith(".json") else args.out + ".samples.json"
+    samples_document = {
+        "allocation_id": allocation_identifier,
+        "attempt_id": attempt_identifier,
+        "case_id": case_identifier,
+        "format": "collectivex.samples.v1",
+        "points": sample_points,
+        "sampling": {
+            "iterations_per_trial": args.iters,
+            "reduction": identity.V1_CASE_PROFILE["rank_reduction"],
+            "trials": args.trials,
+        },
+        "schema_version": 1,
+        "series_id": series_identifier,
+    }
+    samples_payload = contracts.canonical_json_bytes(samples_document)
+    samples_sha256 = hashlib.sha256(samples_payload).hexdigest()
+    samples_bytes = len(samples_payload)
+    sample_artifact = {
+        "bytes": samples_bytes,
+        "format": "collectivex.samples.v1",
+        "path": os.path.basename(samples_path),
+        "sha256": samples_sha256,
+    }
+    headline = next((r for r in rows if r["tokens_per_rank"] == 64), rows[len(rows) // 2])
+    eplb_record = (
+        {
+            "enabled": True,
+            "imbalance_after": eplb_plan["imbalance_after"],
+            "imbalance_before": eplb_plan["imbalance_before"],
+            "mapping_hash": eplb_mapping_hash,
+            "max_replicas": eplb_plan["max_replicas"],
+            "num_logical_experts": num_logical,
+            "num_physical_experts": args.experts,
+            "num_redundant": args.experts - num_logical,
+            "planner": EPLB_PLANNER,
+            "reference_tokens_per_rank": EPLB_REFERENCE_TOKENS_PER_RANK,
+            "replicated_experts": eplb_plan["replicated_experts"],
+        }
+        if eplb_plan
+        else {
+            "enabled": False,
+            "imbalance_after": None,
+            "imbalance_before": None,
+            "mapping_hash": None,
+            "max_replicas": None,
+            "num_logical_experts": num_logical,
+            "num_physical_experts": args.experts,
+            "num_redundant": 0,
+            "planner": None,
+            "reference_tokens_per_rank": None,
+            "replicated_experts": 0,
+        }
+    )
+    doc = {
+        "format": "collectivex.ep.v1",
+        "schema_version": SCHEMA_VERSION,
+        "record_type": "case-attempt",
+        "generated_at": generated_at,
+        "identity": {
+            "allocation_factors": allocation_factors,
+            "allocation_id": allocation_identifier,
+            "attempt_id": attempt_identifier,
+            "attempt_ordinal": attempt_ordinal,
+            "case_factors": case_factors,
+            "case_id": case_identifier,
+            "series_factors": series_factors,
+            "series_id": series_identifier,
+        },
+        "case": {
+            "attempt_ordinal": attempt_ordinal,
+            "backend": backend.name,
+            "eplb": eplb_record,
+            "ep_size": ep_size,
+            "mode": "normal",
+            "phase": args.phase,
+            "required_publication": args.required_publication or "diagnostic",
+            "resource_mode": "tuned",
+            "runner": args.runner,
+            "shape": shape,
+            "suite": args.suite or "manual",
+            "workload_name": args.workload_name or "manual",
+        },
+        "workload": {
+            "activation_generator": ACTIVATION_GENERATOR,
+            "activation_identity": activation_identity,
+            "activation_profile": ACTIVATION_PROFILE,
+            "cross_rank_consistent": routing_consistent,
+            "manifest_checksums": getattr(args, "workload_checksums", None),
+            "members": getattr(args, "workload_members", None),
+            "routing_generator": ROUTING_GENERATOR,
+            "source": validity["workload_source"],
+            "trace_hashes": sorted(routing_hashes),
+            "trace_signature": trace_sig,
+            "workload_id": getattr(args, "workload_id", None),
+        },
+        "measurement": {
+            "component_order_contract": COMPONENT_ORDER_CONTRACT,
+            "conditioning": {
+                "contract": CONDITIONING_CONTRACT,
+                "ladder": conditioning_ladder,
+                "roundtrips_per_shape": CONDITIONING_ROUNDS_PER_SHAPE,
+            },
+            "contract": "layout-and-dispatch-v1",
+            "rows": rows,
+            "sampling": {
+                "contract": SAMPLING_CONTRACT,
+                "iterations_per_trial": args.iters,
+                "percentile_method": identity.V1_CASE_PROFILE["percentile_method"],
+                "reduction": identity.V1_CASE_PROFILE["rank_reduction"],
+                "samples_per_component": TIMED_SAMPLES_PER_POINT,
+                "trials": args.trials,
+                "warmup_iterations": args.warmup,
+                "warmup_semantics": WARMUP_SEMANTICS,
+            },
+            "source_allocation": "even",
+        },
+        "implementation": {
+            "kernel_generation": kernel_generation(backend),
+            "name": backend.name,
+            "provenance": backend.backend_provenance,
+            "resource_profile": resource_profile,
+        },
+        "topology": {
+            "device_count": getattr(args, "runtime_device_count", None),
+            "device_product": getattr(args, "runtime_device_product", None),
+            "gpus_per_node": args.gpus_per_node or ep_size,
+            "nodes": nodes,
+            "placement": "packed",
+            "realized_placement": realized_placement,
+            "scale_up_domain": args.scale_up_domain or (args.gpus_per_node or ep_size),
+            "topology_class": args.topology_class,
+            "transport": args.transport,
+            "world_size": world_size,
+        },
+        "runtime_fingerprint": runtime_fingerprint,
+        "provenance": {
+            "command": getattr(args, "reproduction_command", ""),
+            "distributed_launcher": getattr(args, "distributed_launcher", None),
+            "git_run": getattr(args, "git_run", None),
+            "image": {
+                "arch": getattr(args, "image_arch", None),
+                "digest": getattr(args, "image_digest", "") or None,
+                "digest_verified": getattr(args, "image_digest_verified", False),
+                "reference": getattr(args, "image", "") or None,
+                "squash_sha256": getattr(args, "squash_sha256", None),
+            },
+            "redaction": "sanitized-v1",
+        },
+        "sample_artifact": sample_artifact,
+        "outcome": {
+            "publication_status": publication_status,
+            "reasons": [] if all_ok else ["semantic correctness or routing identity failed"],
+            "status": "success" if all_ok else "invalid",
+            "validity": validity,
+        },
+    }
+    contracts.validate_raw_document(doc, samples_document)
+    _write_bytes_atomic(samples_path, samples_payload)
+    _write_json_atomic(args.out, doc)
+    dispatch_percentiles = headline["components"]["dispatch"]["percentiles_us"]
+    dispatch_p99 = dispatch_percentiles["p99"] if dispatch_percentiles else None
+    component_summary = (f"disp_p99={dispatch_p99:.1f}us "
+                         if dispatch_p99 is not None
+                         else "components=unavailable ")
+    print(f"{backend.name} ep-dispatch-combine [{args.phase}/normal/layout-and-dispatch-v1]: "
+          f"status={doc['outcome']['status']} {len(rows)} pts, routing_consistent={routing_consistent}, "
+          f"headline T={headline['tokens_per_rank']} {component_summary}"
+          f"-> {args.out}")
+    # A complete invalid document is still a successfully captured terminal outcome. Launchers
+    # inspect its status to fail the case without conflating it with an execution failure.
+    return 0
diff --git a/experimental/CollectiveX/tests/ep_mori.py b/experimental/CollectiveX/tests/ep_mori.py
new file mode 100644
index 0000000000..7f99990253
--- /dev/null
+++ b/experimental/CollectiveX/tests/ep_mori.py
@@ -0,0 +1,261 @@
+#!/usr/bin/env python3
+"""CollectiveX MoRI adapter for the v1 BF16 normal-mode workload."""
+from __future__ import annotations
+
+import os
+from pathlib import Path
+import re
+import sys
+import types
+
+# MoRI registers the whole symmetric heap at import time.
+os.environ["MORI_SHMEM_HEAP_SIZE"] = "2G"
+
+import torch
+import torch.distributed as dist
+
+try:
+    import mori  # type: ignore
+except Exception as exc:  # pragma: no cover - requires the benchmark image
+    print(f"ERROR: mori import failed: {exc!r}", file=sys.stderr)
+    raise
+
+
+def _project_local_metadata(torch_module, raw_expert_ids, raw_weights, rank, experts_per_rank):
+    """Mask routing metadata down to the experts hosted on ``rank``.
+
+    Non-local ids become -1 and their weights 0; the third result rebases local hits to 0.
+    """
+    first = rank * experts_per_rank
+    owned = (raw_expert_ids >= first) & (raw_expert_ids < first + experts_per_rank)
+    masked_ids = torch_module.where(owned, raw_expert_ids, torch_module.full_like(raw_expert_ids, -1))
+    masked_weights = torch_module.where(owned, raw_weights, torch_module.zeros_like(raw_weights))
+    return masked_ids, masked_weights, raw_expert_ids[owned] - first
+
+
+def _mori_source_commit() -> str:
+    """Return the 40-hex commit stored in the nearest ``.git/HEAD`` above the ``mori`` module."""
+    for head_file in (root / ".git" / "HEAD" for root in Path(mori.__file__).resolve().parents):
+        if head_file.is_symlink() or not head_file.is_file() or head_file.stat().st_size > 128:
+            continue  # not a small regular HEAD file at this level; keep walking upward
+        commit = head_file.read_text(encoding="ascii").strip()
+        if re.fullmatch(r"[0-9a-f]{40}", commit) is None:
+            raise RuntimeError("MoRI image source is not pinned to a detached commit")
+        return commit
+    raise RuntimeError("MoRI image source revision is unavailable")
+
+
+class MoRIBackend:  # CollectiveX EP backend adapter built on mori.ops.EpDispatchCombineOp
+    name = "mori"
+    combine_needs_redispatch = True
+    combine_weight_semantics = "unweighted-rank-sum"
+
+    def __init__(self, args, rank, world_size, local_rank, device):  # local_rank is unused here
+        self.args = args
+        self.rank = rank
+        self.world_size = world_size
+        self.device = device
+        self.mode = "normal"
+
+        self.ep_size = world_size
+        self.experts_per_rank = args.experts // self.ep_size  # floor division; no divisibility check
+        device_cus = torch.cuda.get_device_properties(device).multi_processor_count
+        self.block_num = self._block_target = 80  # intranode defaults; the AsyncLL branch overrides
+        self._block_floored = False
+        self._tuned_source = "default-80"
+        self.dispatch_warps = 16
+        self.combine_warps = 8
+
+        # MI355X uses the direct intranode kernel. MI325X uses MoRI's split
+        # AsyncLL send/receive kernel as its normal-mode XGMI transport.
+        kernel_request = os.environ.get("CX_MORI_KERNEL_TYPE", "intranode").strip().lower()
+        self._kernel_type = None
+        self._kernel_type_label = "IntraNode"
+        self._async_ll = False
+        if kernel_request in ("asyncll", "async_ll", "async-ll"):
+            kernel_enum = getattr(mori.ops, "EpDispatchCombineKernelType", None)
+            if kernel_enum is None or not hasattr(kernel_enum, "AsyncLL"):
+                raise RuntimeError(
+                    "CX_MORI_KERNEL_TYPE=asyncll requires "
+                    "EpDispatchCombineKernelType.AsyncLL"
+                )
+            self._kernel_type = kernel_enum.AsyncLL
+            self._kernel_type_label = "AsyncLL"
+            self._async_ll = True
+            self.block_num = self._block_target = 64
+            self.dispatch_warps = self.combine_warps = 8
+            self._tuned_source = "upstream-asyncll-64x8-external-input"
+        elif kernel_request not in ("intranode", "intra_node", "intra-node", ""):
+            raise RuntimeError(
+                f"unknown CX_MORI_KERNEL_TYPE={kernel_request!r} (expected intranode|asyncll)"
+            )
+        self.kernel_generation = "async-ll" if self._async_ll else "intranode"
+
+        world_group = torch.distributed.group.WORLD
+        torch._C._distributed_c10d._register_process_group("default", world_group)  # private torch API
+        mori.shmem.shmem_torch_process_group_init("default")  # same name as registered just above
+
+        self._cap = self.buffer_cap(args)
+        config_kwargs = {
+            "data_type": torch.bfloat16,
+            "rank": rank,
+            "world_size": world_size,
+            "hidden_dim": args.hidden,
+            "scale_dim": 0,
+            "scale_type_size": 1,
+            "max_token_type_size": torch.tensor([], dtype=torch.float32).element_size(),
+            "max_num_inp_token_per_rank": max(512, self._cap),
+            "num_experts_per_rank": self.experts_per_rank,
+            "num_experts_per_token": args.topk,
+            "use_external_inp_buf": self._async_ll,
+            "quant_type": "none",
+        }
+        if self._async_ll:  # these launch parameters are passed to the config only for AsyncLL
+            config_kwargs["kernel_type"] = self._kernel_type
+            config_kwargs["max_total_recv_tokens"] = 0
+            config_kwargs["block_num"] = self.block_num
+            config_kwargs["warp_num_per_block"] = self.dispatch_warps
+        self.config = mori.ops.EpDispatchCombineConfig(**config_kwargs)
+        if self._async_ll and (  # read the values back to confirm the config kept what was requested
+            self.config.block_num != self.block_num
+            or self.config.warp_num_per_block != self.dispatch_warps
+        ):
+            raise RuntimeError("MoRI AsyncLL launch configuration was not realized")
+        self.op = mori.ops.EpDispatchCombineOp(self.config)
+
+        expected_mori_commit = os.environ.get("MORI_COMMIT")  # optional pin; unset/empty skips the check
+        mori_commit = _mori_source_commit()
+        if expected_mori_commit and mori_commit != expected_mori_commit:
+            raise RuntimeError("MoRI image source revision differs from canonical provenance")
+        self.backend_provenance = {
+            "mori_commit": mori_commit,
+            "api": (
+                "mori.ops.EpDispatchCombineOp/external-input"
+                if self._async_ll
+                else "mori.ops.EpDispatchCombineOp/registered-input"
+            ),
+            "mode": "normal",
+            "dispatch_dtype": "bf16",
+            "combine_dtype": "bf16",
+            "kernel_type": self._kernel_type_label,
+            "enable_sdma": os.environ.get("MORI_ENABLE_SDMA"),
+            "heap_size": os.environ.get("MORI_SHMEM_HEAP_SIZE"),
+            "max_num_inp_token_per_rank": max(512, self._cap),
+            "max_total_recv_tokens": config_kwargs.get("max_total_recv_tokens"),  # None unless AsyncLL
+            "num_qps": 1,
+            "resource_mode": "tuned",
+            "block_num": self.block_num,
+            "block_num_target": self._block_target,
+            "block_num_floored": self._block_floored,
+            "dispatch_warps": self.dispatch_warps,
+            "combine_warps": self.combine_warps,
+            "device_cus": device_cus,
+            "sm_fraction": None if self._async_ll else self.block_num / device_cus,
+            "tuned_source": self._tuned_source,
+        }
+
+    def buffer_cap(self, args):  # args is unused; the cap is a constant
+        return 512
+
+    def make_problem(self, T, idx, weights, x):  # bundles one workload slice for dispatch/combine
+        indices = idx.to(torch.int32)
+        gate_weights = weights.to(torch.float32)
+        return types.SimpleNamespace(
+            T=T,
+            x=x,
+            topk_idx=indices,
+            topk_weights=gate_weights,
+            indices=indices,  # same tensor object as topk_idx
+            weights=gate_weights,  # same tensor object as topk_weights
+            scales=torch.empty((T, 0), dtype=torch.uint8, device=self.device),  # zero-width placeholder
+        )
+
+    def dispatch(self, p):  # runs op.dispatch (plus dispatch_recv for AsyncLL) and wraps the outputs
+        dispatch_output, dispatch_weights, _scales, dispatch_indices, recv_num = (
+            self.op.dispatch(
+                p.x,
+                p.weights,
+                p.scales,
+                p.indices,
+                block_num=self.block_num,
+                warp_per_block=self.dispatch_warps,
+            )
+        )
+        if self._async_ll:
+            self.op.dispatch_recv(warp_per_block=self.dispatch_warps)
+        return types.SimpleNamespace(
+            dispatch_output=dispatch_output,
+            dispatch_weights=dispatch_weights,
+            dispatch_indices=dispatch_indices,
+            recv_num=recv_num[0],  # element 0 only; recv_tokens() turns it into an int
+            combine_input=dispatch_output.to(torch.bfloat16),
+        )
+
+    def stage(self, p, h):  # requires p.recv_tokens to be a validated int before any copy
+        rows = getattr(p, "recv_tokens", None)
+        if not isinstance(rows, int) or rows < 0 or rows > h.combine_input.size(0):
+            raise RuntimeError("MoRI receive count was not validated before staging")
+        if self._async_ll:
+            return None  # no staging copy on the AsyncLL path
+        buffer = self.op.get_registered_combine_input_buffer(
+            torch.bfloat16, hidden_dim=h.combine_input.size(1)
+        )
+        buffer[:rows, :].copy_(h.combine_input[:rows, :])
+        h.combine_input = buffer  # later combine() calls read from the registered buffer
+
+    def combine(self, p, h):  # returns the first p.T rows of the combined output
+        combine_indices = p.indices if self._async_ll else h.dispatch_indices
+        combined, _weights = self.op.combine(
+            h.combine_input,
+            None,
+            combine_indices,
+            block_num=self.block_num,
+            warp_per_block=self.combine_warps,
+        )
+        if self._async_ll:
+            self.op.combine_recv(warp_per_block=self.combine_warps)
+        return combined[:p.T]
+
+    def inspect_dispatch(self, p, h):  # exposes received payload and rank-local routing metadata
+        count = self.recv_tokens(h)
+        if h.dispatch_weights is None:
+            raise RuntimeError("MoRI dispatch did not expose gate weights")
+        if count < 0 or any(  # count must fit inside the leading dimension of every tensor
+            tensor.ndim == 0 or count > tensor.size(0)
+            for tensor in (h.dispatch_output, h.dispatch_indices, h.dispatch_weights)
+        ):
+            raise RuntimeError("MoRI receive count exceeds dispatch metadata")
+        raw_expert_ids = h.dispatch_indices[:count].to(torch.int64)
+        expert_ids, weights, local_expert_ids = _project_local_metadata(
+            torch,
+            raw_expert_ids,
+            h.dispatch_weights[:count].to(torch.float32),
+            self.rank,
+            self.experts_per_rank,
+        )
+        return types.SimpleNamespace(
+            payload=h.dispatch_output[:count],
+            expert_ids=expert_ids,
+            weights=weights,
+            local_expert_counts=torch.bincount(
+                local_expert_ids, minlength=self.experts_per_rank
+            ),
+            ordering_contract="mori-global-topk-masked-v1",
+        )
+
+    def combine_transformed(self, p, h, transformed):  # cast to bf16, stage, then combine
+        h.combine_input = transformed.to(torch.bfloat16)
+        self.stage(p, h)
+        return self.combine(p, h)
+
+    def recv_tokens(self, h):  # converts the recv_num tensor to a Python int
+        return int(h.recv_num.item())
+
+    def finalize(self, rc):  # best-effort barrier, flush, then exit the process; does not return
+        try:
+            dist.barrier()
+        except Exception:
+            pass
+        sys.stdout.flush()
+        sys.stderr.flush()
+        os._exit(rc if 0 <= rc <= 255 else 1)  # codes outside 0..255 are reported as 1
diff --git a/experimental/CollectiveX/tests/ep_nccl.py b/experimental/CollectiveX/tests/ep_nccl.py
new file mode 100644
index 0000000000..327a4063f8
--- /dev/null
+++ b/experimental/CollectiveX/tests/ep_nccl.py
@@ -0,0 +1,177 @@
+"""CollectiveX NCCL all-to-all expert-parallel reference backend.
+
+The canonical "token-shuffle" EP built on torch.distributed's NCCL ``all_to_all_single``. Like the
+DeepEP-family APIs, dispatch sends one hidden-state copy to each distinct destination rank, even when
+multiple selected experts live on that rank. Combine reverses the shuffle and sums those rank copies.
+
+Why this exists alongside DeepEP/UCCL/MoRI: it is the portable collective reference baseline for the
+same rank-deduplicated payload and routing metadata. It keeps the library comparison anchored to the
+platform collective stack without claiming the custom fused kernels use the same transport algorithm.
+
+Scope: BF16, normal mode, layout-and-dispatch-v1. The timed dispatch includes layout, count exchange,
+payload, rank-masked expert indices, gate weights, and source-token metadata; combine returns only
+the activation payload. RCCL exposes the same API. The v1 AMD matrix uses this backend at EP8.
+"""
+
+import re
+import types
+
+import torch
+import torch.distributed as dist
+import contracts
+
+
+def _runtime_collective(args, torch_module) -> tuple[str, str]:
+    """Return ``(kind, version)`` of the collective library named by the runtime fingerprint."""
+    # A truthy torch_module.version.hip selects "rccl"; anything else selects "nccl".
+    kind = "rccl" if torch_module.version.hip else "nccl"
+    fingerprint = getattr(args, "runtime_fingerprint", None)
+    entry = fingerprint.get("collective_library") if isinstance(fingerprint, dict) else None
+    version = entry.get("version") if isinstance(entry, dict) else None
+    # Reject a missing entry, a kind mismatch, or a version that is not a dotted X.Y.Z string.
+    if not (isinstance(entry, dict) and entry.get("kind") == kind and isinstance(version, str)
+            and re.fullmatch(r"[0-9]+\.[0-9]+\.[0-9]+", version)):
+        raise RuntimeError("loaded collective runtime identity is unavailable")
+    return kind, version
+
+
+class NCCLBackend:  # reference EP backend over torch.distributed all_to_all_single
+    name = "nccl-ep"
+    combine_needs_redispatch = False  # dispatch saves the permutation + splits
+    combine_weight_semantics = "unweighted-rank-sum"
+
+    def __init__(self, args, rank, world_size, local_rank, device):  # local_rank is unused here
+        self.args = args
+        self.rank = rank
+        self.world_size = world_size
+        self.device = device
+        self.experts = args.experts
+        if args.experts % world_size:  # nonzero remainder: experts cannot be split evenly by rank
+            raise ValueError(f"experts({args.experts}) must be divisible by world_size({world_size})")
+        self.experts_per_rank = args.experts // world_size
+        self.tolerance = 5e-2  # bf16 round-trip
+        _library, _version = _runtime_collective(args, torch)  # raises if the identity is missing
+        self.kernel_generation = contracts.collective_kernel_generation(_library)
+        self.backend_provenance = {
+            "backend": f"{_library}-all2all",
+            "backend_lineage": _library,
+            "collective_library": _library,
+            "nccl_version": _version,
+            "transport": f"{_library}-all_to_all_single",
+            "resource_mode": "tuned",
+            "num_sms": None,
+            "device_sms": torch.cuda.get_device_properties(device).multi_processor_count,
+            "tuned_source": "nccl-collective",
+            "reference_semantics": "rank-deduplicated-payload-plus-routing-metadata-v2",
+            "routing_metadata": "expert-index-gate-weight-source-token",
+        }
+
+    def buffer_cap(self, args):
+        return None  # no fixed pre-allocated buffer; all-to-all sizes itself per step
+
+    def make_problem(self, T, idx, weights, x):
+        # idx[T,topk] int64, weights[T,topk] f32, x[T,hidden] bf16 — the shared routing-trace slice.
+        return types.SimpleNamespace(T=T, x=x, topk_idx=idx.to(torch.int64),
+                                     topk_weights=weights.to(torch.float32), layout=None)
+
+    def dispatch(self, p):
+        ws = self.world_size
+        x = p.x  # [T, H] bf16
+        idx = p.topk_idx  # [T, topk]
+        T, H = int(x.shape[0]), int(x.shape[1])
+        dev = x.device
+        # DeepEP dispatches one token per destination rank, not one copy per expert. Build the same
+        # rank-deduplicated routing map so NCCL traffic and combine semantics are comparable.
+        destinations = (idx // self.experts_per_rank).clamp_(0, ws - 1)
+        present = torch.zeros((T, ws), dtype=torch.bool, device=dev)
+        present.scatter_(1, destinations, True)
+        flat_token, flat_dest = present.nonzero(as_tuple=True)
+        # Group rank copies by destination (stable -> deterministic, invertible permutation).
+        order = torch.argsort(flat_dest, stable=True)
+        ordered_token = flat_token.index_select(0, order)
+        ordered_dest = flat_dest.index_select(0, order)
+        send_counts = torch.bincount(flat_dest, minlength=ws)  # [ws]
+        send_x = x.index_select(0, ordered_token).contiguous()
+        send_topk_idx = idx.index_select(0, ordered_token).contiguous()
+        expert_start = ordered_dest.unsqueeze(1) * self.experts_per_rank
+        local_mask = ((send_topk_idx >= expert_start)
+                      & (send_topk_idx < expert_start + self.experts_per_rank))
+        send_topk_idx = torch.where(  # destination-local expert index, or -1 when not on that rank
+            local_mask, send_topk_idx - expert_start, torch.full_like(send_topk_idx, -1)
+        )
+        send_topk_weights = p.topk_weights.index_select(0, ordered_token).contiguous()
+        send_topk_weights.masked_fill_(~local_mask, 0)
+        send_src_metadata = (ordered_token.to(torch.int64) | (self.rank << 32)).contiguous()
+        # Exchange per-rank counts so every rank can size its receive buffer.
+        recv_counts = torch.empty_like(send_counts)
+        dist.all_to_all_single(recv_counts, send_counts)
+        sc = send_counts.tolist()
+        rc = recv_counts.tolist()
+        total_recv = int(sum(rc))
+        recv_x = torch.empty((total_recv, H), dtype=x.dtype, device=dev)
+        recv_topk_idx = torch.empty((total_recv, int(idx.shape[1])), dtype=idx.dtype, device=dev)
+        recv_topk_weights = torch.empty((total_recv, int(idx.shape[1])),
+                                        dtype=p.topk_weights.dtype, device=dev)
+        recv_src_metadata = torch.empty((total_recv,), dtype=torch.int64, device=dev)
+        # Dispatch the uneven per-rank splits over the configured collective transport.
+        dist.all_to_all_single(recv_x, send_x, rc, sc)
+        dist.all_to_all_single(recv_topk_idx, send_topk_idx, rc, sc)
+        dist.all_to_all_single(recv_topk_weights, send_topk_weights, rc, sc)
+        dist.all_to_all_single(recv_src_metadata, send_src_metadata, rc, sc)
+        return types.SimpleNamespace(  # unpacks source rank (high bits) and token (low 32 bits)
+            recv_x=recv_x, combine_input=None, order=order, flat_token=flat_token,
+            recv_topk_idx=recv_topk_idx,
+            recv_topk_weights=recv_topk_weights, recv_src_rank=recv_src_metadata >> 32,
+            recv_src_token=recv_src_metadata & ((1 << 32) - 1), send_counts=sc, recv_counts=rc,
+            T=T, H=H, topk=int(idx.shape[1]), total_recv=total_recv)
+
+    def stage(self, p, h):
+        # No expert compute: the expert "output" is the received tokens as-is (the round-trip identity).
+        h.combine_input = h.recv_x
+        return None
+
+    def combine(self, p, h):
+        # Reverse all-to-all: ship expert outputs back to their origin ranks (swap the split lists).
+        send_back = torch.empty((int(h.order.shape[0]), h.H), dtype=h.combine_input.dtype,
+                                device=h.combine_input.device)
+        dist.all_to_all_single(send_back, h.combine_input.contiguous(),
+                               h.send_counts, h.recv_counts)
+        # send_back is in send (sorted) order; invert the argsort to token-copy order.
+        copies = torch.empty_like(send_back)
+        copies[h.order] = send_back
+        # Sum one copy per destination rank under this reference's explicit unweighted contract.
+        out = torch.zeros((h.T, h.H), dtype=torch.float32, device=send_back.device)
+        out.index_add_(0, h.flat_token, copies.float())  # accumulate in fp32, cast back on return
+        return out.to(p.x.dtype)
+
+    def inspect_dispatch(self, p, h):
+        valid = h.recv_topk_idx >= 0  # -1 marks expert slots that are not on this rank
+        expert_ids = torch.where(
+            valid,
+            h.recv_topk_idx + self.rank * self.experts_per_rank,  # rebase local ids to global ids
+            h.recv_topk_idx,
+        )
+        return types.SimpleNamespace(
+            payload=h.recv_x,
+            expert_ids=expert_ids,
+            weights=h.recv_topk_weights.masked_fill(~valid, 0),
+            local_expert_counts=torch.bincount(
+                h.recv_topk_idx[valid], minlength=self.experts_per_rank
+            ),
+            ordering_contract="source-rank-major-stable-v1",
+        )
+
+    def combine_transformed(self, p, h, transformed):  # combine a caller-supplied expert output
+        h.combine_input = transformed.to(h.recv_x.dtype)
+        return self.combine(p, h)
+
+    def recv_tokens(self, h):
+        return int(h.total_recv)
+
+    def finalize(self, rc):  # best-effort teardown; always returns rc unchanged
+        try:
+            dist.barrier()
+            dist.destroy_process_group()
+        except Exception:
+            pass
+        return rc
diff --git a/experimental/CollectiveX/tests/ep_uccl.py b/experimental/CollectiveX/tests/ep_uccl.py
new file mode 100644
index 0000000000..c962b4ce13
--- /dev/null
+++ b/experimental/CollectiveX/tests/ep_uccl.py
@@ -0,0 +1,294 @@
+#!/usr/bin/env python3
+"""CollectiveX UCCL adapter for the v1 BF16 normal-mode workload."""
+from __future__ import annotations
+
+import importlib.metadata as metadata
+import json
+import os
+from pathlib import Path
+from pathlib import PurePosixPath
+import sys
+import types
+
+import torch
+import torch.distributed as dist
+import contracts
+
+try:
+    import uccl
+    import uccl_deepep
+    from uccl_deepep import Buffer  # type: ignore
+except Exception as exc:  # pragma: no cover - requires the benchmark image
+    print(f"ERROR: uccl.ep import failed: {exc!r}", file=sys.stderr)
+    raise
+
+
+def _uccl_version() -> str:  # distribution metadata first, then uccl.__version__, then "unknown"
+    try:
+        return metadata.version("uccl")
+    except Exception:  # any metadata failure falls back to the imported module's attribute
+        return getattr(uccl, "__version__", "unknown")
+
+
+def _uccl_dependency_versions() -> dict[str, str]:
+    """Return the installed versions of the contract-pinned UCCL dependencies.
+
+    Raises RuntimeError unless they equal ``contracts.UCCL_DEPENDENCY_VERSIONS`` exactly.
+    """
+    pinned = contracts.UCCL_DEPENDENCY_VERSIONS
+    installed = {package: metadata.version(package) for package in pinned}
+    if installed == pinned:
+        return installed
+    raise RuntimeError("UCCL runtime dependency versions differ from the v1 contract")
+
+
+def _is_uccl_runtime_payload(name: str) -> bool:
+    """True for files under top-level ``uccl``/``uccl.libs``, excluding bytecode caches."""
+    parts = PurePosixPath(name).parts
+    if not parts or parts[0] not in {"uccl", "uccl.libs"}:
+        return False
+    if "__pycache__" in parts:
+        return False
+    return PurePosixPath(name).suffix != ".pyc"
+
+
+def _python_dependency_evidence(package: str, version: str) -> dict[str, str]:
+    """Content-manifest evidence for the on-disk files of an installed dependency.
+
+    Keeps recorded files under the top-level ``<package>/`` directory that exist on
+    disk, skipping ``__pycache__`` entries and ``.pyc`` files.
+    """
+    distribution = metadata.distribution(package)
+    recorded = [
+        (PurePosixPath(entry.as_posix()), Path(distribution.locate_file(entry)), entry)
+        for entry in distribution.files or ()
+    ]
+    runtime_files = [
+        (entry.as_posix(), on_disk)
+        for logical, on_disk, entry in recorded
+        if logical.parts[:1] == (package,) and "__pycache__" not in logical.parts
+        and logical.suffix != ".pyc" and on_disk.is_file()
+    ]
+    return contracts.content_manifest_evidence(
+        role=f"{package}-distribution", name=f"{package}-{version}", files=runtime_files)
+
+
+def _loaded_libcudart_evidence(
+    version: str, maps_path: Path = Path("/proc/self/maps")
+) -> dict[str, str]:
+    """Content-manifest evidence for the single libcudart mapped into this process.
+
+    Scans ``maps_path`` for mapped files whose name matches a ``libcudart.so*`` file of
+    the installed ``nvidia-cuda-runtime-cu12`` distribution. Raises RuntimeError when
+    the distribution ships no such file, the maps file is unreadable, a match is
+    deleted/missing or not owned by the distribution, or the match count is not one.
+    """
+    distribution = metadata.distribution("nvidia-cuda-runtime-cu12")
+    candidates = {
+        Path(distribution.locate_file(entry)).resolve()
+        for entry in distribution.files or ()
+        if PurePosixPath(entry.as_posix()).name.startswith("libcudart.so")
+        and Path(distribution.locate_file(entry)).is_file()
+    }
+    if not candidates:
+        raise RuntimeError("pinned CUDA runtime distribution has no libcudart payload")
+    candidate_names = {path.name for path in candidates}
+
+    try:
+        # Mapped pathnames are arbitrary bytes; surrogateescape keeps decoding total.
+        mappings = maps_path.read_text(errors="surrogateescape").splitlines()
+    except OSError as exc:
+        raise RuntimeError("cannot inspect mapped UCCL runtime libraries") from exc
+    loaded: set[Path] = set()
+    for mapping in mappings:
+        columns = mapping.split(maxsplit=5)
+        if len(columns) != 6:  # anonymous mappings carry no pathname column
+            continue
+        raw_path = columns[5]
+        deleted = raw_path.endswith(" (deleted)")
+        mapped = Path(raw_path.removesuffix(" (deleted)"))
+        if mapped.name not in candidate_names:
+            continue
+        if deleted or not mapped.is_file():
+            raise RuntimeError("mapped libcudart is unavailable for content verification")
+        resolved = mapped.resolve()
+        if resolved not in candidates:
+            raise RuntimeError("mapped libcudart is not owned by the pinned CUDA runtime package")
+        loaded.add(resolved)  # a set: repeated segments of one library collapse to one entry
+    if len(loaded) != 1:
+        raise RuntimeError("expected exactly one mapped libcudart from the pinned CUDA runtime")
+    return contracts.content_manifest_evidence(
+        role="cuda-runtime",
+        name=f"nvidia-cuda-runtime-cu12-{version}",
+        files=[("libcudart.so", loaded.pop())],
+    )
+
+
+def _uccl_build_evidence(
+    version: str, dependency_versions: dict[str, str]
+) -> list[dict[str, str]]:
+    """Collect content-manifest evidence for everything the UCCL adapter loads.
+
+    Returns five entries, in order: the ``uccl`` distribution payload, the
+    ``uccl_deepep`` wrapper sources, ``intervaltree``, ``sortedcontainers`` and the
+    mapped libcudart. ``dependency_versions`` must provide the ``intervaltree``,
+    ``sortedcontainers`` and ``nvidia-cuda-runtime-cu12`` keys.
+    """
+    distribution = metadata.distribution("uccl")
+    distribution_files = [
+        (entry.as_posix(), Path(distribution.locate_file(entry)))
+        for entry in distribution.files or ()
+        if _is_uccl_runtime_payload(entry.as_posix())
+        and Path(distribution.locate_file(entry)).is_file()
+    ]
+    wrapper_root = Path(uccl_deepep.__file__).resolve().parent
+    # rglob() yields in directory-scan order; sort so every rank lists files identically.
+    wrapper_files = [
+        (path.relative_to(wrapper_root).as_posix(), path)
+        for path in sorted(wrapper_root.rglob("*.py"))
+        if path.is_file()
+    ]
+    return [
+        contracts.content_manifest_evidence(
+            role="uccl-distribution", name=f"uccl-{version}", files=distribution_files),
+        contracts.content_manifest_evidence(
+            role="uccl-wrapper", name="uccl-deepep-wrapper", files=wrapper_files),
+        _python_dependency_evidence("intervaltree", dependency_versions["intervaltree"]),
+        _python_dependency_evidence("sortedcontainers", dependency_versions["sortedcontainers"]),
+        _loaded_libcudart_evidence(dependency_versions["nvidia-cuda-runtime-cu12"]),
+    ]
+
+
+def _require_cross_rank_equal(value, label: str) -> None:
+    """Collective check: raise RuntimeError unless every rank's JSON encoding matches."""
+    per_rank = [None for _ in range(dist.get_world_size())]
+    dist.all_gather_object(per_rank, value)
+    if len(set(json.dumps(v, sort_keys=True, separators=(",", ":")) for v in per_rank)) != 1:
+        raise RuntimeError(f"UCCL {label} differs across ranks")
+
+
+class UCCLBackend:
+    """CollectiveX backend adapter that runs dispatch/combine through ``uccl_deepep.Buffer``."""
+
+    name = "uccl"
+    combine_needs_redispatch = False
+    combine_weight_semantics = "unweighted-rank-sum"
+
+    def __init__(self, args, rank, world_size, local_rank, device):
+        """Build the buffer, pin ``num_sms`` and record provenance (``local_rank`` is unused)."""
+        self.args, self.rank = args, rank
+        self.world_size, self.device = world_size, device
+        self.mode = "normal"
+
+        self.group = dist.group.WORLD
+        device_sms = torch.cuda.get_device_properties(device).multi_processor_count
+        num_nvl_bytes = 4 * 1024 * 1024 * 1024
+        self.buffer = Buffer(self.group, num_nvl_bytes, 0)
+
+        # Buffer's own num_sms attribute wins when present; args.num_sms is only the fallback.
+        num_sms = int(getattr(Buffer, "num_sms", args.num_sms))
+        try:
+            Buffer.set_num_sms(num_sms)
+        except Exception as exc:  # pragma: no cover - version dependent
+            raise RuntimeError(f"UCCL did not apply requested num_sms={num_sms}: {exc!r}") from exc
+        applied_num_sms = int(getattr(Buffer, "num_sms", num_sms))
+        if applied_num_sms != num_sms:
+            raise RuntimeError(f"UCCL num_sms mismatch: requested={num_sms} applied={applied_num_sms}")
+
+        # Content evidence must be identical on every rank before it is recorded.
+        version = _uccl_version()
+        dependency_versions = _uccl_dependency_versions()
+        loaded_libraries = _uccl_build_evidence(version, dependency_versions)
+        _require_cross_rank_equal(loaded_libraries, "installed content identities")
+        self.backend_provenance = {
+            "uccl_version": version,
+            "uccl_commit": os.environ.get("UCCL_COMMIT") or f"pkg-{version}",
+            "uccl_wrapper_commit": os.environ.get("UCCL_WRAPPER_COMMIT"),
+            "backend_lineage": "uccl",
+            "uccl_dependency_versions": dependency_versions,
+            "loaded_libraries": loaded_libraries,
+            "mode": "normal",
+            "dispatch_dtype": "bf16",
+            "combine_dtype": "bf16",
+            "resource_mode": "tuned",
+            "requested_num_sms": num_sms,
+            "num_sms": applied_num_sms,
+            "device_sms": device_sms,
+            "sm_fraction": applied_num_sms / device_sms,
+            "tuned_source": "uccl-default-num_sms",
+            "num_nvl_bytes": num_nvl_bytes,
+        }
+
+    def buffer_cap(self, args):
+        """This adapter reports no buffer cap: always ``None``."""
+        return None
+
+    def make_problem(self, T, idx, weights, x):
+        """Bundle one problem, casting ``idx`` to int64 and ``weights`` to float32."""
+        problem = types.SimpleNamespace(T=T, x=x)
+        problem.topk_idx = idx.to(torch.int64)
+        problem.topk_weights = weights.to(torch.float32)
+        return problem
+
+    def dispatch(self, p):
+        """Compute the layout for ``p.topk_idx``, dispatch ``p.x`` and return the results."""
+        layout = self.buffer.get_dispatch_layout(p.topk_idx, self.args.experts)
+        num_tokens_per_rank, _, num_tokens_per_expert, is_token_in_rank, _ = layout
+        dispatched = self.buffer.dispatch(
+            p.x,
+            topk_idx=p.topk_idx,
+            topk_weights=p.topk_weights,
+            num_tokens_per_rank=num_tokens_per_rank,
+            is_token_in_rank=is_token_in_rank,
+            num_tokens_per_expert=num_tokens_per_expert,
+        )
+        recv_x, recv_topk_idx, recv_topk_weights, recv_counts, handle, _ = dispatched
+        return types.SimpleNamespace(
+            recv_x=recv_x,
+            recv_topk_idx=recv_topk_idx,
+            recv_topk_weights=recv_topk_weights,
+            recv_counts=recv_counts,
+            handle=handle,
+        )
+
+    def stage(self, p, h):
+        """Select the received payload itself as the next combine input."""
+        h.combine_input = h.recv_x
+
+    def combine(self, p, h):
+        """Combine ``h.combine_input`` through the dispatch handle; return the first output."""
+        merged, _, _ = self.buffer.combine(h.combine_input, h.handle)
+        return merged
+
+    def inspect_dispatch(self, p, h):
+        """Namespace view of a dispatch: payload, shifted expert ids, masked weights, counts."""
+        local_ids = h.recv_topk_idx
+        is_real = local_ids >= 0
+        first_expert = self.rank * (self.args.experts // self.world_size)
+        return types.SimpleNamespace(
+            payload=h.recv_x,
+            expert_ids=torch.where(is_real, local_ids + first_expert, local_ids),
+            weights=h.recv_topk_weights.masked_fill(~is_real, 0),
+            local_expert_counts=torch.tensor(h.recv_counts, device=self.device, dtype=torch.int64),
+            ordering_contract="source-rank-major-stable-v1",
+        )
+
+    def combine_transformed(self, p, h, transformed):
+        """Combine ``transformed`` (cast to the payload dtype) without touching ``h``."""
+        merged, _, _ = self.buffer.combine(transformed.to(h.recv_x.dtype), h.handle)
+        return merged
+
+    def recv_tokens(self, h):
+        """Number of rows in the received payload, as a Python int."""
+        return int(h.recv_x.shape[0])
+
+    def finalize(self, rc):
+        """Flush stdio and hard-exit with ``rc`` (1 if outside 0..255); never returns."""
+        # UCCL's proxy teardown can crash after results are written; preserve the real rc.
+        try:
+            dist.barrier()
+        except Exception:
+            pass
+        sys.stdout.flush()
+        sys.stderr.flush()
+        os._exit(rc if 0 <= rc <= 255 else 1)
diff --git a/experimental/CollectiveX/tests/eplb.py b/experimental/CollectiveX/tests/eplb.py
new file mode 100644
index 0000000000..b1479da9f1
--- /dev/null
+++ b/experimental/CollectiveX/tests/eplb.py
@@ -0,0 +1,199 @@
+#!/usr/bin/env python3
+"""CollectiveX — EPLB (Expert-Parallel Load Balancer), the DeepSeek-style remedy for
+skewed (zipf) expert load.
+
+Under skewed routing, the ranks hosting hot logical experts receive far more token-copies
+than the rest; dispatch/combine latency is gated by that busiest rank (the cross-rank MAX
+the harness measures), so the whole collective stalls on it. EPLB REPLICATES hot experts
+onto extra physical slots and PLACES the slots so every rank carries ~equal load.
+
+This module is backend-agnostic: it is purely a transform of the deterministic routing
+trace. The trick that keeps every adapter unchanged — DeepEP/MoRI both route expert i to
+rank `i // experts_per_rank` (contiguous block placement) — is to number the physical slots
+RANK-MAJOR (rank r owns physical ids [r*spp, (r+1)*spp)), so the standard contiguous mapping
+reproduces EPLB's balanced placement. The harness then runs with `experts = num_physical`
+and the remapped (physical) trace; nothing else changes.
+
+  num_physical = num_logical + redundant   (redundant rounded up to a multiple of ep_size)
+  build_plan(): greedy replicate-by-load + equal-cardinality balanced packing onto ep_size ranks
+  remap_idx():  each token's logical targets -> physical replicas, spread by global token id
+
+Pure-Python planner (no torch) so it unit-tests on a login node; remap_idx needs torch.
+"""
+from __future__ import annotations
+
+import hashlib
+import json
+
+
+def physical_count(num_logical: int, num_redundant: int, ep_size: int) -> int:
+    """Total physical experts: ``num_logical`` plus the redundant count (negatives clamp to 0)
+    rounded UP to a multiple of ``ep_size``, so the slots divide evenly across ranks."""
+    padded = max(0, num_redundant) + ep_size - 1
+    return num_logical + padded - padded % ep_size
+
+
+def _contiguous_rank_load(logical_load, ep_size):
+    """Baseline per-rank load (no EPLB): rank r sums the r-th contiguous block of
+    ``len(logical_load) // ep_size`` logical experts; any leftover tail is ignored."""
+    block = len(logical_load) // ep_size
+    bounds = ((rank * block, rank * block + block) for rank in range(ep_size))
+    return [sum(logical_load[lo:hi]) for lo, hi in bounds]
+
+
+def build_plan(logical_load, num_physical: int, ep_size: int) -> dict:
+    """Plan EPLB replication and rank placement for one logical load vector.
+
+    logical_load: per-logical-expert load (token-copies), length num_logical.
+    num_physical: total physical slots; >= num_logical and a multiple of ep_size.
+    ep_size: number of ranks; num_logical must be a multiple of it as well.
+    Returns a dict of pure-Python lists (replicas, phys2log, rank_of_phys, log2phys,
+    per-rank loads) plus max/mean imbalance before/after. Bad sizes raise AssertionError.
+    """
+    num_logical = len(logical_load)
+    assert num_physical >= num_logical, "num_physical must be >= num_logical"
+    assert num_physical % ep_size == 0, "num_physical must be a multiple of ep_size"
+    assert num_logical % ep_size == 0, "num_logical must be a multiple of ep_size"
+    spp = num_physical // ep_size                      # physical slots per rank (fixed)
+
+    # 1) Replica allocation — start one slot per logical expert, then hand each redundant
+    #    slot to the expert with the highest CURRENT per-replica load (greedy min-max).
+    replicas = [1] * num_logical
+    for _ in range(num_physical - num_logical):
+        best, best_lps = 0, -1.0
+        for e in range(num_logical):
+            lps = logical_load[e] / replicas[e]
+            if lps > best_lps:                         # strict: ties go to the lowest expert id
+                best, best_lps = e, lps
+        replicas[best] += 1
+
+    # 2) Slots = (per-replica load, logical expert), one per replica.
+    slots = [(logical_load[e] / replicas[e], e)
+             for e in range(num_logical) for _ in range(replicas[e])]
+
+    # 3) Balanced packing into ep_size bins of EQUAL cardinality (spp each), minimizing the
+    #    max per-rank load: heaviest slot first -> least-loaded rank that still has capacity.
+    slots.sort(reverse=True)
+    rank_slots = [[] for _ in range(ep_size)]
+    rank_load = [0.0] * ep_size
+    for lps, e in slots:
+        r = min((r for r in range(ep_size) if len(rank_slots[r]) < spp),
+                key=lambda r: rank_load[r])
+        rank_slots[r].append(e)
+        rank_load[r] += lps
+
+    # 4) Rank-major physical numbering -> contiguous placement == this balanced placement.
+    phys2log = [e for r in range(ep_size) for e in rank_slots[r]]
+    rank_of_phys = [r for r in range(ep_size) for _ in rank_slots[r]]
+    log2phys = [[] for _ in range(num_logical)]
+    for pid, e in enumerate(phys2log):
+        log2phys[e].append(pid)
+
+    before = _contiguous_rank_load(logical_load, ep_size)
+    mean = (sum(logical_load) or 1.0) / ep_size        # "or 1.0": no zero divisor on all-zero load
+    return {
+        "num_logical": num_logical, "num_physical": num_physical, "ep_size": ep_size,
+        "slots_per_rank": spp, "replicas": replicas, "max_replicas": max(replicas),
+        "phys2log": phys2log, "rank_of_phys": rank_of_phys, "log2phys": log2phys,
+        "rank_load_after": rank_load, "rank_load_before": before,
+        # imbalance = busiest rank / mean (1.0 = perfect). This is the number EPLB cuts.
+        "imbalance_before": max(before) / mean, "imbalance_after": max(rank_load) / mean,
+        "replicated_experts": sum(1 for r in replicas if r > 1),
+    }
+
+
+def mapping_hash(plan: dict) -> str:
+    """SHA-256 hex digest over the plan's ``phys2log``, ``rank_of_phys`` and ``replicas``,
+    serialized as key-sorted JSON — a fingerprint of the logical-to-physical placement."""
+    fields = ("phys2log", "rank_of_phys", "replicas")
+    payload = {field: plan[field] for field in fields}
+    encoded = json.dumps(payload, sort_keys=True).encode()
+    digest = hashlib.sha256(encoded)
+    return digest.hexdigest()
+
+
+def remap_rows(indices: list[list[int]], plan: dict) -> list[list[int]]:
+    """Torch-free twin of remap_idx: row t sends expert e to log2phys[e][t % len(log2phys[e])]."""
+    table = plan["log2phys"]
+    remapped = []
+    for token, row in enumerate(indices):
+        remapped.append([table[e][token % len(table[e])] for e in row])
+    return remapped
+
+
+def remap_idx(idx_logical, plan):
+    """Map a [gt, topk] tensor of logical expert ids to physical replica ids.
+
+    Row ``t`` (the global token id) selects replica ``t % replicas[e]`` of each logical
+    expert ``e``, so a hot expert's tokens fan out across its replicas (= across ranks).
+    Replicas of distinct logical experts are disjoint, so a token's top-k physical ids
+    stay distinct (dispatch invariant preserved). The int64 result is created on
+    ``idx_logical``'s device, so the trace does not have to live on the CPU.
+    """
+    import torch
+    device = idx_logical.device
+    num_logical, max_rc = len(plan["replicas"]), plan["max_replicas"]
+    # Padded [num_logical, max_rc] table of physical ids. Padding repeats replica 0 and is
+    # never selected because the replica index is taken mod replicas[e]. One tensor build
+    # from nested lists replaces a Python-level write per table cell.
+    table = [(list(phys) + [phys[0]] * max_rc)[:max_rc] for phys in plan["log2phys"]]
+    padded = torch.tensor(table, dtype=torch.int64, device=device).reshape(num_logical, max_rc)
+    rc = torch.tensor(plan["replicas"], dtype=torch.int64, device=device)
+    e = idx_logical.to(torch.int64)                             # [gt,topk]
+    rows = torch.arange(e.shape[0], dtype=torch.int64, device=device).unsqueeze(1)  # [gt,1]
+    ridx = rows % rc[e]                                         # [gt,topk] replica index
+    return padded[e, ridx]                                      # [gt,topk] physical ids
+
+
+# --------------------------------------------------------------------------- self-test
+if __name__ == "__main__":
+    # Synthetic zipf load (popularity ∝ 1/(e+1)) — the case EPLB targets. No torch needed.
+    import sys
+    NUM_LOGICAL, EP, REDUNDANT = 256, 8, 32
+    load = [1.0 / (e + 1) for e in range(NUM_LOGICAL)]
+    nphys = physical_count(NUM_LOGICAL, REDUNDANT, EP)  # 256 + 32 (32 is already a multiple of 8)
+    plan = build_plan(load, nphys, EP)
+    print(f"num_logical={NUM_LOGICAL} ep={EP} num_physical={nphys} slots/rank={plan['slots_per_rank']}")
+    print(f"replicated experts={plan['replicated_experts']} max_replicas={plan['max_replicas']} "
+          f"(hottest expert 0 replicas={plan['replicas'][0]})")
+    print(f"per-rank load BEFORE (contiguous): {[round(x,3) for x in plan['rank_load_before']]}")
+    print(f"per-rank load AFTER  (EPLB):       {[round(x,3) for x in plan['rank_load_after']]}")
+    print(f"imbalance (max/mean)  BEFORE={plan['imbalance_before']:.2f}x  AFTER={plan['imbalance_after']:.2f}x")
+    # Gates: equal slot cardinality, every logical expert placed, big imbalance cut.
+    assert all(plan["replicas"][e] >= 1 for e in range(NUM_LOGICAL))
+    assert sum(plan["replicas"]) == nphys
+    assert len(plan["phys2log"]) == nphys
+    assert all(len(plan["log2phys"][e]) == plan["replicas"][e] for e in range(NUM_LOGICAL))
+    # rank-major numbering => contiguous block per rank => rank_of_phys is non-decreasing
+    assert plan["rank_of_phys"] == sorted(plan["rank_of_phys"])
+    assert plan["imbalance_after"] < plan["imbalance_before"], "EPLB must reduce imbalance"
+    assert plan["imbalance_after"] < 1.30, f"EPLB should get within ~30% of perfect, got {plan['imbalance_after']:.2f}"
+    # remap (if torch present): distinctness + balanced receive on a sampled zipf trace.
+    try:
+        import torch
+        g = torch.Generator().manual_seed(0)  # fixed seed: the sampled trace is reproducible
+        p = torch.tensor(load)
+        p = (p / p.sum()).expand(4096, NUM_LOGICAL)  # same distribution for each of 4096 tokens
+        idx_l = torch.multinomial(p, 8, replacement=False, generator=g).to(torch.int64)  # 8 distinct experts/token
+        idx_p = remap_idx(idx_l, plan)
+        assert idx_p.shape == idx_l.shape
+        # top-k physical ids distinct per token
+        assert all(len(set(row.tolist())) == 8 for row in idx_p), "physical top-k must stay distinct"
+        spp = plan["slots_per_rank"]
+        recv_before = [0] * EP
+        recv_after = [0] * EP
+        per_log = NUM_LOGICAL // EP
+        for row_l, row_p in zip(idx_l.tolist(), idx_p.tolist()):
+            for e in row_l:
+                recv_before[e // per_log] += 1  # contiguous placement: logical expert -> rank
+            for pid in row_p:
+                recv_after[pid // spp] += 1  # rank-major physical ids: slot -> rank
+        ib = max(recv_before) / (sum(recv_before) / EP)
+        ia = max(recv_after) / (sum(recv_after) / EP)
+        print(f"sampled-trace receive imbalance BEFORE={ib:.2f}x  AFTER={ia:.2f}x")
+        assert ia < ib and ia < 1.35, "remap must balance per-rank receive load"
+        print("remap self-test: OK")
+    except ImportError:  # only a missing torch is tolerated; assertion failures still propagate
+        print("(torch absent — skipped remap self-test; planner gates passed)")
+    print("EPLB self-test: PASS")
+    sys.exit(0)
diff --git a/experimental/CollectiveX/tests/make_workloads.py b/experimental/CollectiveX/tests/make_workloads.py
new file mode 100644
index 0000000000..862c3d0375
--- /dev/null
+++ b/experimental/CollectiveX/tests/make_workloads.py
@@ -0,0 +1,119 @@
+#!/usr/bin/env python3
+"""Generate canonical serialized workloads. Runs the stdlib counter generator for
+each (routing, global_tokens) in a ladder and writes <workload_id>.npz + .manifest.json into a
+dir that runs then consume via `run_ep.py --workload-dir`. One trace is emitted per global-token
+count because global token count is part of workload identity.
+
+  python3 tests/make_workloads.py --out-dir /path/to/cx_workloads \\
+      --routing uniform --ep 8 --hidden 7168 --topk 8 --experts 256 --seed 67 \\
+      --tokens-ladder "1 2 4 8 16 32 64 128 256 512"
+
+Or by the named v1 workload in configs/workloads.yaml. Explicit dimension flags still override it:
+
+  python3 tests/make_workloads.py --out-dir /path/to/cx_workloads --workload deepseek-v3-v1 --routing uniform --ep 8
+
+--id-only prints the content-bound workload_id per ladder point without torch/numpy:
+
+  python3 tests/make_workloads.py --workload deepseek-v3-v1 --ep 8 --id-only
+
+Generate every routing the suites need by running once per --routing. Idempotent (same id => same
+file). The dir is the cross-hardware artifact: copy it to each cluster so all consume identical bytes.
+"""
+from __future__ import annotations
+
+import argparse
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+import workload as wl   # noqa: E402
+
+# Repo root holds configs/ (this file is in tests/). Used only for --workload name resolution.
+_REPO = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+
+def resolve_manifest(name):
+    """Look a workload name up in configs/workloads.yaml and return (hidden, topk, experts).
+    Searches synthetic then model_derived; expert count = `experts` if that key is present,
+    else `routed_experts`. Raises SystemExit listing the known names when `name` is not
+    found, or naming the file when the entry lacks a dimension. Pure PyYAML + stdlib."""
+    import yaml
+    path = os.path.join(_REPO, "configs", "workloads.yaml")
+    with open(path) as handle:
+        cfg = yaml.safe_load(handle) or {}     # safe_load returns None for an empty document
+    known = []
+    for section in ("synthetic", "model_derived"):
+        sec = cfg.get(section) or {}
+        known += list(sec)
+        m = sec.get(name)
+        if m is not None:
+            experts = m.get("experts", m.get("routed_experts"))
+            if m.get("hidden") is None or m.get("topk") is None or experts is None:
+                raise SystemExit(f"workload '{name}' is missing hidden/topk/experts in {path}")
+            return int(m["hidden"]), int(m["topk"]), int(experts)
+    raise SystemExit(f"unknown --workload '{name}'; known: {sorted(known)}")
+
+
+def main() -> int:
+    """CLI entry point: print workload ids (--id-only) or write the workload files; returns 0."""
+    ap = argparse.ArgumentParser(description="Generate canonical CollectiveX workloads")
+    ap.add_argument("--out-dir", help="required unless --id-only")
+    ap.add_argument("--workload", help="named manifest in configs/workloads.yaml (sets hidden/topk/experts)")
+    ap.add_argument("--routing", default="uniform", choices=["uniform", "zipf"])
+    ap.add_argument("--ep", type=int, required=True, help="ep_size (global_tokens = T * ep)")
+    ap.add_argument("--hidden", type=int, help="override (default 7168, or the --workload's hidden)")
+    ap.add_argument("--topk", type=int, help="override (default 8, or the --workload's topk)")
+    ap.add_argument("--experts", type=int, help="override (default 256, or the --workload's experts)")
+    ap.add_argument("--seed", type=int, default=67)
+    ap.add_argument("--tokens-ladder", default="1 2 4 8 16 32 64 128 256 512")
+    ap.add_argument("--id-only", action="store_true",
+                    help="print content-bound workload_id per point without torch/numpy")
+    a = ap.parse_args()
+
+    # Resolve dims: a named --workload supplies defaults; explicit --hidden/--topk/--experts override
+    # per field. With neither, fall back to the v1 DeepSeek dimensions (7168/8/256).
+    base_h, base_t, base_e = (7168, 8, 256)
+    if a.workload:
+        base_h, base_t, base_e = resolve_manifest(a.workload)
+    hidden = a.hidden if a.hidden is not None else base_h
+    topk = a.topk if a.topk is not None else base_t
+    experts = a.experts if a.experts is not None else base_e
+
+    if not a.id_only and not a.out_dir:
+        ap.error("--out-dir is required unless --id-only")
+
+    try:
+        raw_ladder = [int(token) for token in a.tokens_ladder.replace(",", " ").split()]
+    except ValueError:  # report a usage error instead of a traceback
+        ap.error("--tokens-ladder must contain only integers")
+    if (a.ep <= 0 or min(hidden, topk, experts) <= 0 or topk > experts or experts % a.ep
+            or not raw_ladder or any(token <= 0 for token in raw_ladder)
+            or len(raw_ladder) != len(set(raw_ladder))):
+        ap.error("shape, EP, and token ladder must be positive, divisible, and unique")
+    ladder = sorted(raw_ladder)
+    epr = experts // a.ep
+    label = f"workload={a.workload} " if a.workload else ""
+
+    if a.id_only:
+        # The stdlib counter generator derives the same content-bound ID on every runtime.
+        for T in ladder:
+            gt = T * a.ep
+            wid = wl.compute_workload_id(a.routing, hidden, topk, experts, a.ep, gt, a.seed)
+            print(f"  T={T:<5} gt={gt:<6} routing={a.routing} -> {wid}")
+        print(f"{label}id-only: {len(ladder)} workload_id(s) "
+              f"(hidden={hidden} topk={topk} experts={experts} ep={a.ep} routing={a.routing} seed={a.seed})")
+        return 0
+
+    os.makedirs(a.out_dir, exist_ok=True)
+    for T in ladder:
+        gt = T * a.ep
+        idx, w, man = wl.build_workload(hidden, topk, experts, a.routing, gt, a.seed, epr)
+        wid = wl.save_workload(a.out_dir, idx, w, man)
+        print(f"  T={T:<5} gt={gt:<6} routing={a.routing} -> {wid}  "
+              f"(trace sha {man['checksums']['trace'][:12]})")
+    print(f"{label}wrote {len(ladder)} canonical workloads to {a.out_dir} (routing={a.routing}, ep={a.ep})")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/experimental/CollectiveX/tests/routing.py b/experimental/CollectiveX/tests/routing.py
new file mode 100644
index 0000000000..6065a06e43
--- /dev/null
+++ b/experimental/CollectiveX/tests/routing.py
@@ -0,0 +1,191 @@
+#!/usr/bin/env python3
+"""CollectiveX — deterministic, platform-independent MoE routing trace.
+
+Fair-comparison fix #1: routing (per-token expert IDs + gate weights) is generated
+ONCE from a fixed seed over the *global* token batch, indexed by global token id, and
+is identical on every SKU for the same (seed, routing, global_tokens, experts, top-k).
+Each rank materializes its slice `[rank*T,(rank+1)*T)`. Activations
+are per-rank (same rank ⇒ same x on any platform), so a given global token id has
+identical activation everywhere without materializing a global activation tensor.
+
+The v1 suite keeps two routing distributions:
+
+  * uniform   — top-k distinct experts drawn uniformly per token. The DEFAULT.
+                Expected fan-out for top-k=8, 256 experts, EP8 (32 experts/rank) ≈
+                8·(1 − C(224,8)/C(256,8)) ≈ 5.3 ranks/token. Load ~ Poisson.
+  * zipf      — expert popularity proportional to 1/(Zipf rank, not EP rank), giving expert/rank load skew.
+
+Always publish the realized fan-out so the workload is never misread again
+(`routing_stats`).
+"""
+from __future__ import annotations
+
+import hashlib
+
+import torch
+
+ACTIVATION_GENERATOR = "collectivex-activation-counter-v3"
+SOURCE_ID_BASE = 128
+SOURCE_ID_COLUMNS = 4
+
+
+def build_global_routing(
+    global_tokens: int, experts: int, topk: int, routing: str, seed: int
+):
+    """Byte-stable counter-generated routing: (int64 expert ids, float32 gate weights).
+
+    Rows come from ``workload.canonical_routing_rows``; the numeric arguments are
+    coerced with ``int()`` first, ``routing`` is passed through unchanged.
+    """
+    import workload
+
+    size_args = (int(global_tokens), int(experts), int(topk))
+    rows, gates = workload.canonical_routing_rows(*size_args, routing, int(seed))
+    return torch.tensor(rows, dtype=torch.int64), torch.tensor(gates, dtype=torch.float32)
+
+
+def rank_slice(idx, weights, rank: int, tokens_per_rank: int):
+    rows = slice(rank * tokens_per_rank, (rank + 1) * tokens_per_rank)  # this rank's token rows
+    return idx[rows].contiguous(), weights[rows].contiguous()
+
+
+def rank_activations(tokens: int, hidden: int, seed: int, rank: int, device,
+                     dtype=torch.bfloat16):
+    """Activations for this rank's ``tokens`` rows; global source ids start at ``rank * tokens``."""
+    source = torch.arange(rank * tokens, (rank + 1) * tokens, device=device, dtype=torch.int64)
+    return activations_for_source_ids(source, hidden, seed, dtype)
+
+
+def activations_for_source_ids(source, hidden: int, seed: int, dtype=torch.bfloat16):
+    """Canonical activations for global source ids; columns 0-3 carry the id digits + checksum."""
+    if hidden < SOURCE_ID_COLUMNS:
+        raise ValueError(f"hidden must be at least {SOURCE_ID_COLUMNS}")
+    ids = source.to(torch.int64)
+    cols = torch.arange(hidden, device=ids.device, dtype=torch.int64)
+    values = (ids[:, None] * 131 + cols[None, :] * 17 + int(seed) * 19) % 257 - 128
+    output = values.to(dtype).mul_(1 / 64)
+    prefix = [ids % SOURCE_ID_BASE, (ids // SOURCE_ID_BASE) % SOURCE_ID_BASE,
+              (ids // (SOURCE_ID_BASE**2)) % SOURCE_ID_BASE, (ids * 29 + int(seed) * 7) % SOURCE_ID_BASE]
+    for column, digit in enumerate(prefix):
+        output[:, column] = digit
+    return output
+
+
+def decode_source_ids(payload, seed: int):
+    """Decode source ids from the prefix columns; ValueError on shape/digit/checksum mismatch."""
+    if payload.ndim != 2 or payload.shape[1] < SOURCE_ID_COLUMNS:
+        raise ValueError("received payload cannot carry the source-token prefix")
+    prefix = payload[:, :SOURCE_ID_COLUMNS].float()
+    digits = prefix.round().to(torch.int64)
+    if not torch.equal(prefix, digits.float()):
+        raise ValueError("received source-token prefix is not exact")
+    if bool(digits.lt(0).logical_or(digits.ge(SOURCE_ID_BASE)).any().item()):
+        raise ValueError("received source-token prefix is out of range")
+    low, mid, high, carried = (digits[:, k] for k in range(4))
+    source = low + SOURCE_ID_BASE * mid + SOURCE_ID_BASE**2 * high
+    if not torch.equal((source * 29 + int(seed) * 7) % SOURCE_ID_BASE, carried):
+        raise ValueError("received source-token checksum differs")
+    return source
+
+
+def routing_locality(idx, experts_per_rank: int, ep_size: int, tokens_per_rank: int,
+                     gpus_per_node: int, scale_up_domain: int | None = None) -> dict:
+    """Locality of rank-deduplicated payload copies under packed placement.
+
+    idx is the global [gt, topk] expert-id tensor (any device). One copy is counted per
+    (token, destination rank) pair; the source rank is ``token // tokens_per_rank``. A
+    falsy scale_up_domain defaults to ``gpus_per_node * ep_size`` (all ranks in one domain).
+    """
+    assignments = (idx // experts_per_rank).clamp(max=ep_size - 1)
+    destinations = torch.zeros((idx.shape[0], ep_size), dtype=torch.bool, device=idx.device)
+    destinations.scatter_(1, assignments, True)
+    token, dest = destinations.nonzero(as_tuple=True)
+    src = (token // max(1, tokens_per_rank)).clamp(max=ep_size - 1)
+    sud = scale_up_domain or (gpus_per_node * ep_size)                  # default: all one domain
+    # Packed placement: EP rank r is physical GPU r, so ranks are compared directly.
+    same_node = (dest // gpus_per_node) == (src // gpus_per_node)
+    same_dom = (dest // sud) == (src // sud)
+    return {
+        "placement": "packed",
+        "local_rank_fraction": float((dest == src).float().mean()),
+        "same_node_fraction": float(same_node.float().mean()),
+        "same_scaleup_domain_fraction": float(same_dom.float().mean()),
+        "cross_node_fraction": float((~same_node).float().mean()),
+        "cross_domain_fraction": float((~same_dom).float().mean()),
+        "gpus_per_node": gpus_per_node, "scale_up_domain": sud, "copies": int(dest.numel()),
+    }
+
+
+def routing_stats(idx, experts: int, experts_per_rank: int, weights=None) -> dict:
+    """Summarize the realized routing of the GLOBAL trace so every published point is
+    self-describing. idx is the global [gt, topk] expert-index tensor; weights, when given,
+    are the matching [gt, topk] gate weights and are hashed into the workload identity.
+    """
+    n_ranks = max(1, experts // max(1, experts_per_rank))
+    dest_rank = idx // experts_per_rank                     # [gt, topk] rank owning each assignment
+    clamped = dest_rank.clamp(max=n_ranks - 1)
+    # Boolean [gt, rank] mask: a token reaches a rank once, however many of its experts live there.
+    reached = torch.zeros(idx.shape[0], n_ranks, dtype=torch.bool).scatter_(1, clamped, True)
+    fanout = reached.sum(dim=1)                             # [gt] distinct destination ranks
+    # Drop the fan-out-0 bucket so entry k-1 counts the tokens that reach exactly k ranks.
+    fanout_hist = torch.bincount(fanout, minlength=n_ranks + 1)[1:n_ranks + 1].tolist()
+    # Compute load counts every expert assignment; network load counts one payload copy per
+    # (token, rank) pair. The two differ whenever a token picks two experts on the same rank.
+    expert_load = torch.bincount(idx.reshape(-1), minlength=experts).float()
+    rank_assignments = torch.bincount(clamped.reshape(-1), minlength=n_ranks).float()
+    rank_payloads = reached.sum(dim=0).float()
+
+    def _cv(t):
+        # Coefficient of variation (population std / mean); 0.0 when the mean is not positive.
+        m = float(t.mean())
+        return float(t.std(unbiased=False) / m) if m > 0 else 0.0
+
+    mean_load = float(expert_load.mean())
+    hotspot = float(expert_load.max() / expert_load.mean()) if mean_load > 0 else 0.0
+    # SHA-256 workload identity: int32 indices, plus float32 gate weights when they are given.
+    idx_bytes = idx.to(torch.int32).cpu().numpy().tobytes()
+    idx_hash = hashlib.sha256(idx_bytes).hexdigest()
+    w_hash, routing_hash = None, idx_hash
+    if weights is not None:
+        w_bytes = weights.to(torch.float32).cpu().numpy().tobytes()
+        w_hash = hashlib.sha256(w_bytes).hexdigest()
+        routing_hash = hashlib.sha256(idx_bytes + w_bytes).hexdigest()
+    return {
+        "fanout_mean": float(fanout.float().mean()),
+        "fanout_min": int(fanout.min()),
+        "fanout_max": int(fanout.max()),
+        "fanout_hist": fanout_hist,
+        "expert_assignments_per_rank": [int(x) for x in rank_assignments.tolist()],
+        "payload_copies_per_rank": [int(x) for x in rank_payloads.tolist()],
+        "routed_copies": int(fanout.sum()),                # total (token, dest-rank) pairs
+        "expert_load_min": int(expert_load.min()),
+        "expert_load_max": int(expert_load.max()),
+        "expert_load_mean": mean_load,
+        "expert_load_cv": _cv(expert_load),
+        "expert_assignment_rank_cv": _cv(rank_assignments),
+        "payload_rank_cv": _cv(rank_payloads),
+        "hotspot_ratio": hotspot,
+        # Empty experts capture compute skew; empty destination ranks capture network skew.
+        "empty_expert_count": int((expert_load == 0).sum()),
+        "empty_rank_count": int((rank_payloads == 0).sum()),
+        "routing_hash": routing_hash,
+        "idx_hash": idx_hash,
+        "weights_hash": w_hash,
+    }
+
+
+# --------------------------------------------------------------------------- self-test
+if __name__ == "__main__":
+    import sys
+    E, TOPK, EPR, GT = 256, 8, 32, 4096
+    ui, _ = build_global_routing(GT, E, TOPK, "uniform", 67)
+    zi, _ = build_global_routing(GT, E, TOPK, "zipf", 67)
+    assert all(len(set(row.tolist())) == TOPK for row in ui[:16])  # first 16 rows: TOPK distinct experts
+    uniform, zipf = routing_stats(ui, E, EPR), routing_stats(zi, E, EPR)
+    assert uniform["hotspot_ratio"] < zipf["hotspot_ratio"]  # zipf must be more concentrated than uniform
+    dev = torch.device("cpu")
+    first = rank_activations(8, 256, 67, 0, dev, dtype=torch.float32)
+    second = rank_activations(8, 256, 67, 0, dev, dtype=torch.float32)
+    assert torch.equal(first, second) and torch.isfinite(first).all()  # repeat calls identical and finite
+    print("routing self-test: PASS")
+    sys.exit(0)
diff --git a/experimental/CollectiveX/tests/run_ep.py b/experimental/CollectiveX/tests/run_ep.py
new file mode 100644
index 0000000000..cf019af28f
--- /dev/null
+++ b/experimental/CollectiveX/tests/run_ep.py
@@ -0,0 +1,381 @@
+#!/usr/bin/env python3
+"""CollectiveX v1 EP benchmark entrypoint for torchrun or rank environments."""
+
+from __future__ import annotations
+
+import argparse
+import ctypes
+import json
+import os
+import platform
+import re
+import shlex
+import socket
+import subprocess
+import sys
+
+# Make the sibling tests/ modules importable when run as `tests/run_ep.py` under
+# torchrun (it executes the file as __main__, not as a package).
+HERE = os.path.dirname(os.path.abspath(__file__))
+sys.path[:0] = [HERE, os.path.dirname(HERE)]
+
+import ep_harness  # noqa: E402  (stdlib-only; safe before torch)
+import identity  # noqa: E402
+
+
+def _numeric_version(command: list[str]) -> str | None:
+    """Run command and return the first dotted numeric version on its stdout, else None."""
+    try:
+        completed = subprocess.run(
+            command, capture_output=True, check=False, text=True, timeout=10
+        )
+    except (OSError, subprocess.TimeoutExpired):
+        return None
+    succeeded = completed.returncode == 0
+    found = re.search(r"\b[0-9]+(?:\.[0-9]+){1,3}\b", completed.stdout) if succeeded else None
+    return found.group(0) if found else None
+
+
+def _loaded_collective_version() -> str | None:
+    """Version of the one NCCL/RCCL library mapped into this process; None if absent or ambiguous."""
+    try:
+        candidates = set()
+        with open("/proc/self/maps", encoding="utf-8") as maps:
+            for entry in maps:
+                wanted = "libnccl.so" in entry or "librccl.so" in entry
+                if wanted and os.path.isfile(entry.rstrip().split()[-1]):
+                    candidates.add(os.path.realpath(entry.rstrip().split()[-1]))
+        if len(candidates) != 1:
+            return None
+        raw = ctypes.c_int()
+        library = ctypes.CDLL(candidates.pop())
+        if library.ncclGetVersion(ctypes.byref(raw)) != 0:
+            return None
+        return ep_harness.format_collective_version(raw.value)
+    except (AttributeError, OSError):
+        return None
+
+
+def _runtime_fingerprint(
+    torch, device, *, machine: str, vendor: str, arch: str
+) -> dict:
+    """Collect this rank's runtime facts, excluding hosts, addresses, UUIDs, and paths.
+
+    torch is the imported torch module and device the CUDA/HIP device to describe; machine,
+    vendor, and arch are recorded exactly as passed in.
+    """
+    props = torch.cuda.get_device_properties(device)
+    on_nvidia = vendor == "nvidia"
+    # Every vendor other than "nvidia" is described through the HIP / RCCL / rocm-smi branch.
+    if on_nvidia:
+        smi_query = ["nvidia-smi", "--query-gpu=driver_version", "--format=csv,noheader"]
+        runtime = {"kind": "cuda", "version": torch.version.cuda}
+    else:
+        smi_query = ["rocm-smi", "--showdriverversion"]
+        runtime = {"kind": "hip", "version": torch.version.hip}
+    # The driver version is None when the vendor tool is missing, fails, or prints no version.
+    driver = _numeric_version(smi_query)
+    # The collective version is read from the library actually mapped into this process.
+    collective = {
+        "kind": "nccl" if on_nvidia else "rccl",
+        "version": _loaded_collective_version(),
+    }
+    return {
+        "accelerator_runtime": runtime,
+        "collective_library": collective,
+        "device": {
+            "arch": arch,
+            "compute_units": int(props.multi_processor_count),
+            "memory_bytes": int(props.total_memory),
+            "product": torch.cuda.get_device_name(device),
+            "warp_size": int(props.warp_size),
+        },
+        "driver_version": driver,
+        "framework": {"kind": "torch", "version": str(torch.__version__)},
+        "machine": machine,
+        "python_version": platform.python_version(),
+        "vendor": vendor,
+    }
+
+
+def _summarize_realized_placement(
+    records: list[tuple[str, int]],
+    *,
+    expected_nodes: int,
+    expected_gpus_per_node: int,
+    expected_world_size: int,
+) -> dict:
+    """Check gathered (host, local_rank) records against the request; return host-free aggregates."""
+    if expected_gpus_per_node < 1 or expected_nodes < 1:
+        raise ValueError("requested placement dimensions must be positive")
+    if expected_world_size != expected_nodes * expected_gpus_per_node:
+        raise ValueError("requested nodes x GPUs per node differs from world size")
+    if len(records) != expected_world_size:
+        raise ValueError("realized rank count differs from world size")
+
+    ranks_on_host: dict[str, list[int]] = {}
+    for host, local_rank in records:
+        well_typed = isinstance(host, str) and bool(host) and type(local_rank) is int
+        if not well_typed:
+            raise ValueError("realized placement record has invalid types")
+        ranks_on_host.setdefault(host, []).append(local_rank)
+
+    # Gather every observation first; the checks below then run in a fixed precedence order.
+    per_host = sorted(map(len, ranks_on_host.values()))
+    wanted = list(range(expected_gpus_per_node))
+    complete = all(sorted(ranks) == wanted for ranks in ranks_on_host.values())
+    distinct = len(set(records)) == len(records)
+    if len(ranks_on_host) != expected_nodes:
+        raise ValueError(
+            f"realized node count {len(ranks_on_host)} differs from requested {expected_nodes}"
+        )
+    if per_host != [expected_gpus_per_node] * expected_nodes:
+        raise ValueError("realized ranks per node differ from requested GPUs per node")
+    if not (complete and distinct):
+        raise ValueError("realized local ranks are incomplete or duplicated")
+    return {
+        "gpus_per_node": expected_gpus_per_node,
+        "nodes": expected_nodes,
+        "ranks_per_node": expected_gpus_per_node,
+        "unique_local_ranks": True,
+        "valid": True,
+    }
+
+
+def _common_runtime_fingerprint(records: list[dict]) -> dict:
+    """Return the one fingerprint every rank reported; raise ValueError if ranks disagree."""
+    if not records:
+        raise ValueError("runtime fingerprint evidence is empty")
+    distinct = set()
+    for record in records:
+        encoded = json.dumps(record, allow_nan=False, sort_keys=True, separators=(",", ":"))
+        distinct.add(encoded)
+    if len(distinct) > 1:
+        raise ValueError("runtime fingerprint differs across distributed ranks")
+    return records[0]
+
+
+def main() -> int:  # parse args, validate the runtime, run the sweep; returns the exit status
+    ap = argparse.ArgumentParser(description="CollectiveX EP dispatch/combine sweep")
+    ap.add_argument(
+        "--backend",
+        required=True,
+        choices=[
+            "deepep",
+            "deepep-v2",
+            "deepep-hybrid",
+            "mori",
+            "uccl",
+            "nccl-ep",
+        ],
+    )
+    ep_harness.add_common_args(ap)
+    args = ap.parse_args()
+
+    if args.case_id and not identity.is_typed_id(args.case_id, "case"):
+        print(f"ERROR: invalid native case ID {args.case_id!r}", file=sys.stderr)
+        return 2
+    if args.case_id and args.seed != ep_harness.ROUTING_SEED:
+        print(
+            f"ERROR: scheduled v1 cases require seed={ep_harness.ROUTING_SEED}; got {args.seed}",
+            file=sys.stderr,
+        )
+        return 2
+
+    sampling_error = ep_harness.sampling_contract_error(
+        args.iters, args.trials, args.warmup
+    )
+    if sampling_error:
+        print(f"ERROR: {sampling_error}", file=sys.stderr)
+        return 2
+
+    try:
+        import torch
+        import torch.distributed as dist
+    except Exception as exc:  # pragma: no cover
+        print(f"ERROR: torch unavailable: {exc!r}", file=sys.stderr)
+        return 3
+
+    rank = int(os.environ.get("RANK", "0"))
+    world_size = int(os.environ.get("WORLD_SIZE", "1"))
+    local_rank = int(os.environ.get("LOCAL_RANK", "0"))
+    torch.cuda.set_device(local_rank)  # bind this process to its local GPU index
+    device = torch.device(f"cuda:{local_rank}")
+    os.environ.setdefault("MASTER_ADDR", "localhost")  # kept as-is when already set
+    os.environ.setdefault("MASTER_PORT", "12355")
+
+    import capability
+
+    sku = capability.PLATFORMS.get(args.runner)
+    if sku is None:
+        print(f"ERROR: unknown runner identity {args.runner!r}", file=sys.stderr)
+        return 5
+    machine = {"x86_64": "amd64", "aarch64": "arm64"}.get(
+        platform.machine(), platform.machine()
+    )
+    props = torch.cuda.get_device_properties(device)
+    if torch.version.hip:
+        vendor = "amd"
+        accelerator = str(getattr(props, "gcnArchName", "")).split(":", 1)[0]
+    else:
+        vendor = "nvidia"
+        major, minor = torch.cuda.get_device_capability(device)
+        accelerator = f"sm{major}{minor}"
+    device_name = torch.cuda.get_device_name(device)
+    device_count = torch.cuda.device_count()
+    identity_issues = capability.runtime_identity_issues(
+        args.runner,
+        vendor=vendor,
+        arch=accelerator,
+        machine=machine,
+        device_name=device_name,
+        device_count=device_count,
+        world_size=world_size,
+    )
+    if identity_issues:
+        print(
+            f"ERROR: runtime identity does not match {args.runner}: "
+            + "; ".join(identity_issues),
+            file=sys.stderr,
+        )
+        return 5
+    if args.gpus_per_node and args.gpus_per_node != sku["gpus_per_node"]:
+        print(
+            f"ERROR: {args.runner} requires {sku['gpus_per_node']} GPUs per node",
+            file=sys.stderr,
+        )
+        return 5
+    args.runtime_device_product = device_name
+    args.runtime_device_count = device_count
+    args.allocation_execution_id = os.environ.get("COLLECTIVEX_EXECUTION_ID")
+
+    # EPLB bumps the expert count to PHYSICAL (logical + redundant) BEFORE backend construction
+    # so the backend sizes its buffers for the replicated set; ep_harness builds the LOGICAL
+    # routing trace and remaps it to the balanced physical placement (a pure routing transform,
+    # tests/eplb.py — no adapter change). Deterministic, so every rank agrees on the count.
+    if getattr(args, "eplb", False):
+        import eplb
+
+        args.num_logical_experts = args.experts
+        args.experts = eplb.physical_count(
+            args.experts, ep_harness.EPLB_REDUNDANT_EXPERTS, world_size
+        )
+
+    # Reproduction provenance (recorded in the artifact). Rack launchers provide ranks directly
+    # through srun, while single-node launchers use torchrun; do not claim torchrun for both.
+    if os.environ.get("TORCHELASTIC_RUN_ID"):
+        args.distributed_launcher = "torchrun"
+        prefix = f"torchrun --nproc_per_node={world_size}"
+    else:
+        args.distributed_launcher = "rank-environment"
+        prefix = f"RANK={rank} WORLD_SIZE={world_size} LOCAL_RANK={local_rank} python3"
+    args.reproduction_command = f"{prefix} tests/run_ep.py {shlex.join(sys.argv[1:])}"
+    args.image = os.environ.get("COLLECTIVEX_IMAGE", "")
+    args.image_digest = os.environ.get("COLLECTIVEX_IMAGE_DIGEST", "")
+    args.image_digest_verified = (
+        os.environ.get("COLLECTIVEX_IMAGE_DIGEST_VERIFIED") == "1"
+    )
+    # Container architecture and local squash hash for Enroot/Pyxis.
+    args.image_arch = machine
+    args.squash_sha256 = os.environ.get("COLLECTIVEX_SQUASH_SHA256")
+    # GitHub provenance: repo, run ID, attempt, ref, source SHA, job,
+    # artifact. A result is only publication-'official' when these are present (validity gate).
+    _run = {
+        "run_id": os.environ.get("GITHUB_RUN_ID"),
+        "run_attempt": os.environ.get("GITHUB_RUN_ATTEMPT"),
+        "ref": os.environ.get("GITHUB_REF_NAME") or os.environ.get("GITHUB_REF"),
+        "source_sha": os.environ.get("COLLECTIVEX_SOURCE_SHA")
+        or os.environ.get("GITHUB_SHA"),
+        "repo": os.environ.get("GITHUB_REPOSITORY"),
+        "job": os.environ.get("GITHUB_JOB"),
+        "artifact": os.environ.get("COLLECTIVEX_ARTIFACT_NAME"),
+    }
+    args.git_run = _run if any(_run.values()) else None  # None when no provenance value is set
+
+    # Import the backend class only after torch initializes. Every adapter implements
+    # the same fixed v1 profile; the CLI has no precision/mode/contract fallbacks.
+    if args.backend == "mori":
+        from ep_mori import MoRIBackend as Backend
+    elif args.backend == "nccl-ep":
+        from ep_nccl import NCCLBackend as Backend
+    elif args.backend == "uccl":
+        from ep_uccl import UCCLBackend as Backend
+    elif args.backend == "deepep-hybrid":
+        from ep_deepep_hybrid import DeepEPHybridBackend as Backend
+    elif args.backend == "deepep-v2":
+        from ep_deepep_v2 import DeepEPV2Backend as Backend
+    else:
+        from ep_deepep import DeepEPBackend as Backend
+
+    # MoRI uses the gloo+NCCL group shape from its reference; other adapters use NCCL/RCCL.
+    if not dist.is_initialized():
+        if args.backend == "mori":
+            dist.init_process_group(
+                backend="cpu:gloo,cuda:nccl",
+                rank=rank,
+                world_size=world_size,
+                device_id=device,
+            )
+        elif args.backend == "deepep-v2":
+            # PR #605 reuses PyTorch's NCCL communicator through ``_comm_ptr``. Supplying
+            # device_id eagerly forms it before ElasticBuffer construction.
+            dist.init_process_group("nccl", device_id=device)
+        else:
+            dist.init_process_group("nccl")
+
+    args.runtime_fingerprint = _runtime_fingerprint(
+        torch, device, machine=machine, vendor=vendor, arch=accelerator
+    )
+
+    gpus_per_node = args.gpus_per_node or sku["gpus_per_node"]
+    try:
+        expected_nodes = int(
+            os.environ.get("SLURM_NNODES", str(world_size // gpus_per_node))  # unset -> derived
+        )
+    except ValueError as exc:
+        raise ValueError("SLURM_NNODES must be a positive integer") from exc
+    realized_records: list[tuple[str, int, dict] | None] = [None] * world_size
+    dist.all_gather_object(
+        realized_records,
+        (socket.gethostname(), local_rank, args.runtime_fingerprint),
+    )
+    complete_records = [record for record in realized_records if record is not None]  # skip unfilled
+    args.realized_placement = _summarize_realized_placement(
+        [(record[0], record[1]) for record in complete_records],
+        expected_nodes=expected_nodes,
+        expected_gpus_per_node=gpus_per_node,
+        expected_world_size=world_size,
+    )
+    args.runtime_fingerprint = _common_runtime_fingerprint(
+        [record[2] for record in complete_records]
+    )
+
+    # Construct + run inside a try so a backend exception (esp. a new adapter on GPU) prints its
+    # FULL traceback to STDOUT — torchrun captures per-rank stdout but only summarizes stderr, so an
+    # uncaught exception is otherwise invisible in CI. Print on every rank (prefixed) then re-raise.
+    try:
+        backend = Backend(args, rank, world_size, local_rank, device)
+        if rank == 0:
+            print(
+                f"[run_ep] backend={args.backend} phase={args.phase} mode=normal "
+                f"world={world_size} ep_size={world_size} hidden={args.hidden} "
+                f"topk={args.topk} experts={args.experts} dtype=bf16 "
+                f"routing={args.routing} seed={args.seed}"
+            )
+        rc = ep_harness.run_sweep(args, backend, torch, dist, device, rank, world_size)
+    except Exception:
+        import traceback
+
+        print(
+            f"[run_ep][rank{rank}] backend={args.backend} FAILED:\n"
+            + traceback.format_exc(),
+            flush=True,
+        )
+        raise
+    # finalize() handles backend-specific teardown: DeepEP returns rc cleanly;
+    # MoRI hard-exits past its post-shmem_finalize teardown assertion.
+    return backend.finalize(rc)
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())  # main()'s return value becomes the process exit status
diff --git a/experimental/CollectiveX/tests/test_deepep_v2_contract.py b/experimental/CollectiveX/tests/test_deepep_v2_contract.py
new file mode 100644
index 0000000000..afd01ea3e6
--- /dev/null
+++ b/experimental/CollectiveX/tests/test_deepep_v2_contract.py
@@ -0,0 +1,1852 @@
+#!/usr/bin/env python3
+"""CPU-only structural and registry tests for the pinned DeepEP V2 path."""
+from __future__ import annotations
+
+import ast
+import copy
+import ctypes
+import hashlib
+import json
+import os
+from pathlib import Path
+from pathlib import PurePosixPath
+import shutil
+import stat
+import subprocess
+import sys
+import tempfile
+import types
+import unittest
+
+HERE = Path(__file__).resolve().parent
+ROOT = HERE.parent
+sys.path.insert(0, str(ROOT))
+
+import capability  # noqa: E402
+import contracts  # noqa: E402
+import ep_harness  # noqa: E402
+import run_ep  # noqa: E402
+
+
+COMMIT = "fa8a9b16898204afd347c663b89e65ef87dc6ce6"  # expected DEEPEP_V2_COMMIT and backend commit
+TREE = "29809e75c5874e6609dac4804e7b651d5226959f"  # expected DEEPEP_V2_TREE
+FMT_COMMIT = "a4c7e17133ee9cb6a2f45545f6e974dd3c393efa"  # expected DEEPEP_V2_FMT_COMMIT
+
+
+def deepep_v2_jit_provenance() -> list[dict[str, str]]:
+    """Build one synthetic JIT provenance record per kernel name, in sorted-name order."""
+    records = []
+    for index, name in enumerate(sorted(contracts.DEEPEP_V2_JIT_KERNELS)):
+        cubin, sass, source = (f"{index + offset:x}" * 64 for offset in (1, 2, 3))
+        records.append({
+            "cache_key": f"kernel.{name}.{index:032x}",
+            "cubin_sha256": cubin, "sass_sha256": sass, "source_sha256": source,
+        })
+    return records
+
+
+def hybrid_realized_config() -> dict[str, object]:
+    """Synthetic realized config: 1 for every field, True for bool fields, UINT16 token type."""
+    config: dict[str, object] = dict.fromkeys(contracts.HYBRID_REALIZED_CONFIG_FIELDS, 1)
+    config.update(dict.fromkeys(contracts.HYBRID_REALIZED_BOOL_FIELDS, True))
+    config["token_data_type"] = "UINT16"
+    return config
+
+
+def hybrid_jit_provenance(ranks: int = 2) -> tuple[list[str], list[dict[str, object]]]:
+    """Return the three kernel keys plus one synthetic per-rank artifact record for each key."""
+    keys = ["combine-key", "dispatch-key", "preprocess-key"]
+    artifacts: list[dict[str, object]] = []
+    for index, key in enumerate(keys):
+        # Size and digest vary per kernel only; every rank of one kernel shares them.
+        digest = f"{index + 1:x}" * 64
+        per_rank = [
+            {"bytes": 10 + index, "rank": rank, "sha256": digest}
+            for rank in range(ranks)
+        ]
+        artifacts.append({"kernel_key": key, "rank_artifacts": per_rank})
+    return keys, artifacts
+
+
+def load_uccl_function(name: str, namespace: dict[str, object]):
+    """Compile only the top-level function `name` from ep_uccl.py into namespace and return it."""
+    path = HERE / "ep_uccl.py"
+    top_level = ast.parse(path.read_text()).body
+    function = next(
+        node for node in top_level if isinstance(node, ast.FunctionDef) and node.name == name
+    )
+    exec(compile(ast.Module(body=[function], type_ignores=[]), str(path), "exec"), namespace)
+    return namespace[name]
+
+
+def operator_config(root: Path) -> dict[str, object]:
+    """Build a schema-version-1 operator config whose every storage path is str(root)."""
+    path = str(root)
+    # Shared fragments; each runner entry below is still a fresh dict with the same key order.
+    with_account = {"partition": "test", "account": "test"}
+    partition_only = {"partition": "test"}
+    squash = {"squash_dir": path}
+    runners = {
+        "h100-dgxc": {**with_account, **squash},
+        "h200-dgxc": {**partition_only, **squash},
+        "b200-dgxc": {**with_account, **squash},
+        "b300": {**with_account, **squash, "stage_dir": path},
+        "gb200": {**with_account, "storage_roots": [path]},
+        "gb300": {**with_account, **squash, "stage_dir": path, "enroot_cache_path": path},
+        "mi325x": {**partition_only, **squash},
+        "mi355x": {**partition_only, **squash},
+    }
+    return {"schema_version": 1, "runners": runners}
+
+
+class DeepEPV2ContractTests(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls) -> None:  # locate and parse the adapter source once for the class
+        cls.path = adapter = HERE / "ep_deepep_v2.py"
+        cls.tree = ast.parse(adapter.read_text(), str(adapter))
+
+    def test_capability_is_explicit_for_every_sku(self) -> None:
+        # deepep-v2 pins its stack and lists exactly basis/schedulable for every platform SKU.
+        backend = capability.BACKENDS["deepep-v2"]
+        pinned = tuple(backend[key] for key in ("implementation", "commit", "torch", "nccl"))
+        self.assertEqual(pinned, ("deep_ep.ElasticBuffer", COMMIT, "2.10.0+cu130", "2.30.4"))
+        self.assertEqual(backend["source"], "deepseek-ai/DeepEP#605+#630")
+        self.assertEqual(backend["communication_backend"], "nccl-device-lsa")
+        per_sku = backend["sku_capabilities"]
+        self.assertEqual(set(per_sku), set(capability.PLATFORMS))
+        expected_fields = {"basis", "schedulable"}
+        for sku, platform in capability.PLATFORMS.items():
+            resolved, _ = capability.resolve(sku, "deepep-v2")
+            # The backend must resolve on NVIDIA platforms and on no others.
+            self.assertEqual(resolved, platform["vendor"] == "nvidia")
+            self.assertEqual(set(per_sku[sku]), expected_fields)
+
+    def test_adapter_ast_pins_elastic_api_and_weight_semantics(self) -> None:
+        imports = {
+            alias.name
+            for node in ast.walk(self.tree)
+            if isinstance(node, ast.ImportFrom) and node.module == "deep_ep"
+            for alias in node.names
+        }
+        self.assertEqual(imports, {"ElasticBuffer"})
+        constants = {
+            node.targets[0].id: ast.literal_eval(node.value)
+            for node in self.tree.body
+            if isinstance(node, ast.Assign)
+            and len(node.targets) == 1
+            and isinstance(node.targets[0], ast.Name)
+            and isinstance(node.value, ast.Constant)
+        }
+        self.assertEqual(constants["DEEPEP_V2_COMMIT"], COMMIT)
+        self.assertEqual(constants["DEEPEP_V2_TREE"], TREE)
+        self.assertEqual(constants["DEEPEP_V2_FMT_COMMIT"], FMT_COMMIT)
+        self.assertEqual(constants["DEEPEP_V2_PR"], 605)
+        self.assertEqual(constants["DEEPEP_V2_FIX_PR"], 630)
+        self.assertEqual(
+            constants["DEEPEP_V2_JIT_RANDOM_SEED"],
+            "collectivex-deepep-v2-fa8a9b1",
+        )
+        self.assertEqual(constants["NCCL_VERSION"], "2.30.4")
+        self.assertEqual(constants["NVSHMEM_VERSION"], "3.3.9")
+        backend = next(
+            node for node in self.tree.body
+            if isinstance(node, ast.ClassDef) and node.name == "DeepEPV2Backend"
+        )
+        assignments = {
+            node.targets[0].id: ast.literal_eval(node.value)
+            for node in backend.body
+            if isinstance(node, ast.Assign)
+            and isinstance(node.targets[0], ast.Name)
+            and isinstance(node.value, ast.Constant)
+        }
+        self.assertEqual(assignments["combine_weight_semantics"], "unweighted-rank-sum")
+        methods = {node.name for node in backend.body if isinstance(node, ast.FunctionDef)}
+        self.assertTrue({
+            "dispatch", "inspect_dispatch", "combine_transformed", "capture_deferred_provenance",
+            "finalize",
+        } <= methods)
+        self.assertNotIn("expected", methods)
+        constructor = next(
+            node for node in ast.walk(backend)
+            if isinstance(node, ast.Call)
+            and isinstance(node.func, ast.Name)
+            and node.func.id == "ElasticBuffer"
+        )
+        deterministic = next(
+            keyword for keyword in constructor.keywords if keyword.arg == "deterministic"
+        )
+        self.assertIs(ast.literal_eval(deterministic.value), False)
+        self.assertIn("deterministic", contracts.REQUIRED_BACKEND_PROVENANCE["deepep-v2"])
+        self.assertIn("num_experts", contracts.REQUIRED_BACKEND_PROVENANCE["deepep-v2"])
+        self.assertIn("tuning_num_experts", contracts.REQUIRED_BACKEND_PROVENANCE["deepep-v2"])
+        self.assertIn("jit_random_seed", contracts.REQUIRED_BACKEND_PROVENANCE["deepep-v2"])
+        self.assertIn("gin_enabled", contracts.REQUIRED_BACKEND_PROVENANCE["deepep-v2"])
+        self.assertIn("communication_backend", contracts.REQUIRED_BACKEND_PROVENANCE["deepep-v2"])
+        self.assertIn("deepep_pr", contracts.REQUIRED_BACKEND_PROVENANCE["deepep-v2"])
+        self.assertIn("deepep_fix_pr", contracts.REQUIRED_BACKEND_PROVENANCE["deepep-v2"])
+        source = self.path.read_text()
+        self.assertIn('getattr(args, "num_logical_experts", args.experts)', source)
+        self.assertIn('"use_expanded_layout": False', source)
+        self.assertIn("allow_hybrid_mode = _configure_gin_mode(args, world_size)", source)
+        self.assertIn("get_theoretical_num_sms(tuning_num_experts, args.topk)", source)
+
+        jit_function = next(
+            node for node in self.tree.body
+            if isinstance(node, ast.FunctionDef) and node.name == "_jit_cache_key"
+        )
+        namespace = {"hashlib": __import__("hashlib"), "json": json}
+        exec(compile(ast.Module(body=[jit_function], type_ignores=[]), str(self.path), "exec"), namespace)
+        key = namespace["_jit_cache_key"]
+        baseline = types.SimpleNamespace(
+            runner="h100-dgxc", hidden=7168, topk=8, experts=256,
+            routing="uniform", eplb=False, case_id="uniform",
+        )
+        zipf = types.SimpleNamespace(**{**vars(baseline), "routing": "zipf", "case_id": "zipf"})
+        eplb = types.SimpleNamespace(
+            **{**vars(zipf), "experts": 288, "num_logical_experts": 256, "eplb": True}
+        )
+        realized = {
+            "num_sms": 24,
+            "num_qps": 9,
+            "allocated_qps": 17,
+            "logical_scaleout_ranks": 1,
+            "logical_scaleup_ranks": 8,
+            "physical_rdma_ranks": 2,
+            "physical_nvlink_ranks": 4,
+            "is_scaleup_nvlink": False,
+            "device_arch_major": 9,
+            "device_arch_minor": 0,
+            "device_sms": 132,
+            "device_smem_bytes": 232448,
+            "gpu_timeout_cycles": 198000000000,
+        }
+        direct = key(baseline, 8, 128, False, realized)
+        self.assertTrue(direct.startswith("jitcfg-v3-"))
+        self.assertEqual(direct, key(zipf, 8, 128, False, realized))
+        self.assertNotEqual(direct, key(zipf, 8, 128, True, realized))
+        self.assertNotEqual(direct, key(eplb, 8, 128, False, realized))
+        for field, value in realized.items():
+            changed = not value if type(value) is bool else value + 1
+            self.assertNotEqual(
+                direct,
+                key(baseline, 8, 128, False, {**realized, field: changed}),
+                field,
+            )
+        init = next(
+            node for node in backend.body
+            if isinstance(node, ast.FunctionDef) and node.name == "__init__"
+        )
+        buffer_call = next(
+            node for node in ast.walk(init)
+            if isinstance(node, ast.Call)
+            and isinstance(node.func, ast.Name)
+            and node.func.id == "ElasticBuffer"
+        )
+        jit_config_check = next(
+            node for node in ast.walk(init)
+            if isinstance(node, ast.Call)
+            and isinstance(node.func, ast.Name)
+            and node.func.id == "_require_cross_rank_equal"
+            and ast.literal_eval(node.args[1]) == "JIT configuration"
+        )
+        cache_assignment = next(
+            node for node in ast.walk(init)
+            if isinstance(node, ast.Assign)
+            and isinstance(node.targets[0], ast.Subscript)
+            and ast.unparse(node.targets[0].value) == "os.environ"
+            and ast.literal_eval(node.targets[0].slice) == "EP_JIT_CACHE_DIR"
+        )
+        self.assertLess(buffer_call.lineno, jit_config_check.lineno)
+        self.assertLess(jit_config_check.lineno, cache_assignment.lineno)
+        capture = next(
+            node for node in backend.body
+            if isinstance(node, ast.FunctionDef)
+            and node.name == "capture_deferred_provenance"
+        )
+        calls = [node for node in ast.walk(capture) if isinstance(node, ast.Call)]
+        barrier = next(
+            node for node in calls
+            if isinstance(node.func, ast.Attribute) and node.func.attr == "barrier"
+        )
+        self.assertEqual(
+            {keyword.arg: ast.literal_eval(keyword.value) for keyword in barrier.keywords},
+            {"use_comm_stream": True, "with_cpu_sync": True},
+        )
+        scan = next(
+            node for node in calls
+            if isinstance(node.func, ast.Name) and node.func.id == "_jit_artifact_evidence"
+        )
+        self.assertLess(barrier.lineno, scan.lineno)
+        realized_check = next(
+            node for node in ast.walk(backend)
+            if isinstance(node, ast.Call)
+            and isinstance(node.func, ast.Name)
+            and node.func.id == "_require_cross_rank_equal"
+            and len(node.args) > 1
+            and isinstance(node.args[1], ast.Constant)
+            and node.args[1].value == "realized tuning/topology"
+        )
+        self.assertIsInstance(realized_check, ast.Call)
+        self.assertEqual(
+            (ROOT / "tests" / "ep_harness.py").read_text().count(
+                "capture_deferred_provenance()"
+            ),
+            2,
+        )
+        schema = json.loads((ROOT / "schemas" / "raw-case-v1.schema.json").read_text())
+        provenance = schema["properties"]["implementation"]["properties"]["provenance"]
+        self.assertEqual(provenance["properties"]["deterministic"], {"type": "boolean"})
+        self.assertEqual(
+            provenance["properties"]["num_experts"],
+            {"minimum": 1, "type": "integer"},
+        )
+        self.assertEqual(
+            provenance["properties"]["tuning_num_experts"],
+            {"minimum": 1, "type": "integer"},
+        )
+        self.assertEqual(
+            provenance["properties"]["jit_cubins"]["items"],
+            {"$ref": "#/$defs/deepep_v2_jit_cubin"},
+        )
+        self.assertEqual(
+            (
+                provenance["properties"]["jit_cubins"]["minItems"],
+                provenance["properties"]["jit_cubins"]["maxItems"],
+            ),
+            (5, 5),
+        )
+        self.assertEqual(
+            provenance["properties"]["jit_random_seed"],
+            {"const": "collectivex-deepep-v2-fa8a9b1"},
+        )
+        self.assertEqual(provenance["properties"]["allow_hybrid_mode"], {"const": False})
+        self.assertEqual(provenance["properties"]["gin_enabled"], {"const": False})
+        self.assertEqual(provenance["properties"]["deepep_pr"], {"const": 605})
+        self.assertEqual(provenance["properties"]["deepep_fix_pr"], {"const": 630})
+        self.assertEqual(
+            provenance["properties"]["communication_backend"],
+            {"const": "nccl-device-lsa"},
+        )
+        for field, value in (
+            ("num_experts", "288"),
+            ("tuning_num_experts", "not-an-integer"),
+            ("tuning_num_experts", 0),
+        ):
+            with self.subTest(provenance_field=field, value=value):
+                self.assertIn(
+                    field,
+                    contracts.backend_provenance_issues(
+                        "deepep-v2", {field: value}
+                    ),
+                )
+
+    def test_v2_gin_mode_uses_the_scale_up_domain_and_safe_fallbacks(self) -> None:
+        # Exercises _configure_gin_mode and _lsa_topology_is_valid in isolation: both
+        # definitions are taken from self.tree and executed in a namespace that
+        # provides nothing but `os`.
+        functions = {
+            node.name: node for node in self.tree.body if isinstance(node, ast.FunctionDef)
+        }
+        namespace = {"os": os}
+        exec(
+            compile(
+                ast.Module(
+                    body=[
+                        functions["_configure_gin_mode"],
+                        functions["_lsa_topology_is_valid"],
+                    ],
+                    type_ignores=[],
+                ),
+                str(self.path),
+                "exec",
+            ),
+            namespace,
+        )
+        configure = namespace["_configure_gin_mode"]
+        topology_is_valid = namespace["_lsa_topology_is_valid"]
+        # The assertions below observe the real EP_DISABLE_GIN variable, so the
+        # incoming value is saved here and put back in the finally block.
+        original = os.environ.get("EP_DISABLE_GIN")
+        try:
+            # scale_up_domain=72 with 8 as the second argument: expects False and
+            # EP_DISABLE_GIN set to "1".
+            args = types.SimpleNamespace(scale_up_domain=72, gpus_per_node=4)
+            self.assertFalse(configure(args, 8))
+            self.assertEqual(os.environ.get("EP_DISABLE_GIN"), "1")
+
+            # scale_up_domain=8 with 16: expects True and a pre-existing ("stale")
+            # EP_DISABLE_GIN value to be removed.
+            os.environ["EP_DISABLE_GIN"] = "stale"
+            args = types.SimpleNamespace(scale_up_domain=8, gpus_per_node=4)
+            self.assertTrue(configure(args, 16))
+            self.assertNotIn("EP_DISABLE_GIN", os.environ)
+
+            # Only gpus_per_node is present (no scale_up_domain): still expects True.
+            args = types.SimpleNamespace(gpus_per_node=4)
+            self.assertTrue(configure(args, 8))
+            self.assertNotIn("EP_DISABLE_GIN", os.environ)
+
+            # Neither attribute is present: expects False with EP_DISABLE_GIN="1".
+            self.assertFalse(configure(types.SimpleNamespace(), 8))
+            self.assertEqual(os.environ.get("EP_DISABLE_GIN"), "1")
+
+            # The same topology dict must be accepted for (False, 8) and (True, 16),
+            # and rejected for (False, 8) once physical_nvlink_ranks drops to 4.
+            topology = {
+                "physical_rdma_ranks": 1,
+                "physical_nvlink_ranks": 8,
+                "logical_scaleout_ranks": 1,
+                "logical_scaleup_ranks": 8,
+                "is_scaleup_nvlink": True,
+            }
+            self.assertTrue(topology_is_valid(False, 8, topology))
+            self.assertTrue(topology_is_valid(True, 16, topology))
+            topology["physical_nvlink_ranks"] = 4
+            self.assertFalse(topology_is_valid(False, 8, topology))
+        finally:
+            # Restore EP_DISABLE_GIN exactly as it was before the test ran.
+            if original is None:
+                os.environ.pop("EP_DISABLE_GIN", None)
+            else:
+                os.environ["EP_DISABLE_GIN"] = original
+
+    def test_ep_adapters_declare_unweighted_rank_sum(self) -> None:
+        adapters = {
+            "ep_deepep.py": "DeepEPBackend",
+            "ep_deepep_v2.py": "DeepEPV2Backend",
+            "ep_deepep_hybrid.py": "DeepEPHybridBackend",
+            "ep_mori.py": "MoRIBackend",
+            "ep_nccl.py": "NCCLBackend",
+            "ep_uccl.py": "UCCLBackend",
+        }
+        for filename, class_name in adapters.items():
+            with self.subTest(adapter=filename):
+                tree = ast.parse((HERE / filename).read_text())
+                backend = next(
+                    node for node in tree.body
+                    if isinstance(node, ast.ClassDef) and node.name == class_name
+                )
+                assignment = next(
+                    node for node in backend.body
+                    if isinstance(node, ast.Assign)
+                    and isinstance(node.targets[0], ast.Name)
+                    and node.targets[0].id == "combine_weight_semantics"
+                )
+                self.assertEqual(ast.literal_eval(assignment.value), "unweighted-rank-sum")
+                combine_methods = [
+                    item for item in backend.body
+                    if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef))
+                    and item.name in {"combine", "combine_transformed"}
+                ]
+                self.assertEqual(len(combine_methods), 2)
+                for method in combine_methods:
+                    self.assertNotIn("topk_weights", ast.unparse(method))
+                    self.assertNotIn("combine_topk_weights", ast.unparse(method))
+
+    def test_deepep_v2_jit_evidence_is_strict_and_stable(self) -> None:
+        # Part 1: the reference evidence list is accepted by the validator, and each
+        # variant below is rejected: empty list, an entry with an extra "path" key,
+        # every cache_key replaced by "dispatch", every cubin_sha256 replaced by
+        # "invalid", the last entry dropped, the first entry duplicated, and an extra
+        # entry whose cache_key differs only in its last 32 characters.
+        valid = deepep_v2_jit_provenance()
+        self.assertTrue(contracts._deepep_v2_jit_cubins_are_valid(valid))
+        for invalid in (
+            [],
+            [{**valid[0], "path": "/private/kernel.cubin"}],
+            [{**item, "cache_key": "dispatch"} for item in valid],
+            [{**item, "cubin_sha256": "invalid"} for item in valid],
+            valid[:-1],
+            [*valid, valid[0]],
+            [
+                *valid,
+                {
+                    **valid[0],
+                    "cache_key": valid[0]["cache_key"][:-32] + "f" * 32,
+                },
+            ],
+        ):
+            with self.subTest(invalid=invalid):
+                self.assertFalse(contracts._deepep_v2_jit_cubins_are_valid(invalid))
+
+        # Part 2: lift DeepEPV2Backend.capture_deferred_provenance out of the parsed
+        # module and run it as a plain function against stand-in collaborators.
+        backend = next(
+            node for node in self.tree.body
+            if isinstance(node, ast.ClassDef) and node.name == "DeepEPV2Backend"
+        )
+        capture = next(
+            node for node in backend.body
+            if isinstance(node, ast.FunctionDef)
+            and node.name == "capture_deferred_provenance"
+        )
+        # Mutable copy that the fake _jit_artifact_evidence below reads on each call.
+        artifacts = copy.deepcopy(valid)
+
+        class FakeBuffer:
+            # `self` inside barrier is the enclosing test case (closure), so the
+            # keyword arguments passed to barrier are asserted directly.
+            @staticmethod
+            def barrier(*, use_comm_stream: bool, with_cpu_sync: bool) -> None:
+                self.assertTrue(use_comm_stream)
+                self.assertTrue(with_cpu_sync)
+
+        namespace = {
+            "torch": types.SimpleNamespace(
+                cuda=types.SimpleNamespace(synchronize=lambda: None)
+            ),
+            "_jit_artifact_evidence": lambda: copy.deepcopy(artifacts),
+            "_require_cross_rank_equal": lambda _value, _label: None,
+        }
+        exec(
+            compile(ast.Module(body=[capture], type_ignores=[]), str(self.path), "exec"),
+            namespace,
+        )
+        # SimpleNamespace stands in for the backend instance passed as `self`.
+        state = types.SimpleNamespace(
+            buffer=FakeBuffer(),
+            _deferred_jit_snapshot=None,
+            backend_provenance={"jit_cubins": []},
+        )
+        # Two captures over unchanged artifacts must both succeed; once a cubin hash
+        # changes, the next capture must raise "changed after measurement".
+        namespace["capture_deferred_provenance"](state)
+        namespace["capture_deferred_provenance"](state)
+        artifacts[0]["cubin_sha256"] = "f" * 64
+        with self.assertRaisesRegex(RuntimeError, "changed after measurement"):
+            namespace["capture_deferred_provenance"](state)
+
+    def test_deepep_v2_jit_files_are_complete_regular_and_content_bound(self) -> None:
+        # Runs _sha256 and _jit_artifact_evidence (lifted from self.tree) against a
+        # throwaway cache directory and checks the success path plus three failures.
+        functions = [
+            node for node in self.tree.body
+            if isinstance(node, ast.FunctionDef)
+            and node.name in {"_sha256", "_jit_artifact_evidence"}
+        ]
+        namespace = {
+            "hashlib": hashlib,
+            "os": os,
+            "Path": Path,
+            "re": __import__("re"),
+            "DEEPEP_V2_JIT_KERNELS": contracts.DEEPEP_V2_JIT_KERNELS,
+        }
+        exec(compile(ast.Module(body=functions, type_ignores=[]), str(self.path), "exec"), namespace)
+        with tempfile.TemporaryDirectory() as temporary:
+            # Layout: <temporary>/cache/kernel.<name>.<32 hex digits>/kernel.{cu,cubin,sass},
+            # one directory per name in DEEPEP_V2_JIT_KERNELS.
+            cache = Path(temporary) / "cache"
+            cache.mkdir()
+            for index, name in enumerate(sorted(contracts.DEEPEP_V2_JIT_KERNELS)):
+                kernel = cache / f"kernel.{name}.{index:032x}"
+                kernel.mkdir()
+                for suffix in ("cu", "cubin", "sass"):
+                    (kernel / f"kernel.{suffix}").write_bytes(f"{name}-{suffix}".encode())
+            # EP_JIT_CACHE_DIR is pointed at the temporary root (the parent of
+            # "cache"); the previous value is restored in the finally block.
+            old_cache = os.environ.get("EP_JIT_CACHE_DIR")
+            os.environ["EP_JIT_CACHE_DIR"] = temporary
+            try:
+                # Success path: one record per kernel, with exactly these four keys.
+                evidence = namespace["_jit_artifact_evidence"]()
+                self.assertEqual(len(evidence), len(contracts.DEEPEP_V2_JIT_KERNELS))
+                self.assertEqual(
+                    set(evidence[0]),
+                    {"cache_key", "cubin_sha256", "sass_sha256", "source_sha256"},
+                )
+                first = cache / evidence[0]["cache_key"]
+                # Failure 1: a second directory that differs from the first only in
+                # the last 32 characters of its name must raise "kernel set".
+                duplicate = cache / (evidence[0]["cache_key"][:-32] + "f" * 32)
+                duplicate.mkdir()
+                for suffix in ("cu", "cubin", "sass"):
+                    (duplicate / f"kernel.{suffix}").write_bytes(b"duplicate")
+                with self.assertRaisesRegex(RuntimeError, "kernel set"):
+                    namespace["_jit_artifact_evidence"]()
+                shutil.rmtree(duplicate)
+                # Failure 2: a missing kernel.sass must raise "incomplete".
+                (first / "kernel.sass").unlink()
+                with self.assertRaisesRegex(RuntimeError, "incomplete"):
+                    namespace["_jit_artifact_evidence"]()
+                # Failure 3: kernel.sass present only as a symlink must raise
+                # "regular file".
+                (first / "kernel.sass").symlink_to(first / "kernel.cubin")
+                with self.assertRaisesRegex(RuntimeError, "regular file"):
+                    namespace["_jit_artifact_evidence"]()
+            finally:
+                if old_cache is None:
+                    os.environ.pop("EP_JIT_CACHE_DIR", None)
+                else:
+                    os.environ["EP_JIT_CACHE_DIR"] = old_cache
+
+    def test_runtime_and_shared_version_formatter_are_valid(self) -> None:
+        # `bash -n` only parses the script; check=True fails the test on a syntax error.
+        subprocess.run(
+            ["bash", "-n", str(ROOT / "runtime" / "run_in_container.sh")],
+            check=True,
+        )
+        # The formatter must accept both the packed integer and the tuple form.
+        self.assertEqual(ep_harness.format_collective_version(23004), "2.30.4")
+        self.assertEqual(ep_harness.format_collective_version((2, 30, 4)), "2.30.4")
+        source = self.path.read_text()
+        version_function = next(
+            node for node in self.tree.body
+            if isinstance(node, ast.FunctionDef) and node.name == "_loaded_nccl_version"
+        )
+
+        class FakeNccl:
+            # Stand-in for the loaded library: writes 23004 through the byref
+            # argument and returns 0.
+            @staticmethod
+            def ncclGetVersion(pointer) -> int:
+                pointer._obj.value = 23004
+                return 0
+
+        # ctypes is replaced by a namespace whose CDLL returns FakeNccl, while
+        # byref and c_int stay the real ctypes implementations.
+        namespace = {
+            "ctypes": types.SimpleNamespace(
+                CDLL=lambda _path: FakeNccl(), byref=ctypes.byref, c_int=ctypes.c_int,
+            ),
+            "ep_harness": ep_harness,
+            "os": os,
+            "_loaded_library_paths": lambda: {"/safe/libnccl.so.2"},
+        }
+        exec(
+            compile(ast.Module(body=[version_function], type_ignores=[]), str(self.path), "exec"),
+            namespace,
+        )
+        self.assertEqual(namespace["_loaded_nccl_version"](), "2.30.4")
+        # Zero or two libnccl paths must both be rejected with "exactly one".
+        # `paths=paths` binds the current loop value into the lambda.
+        for paths in (set(), {"/safe/libnccl.so.2", "/other/libnccl.so.2"}):
+            namespace["_loaded_library_paths"] = lambda paths=paths: paths
+            with self.assertRaisesRegex(RuntimeError, "exactly one"):
+                namespace["_loaded_nccl_version"]()
+        # _loaded_library_evidence is executed in the same namespace, with the
+        # path lookup and the hashing helper replaced by fixed stand-ins.
+        evidence_function = next(
+            node for node in self.tree.body
+            if isinstance(node, ast.FunctionDef) and node.name == "_loaded_library_evidence"
+        )
+        paths = {
+            "/safe/_C.cpython-310-x86_64-linux-gnu.so",
+            "/safe/libnccl.so.2",
+            "/safe/libnvshmem_host.so.3",
+        }
+        namespace.update(
+            _loaded_library_paths=lambda: paths,
+            _sha256=lambda _path: "a" * 64,
+        )
+        exec(
+            compile(ast.Module(body=[evidence_function], type_ignores=[]), str(self.path), "exec"),
+            namespace,
+        )
+        evidence = namespace["_loaded_library_evidence"]()
+        self.assertIn(
+            {"name": "deep_ep._C", "role": "deepep-extension", "sha256": "a" * 64},
+            evidence,
+        )
+        self.assertTrue(
+            contracts._content_evidence_is_valid(
+                evidence, {"deepep-extension", "nccl", "nvshmem"}
+            )
+        )
+        # The module source must not contain this literal call text.
+        self.assertNotIn("torch.cuda.nccl.version()", source)
+        # Equal fingerprints must return the first object itself (assertIs);
+        # differing fingerprints must raise ValueError.
+        fingerprint = {"runtime": "cuda", "version": "13.0"}
+        self.assertIs(
+            run_ep._common_runtime_fingerprint([fingerprint, dict(fingerprint)]),
+            fingerprint,
+        )
+        with self.assertRaises(ValueError):
+            run_ep._common_runtime_fingerprint([fingerprint, {"runtime": "cuda", "version": "12.8"}])
+
+    def test_conditioning_contract_is_exact_for_each_phase(self) -> None:
+        expected = {
+            "decode": [1, 2, 4, 8, 16, 32, 64, 128],
+            "prefill": [1, 2, 4, 8, 16, 32, 64, 128, 256, 512],
+        }
+        for phase, ladder in expected.items():
+            valid = {
+                "contract": "fixed-phase-ramp-8-roundtrips-v1",
+                "ladder": ladder,
+                "roundtrips_per_shape": 8,
+            }
+            self.assertIs(contracts.validate_conditioning_contract(valid, phase), valid)
+            for mutate in (
+                lambda item: item["ladder"].reverse(),
+                lambda item: item["ladder"].pop(),
+                lambda item: item.update(ladder=[1.0, *item["ladder"][1:]]),
+                lambda item: item.update(roundtrips_per_shape=7),
+                lambda item: item.update(roundtrips_per_shape=8.0),
+            ):
+                changed = copy.deepcopy(valid)
+                mutate(changed)
+                with self.assertRaises(contracts.ContractError):
+                    contracts.validate_conditioning_contract(changed, phase)
+            other = "prefill" if phase == "decode" else "decode"
+            with self.assertRaises(contracts.ContractError):
+                contracts.validate_conditioning_contract(valid, other)
+
+    def test_content_manifest_evidence_is_stable_and_content_sensitive(self) -> None:
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary)
+            first, second = root / "first", root / "second"
+            first.write_bytes(b"first")
+            second.write_bytes(b"second")
+            files = [("pkg/first", first), ("pkg/second", second)]
+            evidence = contracts.content_manifest_evidence(
+                role="test-content", name="test-build", files=files,
+            )
+            self.assertNotIn(temporary, json.dumps(evidence))
+            self.assertEqual(
+                evidence,
+                contracts.content_manifest_evidence(
+                    role="test-content", name="test-build", files=reversed(files),
+                ),
+            )
+            self.assertRegex(evidence["sha256"], r"^[0-9a-f]{64}$")
+            second.write_bytes(b"changed")
+            self.assertNotEqual(
+                evidence,
+                contracts.content_manifest_evidence(
+                    role="test-content", name="test-build", files=files,
+                ),
+            )
+            for invalid in (
+                [("../first", first)],
+                [("same", first), ("same", second)],
+                [("missing", root / "missing")],
+            ):
+                with self.assertRaises(contracts.ContractError):
+                    contracts.content_manifest_evidence(
+                        role="test-content", name="test-build", files=invalid,
+                    )
+
+    def test_hybrid_realized_config_and_jit_evidence_are_path_free(self) -> None:
+        # Lifts HYBRID_CONFIG_FIELDS plus three helper functions out of
+        # ep_deepep_hybrid.py and runs them with only Path, hashlib and re in scope.
+        path = HERE / "ep_deepep_hybrid.py"
+        tree = ast.parse(path.read_text(), str(path))
+        selected = [
+            node for node in tree.body
+            if (
+                isinstance(node, ast.Assign)
+                and any(
+                    isinstance(target, ast.Name) and target.id == "HYBRID_CONFIG_FIELDS"
+                    for target in node.targets
+                )
+            )
+            or isinstance(node, ast.FunctionDef)
+            and node.name in {
+                "_hybrid_realized_config", "_sha256_with_size", "_hybrid_jit_evidence",
+            }
+        ]
+        namespace = {"Path": Path, "hashlib": hashlib, "re": __import__("re")}
+        exec(compile(ast.Module(body=selected, type_ignores=[]), str(path), "exec"), namespace)
+        fields = namespace["HYBRID_CONFIG_FIELDS"]
+        self.assertEqual(set(fields), contracts.HYBRID_REALIZED_CONFIG_FIELDS)
+
+        class TokenType:
+            # Stand-in for the config's token_data_type value: str() yields `label`,
+            # and a `name` attribute exists only when one is supplied.
+            def __init__(self, label: str, name: str | None = None) -> None:
+                self.label = label
+                if name is not None:
+                    self.name = name
+
+            def __str__(self) -> str:
+                return self.label
+
+        values = {field: 1 for field in fields}
+        values.update({field: True for field in contracts.HYBRID_REALIZED_BOOL_FIELDS})
+        # String forms "uint16_t" / "uint8_t" must be reported as "UINT16" / "UINT8".
+        for raw, expected in (("uint16_t", "UINT16"), ("uint8_t", "UINT8")):
+            values["token_data_type"] = TokenType(raw)
+            config = types.SimpleNamespace(**values)
+            realized = namespace["_hybrid_realized_config"](config)
+            self.assertEqual(realized["token_data_type"], expected)
+            self.assertEqual(set(realized), contracts.HYBRID_REALIZED_CONFIG_FIELDS)
+        # An unrecognised string form is still accepted when `name` is "UINT16".
+        values["token_data_type"] = TokenType("opaque-enum", "UINT16")
+        self.assertEqual(
+            namespace["_hybrid_realized_config"](types.SimpleNamespace(**values))[
+                "token_data_type"
+            ],
+            "UINT16",
+        )
+        # The string form "UINT16" with no `name` attribute must be rejected.
+        values["token_data_type"] = TokenType("UINT16")
+        with self.assertRaisesRegex(RuntimeError, "token_data_type is invalid"):
+            namespace["_hybrid_realized_config"](types.SimpleNamespace(**values))
+        # A config object lacking hidden_dim must be rejected.
+        values["token_data_type"] = TokenType("uint16_t")
+        config = types.SimpleNamespace(**values)
+        delattr(config, "hidden_dim")
+        with self.assertRaisesRegex(RuntimeError, "omits hidden_dim"):
+            namespace["_hybrid_realized_config"](config)
+
+        # _hybrid_jit_evidence scenarios; each step builds on the directory state
+        # left by the previous one, so the order matters.
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary)
+            for key, payload in (
+                ("preprocess-key", b"pre"),
+                ("combine-key", b"combine"),
+                ("dispatch-key", b"dispatch"),
+            ):
+                (root / f"{key}.so").write_bytes(payload)
+            # Three .so files: kernel keys come back sorted and the temporary path
+            # does not appear in the serialized evidence.
+            evidence = namespace["_hybrid_jit_evidence"](root)
+            self.assertEqual(
+                [item["kernel_key"] for item in evidence],
+                ["combine-key", "dispatch-key", "preprocess-key"],
+            )
+            self.assertNotIn(temporary, json.dumps(evidence))
+            # Changing one file's bytes must change the evidence.
+            (root / "dispatch-key.so").write_bytes(b"changed")
+            self.assertNotEqual(evidence, namespace["_hybrid_jit_evidence"](root))
+            # A fourth .so file must raise "expected 3".
+            (root / "extra-key.so").write_bytes(b"extra")
+            with self.assertRaisesRegex(RuntimeError, "expected 3"):
+                namespace["_hybrid_jit_evidence"](root)
+            (root / "extra-key.so").unlink()
+            # A file name containing a space must raise "kernel key".
+            (root / "bad key.so").write_bytes(b"bad")
+            with self.assertRaisesRegex(RuntimeError, "kernel key"):
+                namespace["_hybrid_jit_evidence"](root)
+            (root / "bad key.so").unlink()
+            # A symlink in place of a .so file must raise "regular file".
+            (root / "combine-key.so").unlink()
+            (root / "combine-key.so").symlink_to(root / "dispatch-key.so")
+            with self.assertRaisesRegex(RuntimeError, "regular file"):
+                namespace["_hybrid_jit_evidence"](root)
+            # An empty directory must raise "expected 3".
+            empty = root / "empty"
+            empty.mkdir()
+            with self.assertRaisesRegex(RuntimeError, "expected 3"):
+                namespace["_hybrid_jit_evidence"](empty)
+
+    def test_hybrid_deferred_provenance_wraps_before_conditioning_and_recaptures(self) -> None:
+        # Three parts: (1) static ordering checks on DeepEPHybridBackend.__init__,
+        # (2) running capture_deferred_provenance against fakes, and (3) textual
+        # ordering checks on ep_harness.py.
+        path = HERE / "ep_deepep_hybrid.py"
+        source = path.read_text()
+        tree = ast.parse(source, str(path))
+        backend = next(
+            node for node in tree.body
+            if isinstance(node, ast.ClassDef) and node.name == "DeepEPHybridBackend"
+        )
+        methods = {node.name for node in backend.body if isinstance(node, ast.FunctionDef)}
+        self.assertIn("capture_deferred_provenance", methods)
+        constructor = next(node for node in backend.body if isinstance(node, ast.FunctionDef) and node.name == "__init__")
+        buffer_call = next(
+            node for node in ast.walk(constructor)
+            if isinstance(node, ast.Call) and isinstance(node.func, ast.Name)
+            and node.func.id == "HybridEPBuffer"
+        )
+        wrapper_install = next(
+            node for node in ast.walk(constructor)
+            if isinstance(node, ast.Assign)
+            and any(
+                isinstance(target, ast.Attribute)
+                and target.attr == "update_template_config"
+                for target in node.targets
+            )
+        )
+        # 1-based line of the first textual occurrence of the cache-dir subscript in
+        # the file; it must precede the HybridEPBuffer(...) call, which in turn must
+        # precede the assignment to update_template_config.
+        cache_line = source[:source.index('os.environ["HYBRID_EP_CACHE_DIR"]')].count("\n") + 1
+        self.assertLess(cache_line, buffer_call.lineno)
+        self.assertLess(buffer_call.lineno, wrapper_install.lineno)
+
+        capture = next(
+            node for node in backend.body
+            if isinstance(node, ast.FunctionDef) and node.name == "capture_deferred_provenance"
+        )
+        # Names of everything called inside capture: bare names by id, attribute
+        # calls by their final attribute.
+        called = {
+            node.func.id if isinstance(node.func, ast.Name) else node.func.attr
+            for node in ast.walk(capture) if isinstance(node, ast.Call)
+            and isinstance(node.func, (ast.Name, ast.Attribute))
+        }
+        self.assertTrue({"_hybrid_jit_evidence", "_require_cross_rank_equal", "all_gather_object"} <= called)
+        self.assertIn("changed after measurement", ast.get_source_segment(source, capture))
+
+        # Wrapped in an outer list so the fake evidence function below always reads
+        # the current (mutated) records through artifacts[0].
+        artifacts = [[
+            {"bytes": 1, "kernel_key": key, "sha256": digit * 64}
+            for key, digit in (("a", "1"), ("b", "2"), ("c", "3"))
+        ]]
+
+        class FakeCuda:
+            @staticmethod
+            def synchronize() -> None:
+                return None
+
+        class FakeDist:
+            # Two-rank stand-in: all_gather_object fills the output with two copies
+            # of the local value.
+            @staticmethod
+            def barrier() -> None:
+                return None
+
+            @staticmethod
+            def get_world_size() -> int:
+                return 2
+
+            @staticmethod
+            def all_gather_object(output, value) -> None:
+                output[:] = [copy.deepcopy(value), copy.deepcopy(value)]
+
+        namespace = {
+            "torch": types.SimpleNamespace(cuda=FakeCuda),
+            "dist": FakeDist,
+            "_hybrid_jit_evidence": lambda _root: copy.deepcopy(artifacts[0]),
+            "_require_cross_rank_equal": lambda _value, _label: None,
+        }
+        exec(compile(ast.Module(body=[capture], type_ignores=[]), str(path), "exec"), namespace)
+        # SimpleNamespace stands in for the backend instance passed as `self`.
+        state = types.SimpleNamespace(
+            _deferred_jit_diagnostics=None,
+            _deferred_semantic_snapshot=None,
+            _jit_root=Path("private-cache"),
+            _realized_config=hybrid_realized_config(),
+            backend_provenance={},
+        )
+        # First capture succeeds; a changed kernel key and then a changed hash must
+        # each be rejected on recapture with their own message.
+        namespace["capture_deferred_provenance"](state)
+        artifacts[0][0]["kernel_key"] = "changed"
+        with self.assertRaisesRegex(RuntimeError, "kernel set changed"):
+            namespace["capture_deferred_provenance"](state)
+        artifacts[0][0]["kernel_key"] = "a"
+        artifacts[0][0]["sha256"] = "f" * 64
+        with self.assertRaisesRegex(RuntimeError, "artifacts changed"):
+            namespace["capture_deferred_provenance"](state)
+
+        # ep_harness.py must contain exactly two "capture_deferred_provenance()"
+        # occurrences: the first after the conditioning loop header and before the
+        # oracle call, the second after the trace signature line.
+        harness = (HERE / "ep_harness.py").read_text()
+        captures = [
+            index for index in range(len(harness))
+            if harness.startswith("capture_deferred_provenance()", index)
+        ]
+        self.assertEqual(len(captures), 2)
+        self.assertLess(harness.index("for wt in conditioning_ladder:"), captures[0])
+        self.assertLess(captures[0], harness.index("oracle = _run_expert_oracle("))
+        self.assertLess(harness.index("trace_sig = hashlib.sha256"), captures[1])
+
+    def test_hybrid_diagnostic_hashes_do_not_split_series_identity(self) -> None:
+        keys, artifacts = hybrid_jit_provenance()
+        provenance = {
+            "deepep_tree": "b" * 40,
+            "jit_kernel_keys": keys,
+            "jit_shared_objects": artifacts,
+            "loaded_libraries": [{
+                "name": "hybrid_ep_cpp", "role": "deepep-hybrid-extension",
+                "sha256": "a" * 64,
+            }],
+            "realized_config": hybrid_realized_config(),
+        }
+        baseline = ep_harness._series_provenance(provenance)
+        changed = copy.deepcopy(provenance)
+        changed["jit_shared_objects"][0]["rank_artifacts"][0]["sha256"] = "f" * 64
+        self.assertEqual(ep_harness._series_provenance(changed), baseline)
+        changed = copy.deepcopy(provenance)
+        changed["loaded_libraries"][0]["sha256"] = "f" * 64
+        self.assertEqual(ep_harness._series_provenance(changed), baseline)
+        changed = copy.deepcopy(provenance)
+        changed["jit_kernel_keys"][0] = "changed-key"
+        self.assertNotEqual(ep_harness._series_provenance(changed), baseline)
+        changed = copy.deepcopy(provenance)
+        changed["realized_config"]["num_of_blocks_dispatch_api"] += 1
+        self.assertNotEqual(ep_harness._series_provenance(changed), baseline)
+        changed = copy.deepcopy(provenance)
+        changed["deepep_tree"] = "c" * 40
+        self.assertNotEqual(ep_harness._series_provenance(changed), baseline)
+
+    def test_v2_series_identity_uses_source_and_sass_not_container_metadata(self) -> None:
+        provenance = {
+            "deepep_tree": "a" * 40,
+            "loaded_libraries": [
+                {"name": "deep_ep._C.so", "role": "deepep-extension", "sha256": "1" * 64},
+                {"name": "libnccl.so.2", "role": "nccl", "sha256": "2" * 64},
+            ],
+            "jit_cubins": deepep_v2_jit_provenance(),
+            "jit_random_seed": "collectivex-deepep-v2-fa8a9b1",
+        }
+        baseline = contracts.series_provenance(provenance)
+        changed = copy.deepcopy(provenance)
+        changed["loaded_libraries"][0]["sha256"] = "f" * 64
+        changed["jit_cubins"][0]["cubin_sha256"] = "e" * 64
+        self.assertEqual(contracts.series_provenance(changed), baseline)
+        for mutate in (
+            lambda item: item["loaded_libraries"][1].update(sha256="f" * 64),
+            lambda item: item["jit_cubins"][0].update(source_sha256="f" * 64),
+            lambda item: item["jit_cubins"][0].update(sass_sha256="f" * 64),
+            lambda item: item.update(deepep_tree="f" * 40),
+        ):
+            changed = copy.deepcopy(provenance)
+            mutate(changed)
+            self.assertNotEqual(contracts.series_provenance(changed), baseline)
+
+    def test_mnnvl_resolution_has_no_ambiguous_signature_fallback(self) -> None:
+        self.assertEqual(
+            contracts.resolve_deepep_mnnvl(
+                requested=False, signature_parameters=(), deepep_commit=None,
+            ),
+            ({}, "not-requested"),
+        )
+        self.assertEqual(
+            contracts.resolve_deepep_mnnvl(
+                requested=True, signature_parameters=("allow_mnnvl",),
+                deepep_commit="a" * 40,
+            ),
+            ({"allow_mnnvl": True}, "explicit-allow-mnnvl"),
+        )
+        with self.assertRaises(contracts.ContractError):
+            contracts.resolve_deepep_mnnvl(
+                requested=True, signature_parameters=(),
+                deepep_commit="814e508537c6ffc775d59f6f1b9ba43f3a65968c",
+            )
+
+    def test_backend_provenance_requires_lineage_and_content_hashes(self) -> None:  # valid fixtures pass; each mutation is reported by field name
+        def record(role: str, name: str, digit: str) -> dict[str, str]:  # library record; sha256 is `digit` repeated 64 times
+            return {"role": role, "name": name, "sha256": digit * 64}
+
+        hybrid_keys, hybrid_artifacts = hybrid_jit_provenance()
+        v2 = {
+            **contracts.DEEPEP_V2_V1_PROVENANCE,
+            "api_signature_sha256": "c" * 64,
+            "loaded_libraries": [
+                record("deepep-extension", "deep_ep._C", "1"),
+                record("nccl", "libnccl.so.2", "2"),
+                record("nvshmem", "libnvshmem_host.so.3", "3"),
+            ],
+            "jit_cubins": deepep_v2_jit_provenance(),
+            "jit_random_seed": "collectivex-deepep-v2-fa8a9b1",
+            "deterministic": False,
+            "num_experts": 256,
+            "tuning_num_experts": 256,
+        }
+        deepep = {
+            "deepep_version": "1.1.0", "deepep_commit": "a" * 40,
+            "backend_lineage": "deepep-v1", "allow_mnnvl": False,
+            "mnnvl_comm": "not-requested",
+        }
+        hybrid = {
+            "deepep_commit": "a" * 40, "deepep_tree": "b" * 40,
+            "branch": "hybrid-ep", "backend_lineage": "deepep-hybrid",
+            "loaded_libraries": [
+                record("deepep-extension", "deep_ep_cpp", "1"),
+                record("deepep-hybrid-extension", "hybrid_ep_cpp", "2"),
+            ],
+            "jit_kernel_keys": hybrid_keys,
+            "jit_shared_objects": hybrid_artifacts,
+            "realized_config": hybrid_realized_config(),
+        }
+        uccl = {
+            "uccl_version": "0.1.1", "uccl_commit": "pkg-0.1.1",
+            "uccl_wrapper_commit": "c" * 40, "backend_lineage": "uccl",
+            "uccl_dependency_versions": dict(contracts.UCCL_DEPENDENCY_VERSIONS),
+            "loaded_libraries": [
+                record("uccl-distribution", "uccl-0.1.1", "3"),
+                record("uccl-wrapper", "uccl-deepep-wrapper", "4"),
+                record("intervaltree-distribution", "intervaltree-3.1.0", "5"),
+                record("sortedcontainers-distribution", "sortedcontainers-2.4.0", "6"),
+                record("cuda-runtime", "nvidia-cuda-runtime-cu12-12.9.79", "7"),
+            ],
+        }
+        reference = {
+            "nccl_version": "2.30.4", "collective_library": "nccl",
+            "backend_lineage": "nccl",
+        }
+        for backend, provenance in (
+            ("deepep", deepep), ("deepep-v2", v2), ("deepep-hybrid", hybrid),
+            ("uccl", uccl), ("nccl-ep", reference),
+        ):
+            self.assertEqual(contracts.backend_provenance_issues(backend, provenance), [])  # unmodified fixture: no issues
+            changed = copy.deepcopy(provenance)
+            if "loaded_libraries" in changed:
+                changed["loaded_libraries"][0]["sha256"] = "invalid"  # no longer a 64-character digest
+                expected = "loaded_libraries"
+            else:
+                changed["backend_lineage"] = "wrong"  # fixtures without library records get a lineage mutation instead
+                expected = "backend_lineage"
+            self.assertIn(expected, contracts.backend_provenance_issues(backend, changed))
+
+        changed = copy.deepcopy(uccl)  # a dependency version that differs from the fixture must be flagged
+        changed["uccl_dependency_versions"]["intervaltree"] = "3.2.0"
+        self.assertIn(
+            "uccl_dependency_versions",
+            contracts.backend_provenance_issues("uccl", changed),
+        )
+        changed = copy.deepcopy(uccl)  # dropping the sortedcontainers record must be flagged
+        changed["loaded_libraries"] = [
+            item
+            for item in changed["loaded_libraries"]
+            if item["role"] != "sortedcontainers-distribution"
+        ]
+        self.assertIn(
+            "loaded_libraries", contracts.backend_provenance_issues("uccl", changed)
+        )
+
+        for field, mutate in (  # hybrid: each mutated field must appear in the issue list
+            ("realized_config", lambda item: item["realized_config"].pop("hidden_dim")),
+            ("jit_kernel_keys", lambda item: item["jit_kernel_keys"].reverse()),
+            (
+                "jit_shared_objects",
+                lambda item: item["jit_shared_objects"][0]["rank_artifacts"][0].update(
+                    sha256="invalid"
+                ),
+            ),
+        ):
+            with self.subTest(hybrid_field=field):
+                changed = copy.deepcopy(hybrid)
+                mutate(changed)
+                self.assertIn(
+                    field,
+                    contracts.backend_provenance_issues("deepep-hybrid", changed),
+                )
+
+        for field, value in (  # deepep-v2: replacement values that must be flagged
+            ("jit_cubins", [{"cache_key": "invalid", "cubin_sha256": "4" * 64}]),
+            ("jit_random_seed", "different-seed"),
+        ):
+            with self.subTest(v2_field=field):
+                changed = copy.deepcopy(v2)
+                changed[field] = value
+                self.assertIn(
+                    field,
+                    contracts.backend_provenance_issues("deepep-v2", changed),
+                )
+
+        changed = copy.deepcopy(v2)
+        changed["gin_enabled"] = True
+        self.assertIn("gin_enabled", contracts.backend_provenance_issues("deepep-v2", changed))
+        changed = copy.deepcopy(v2)
+        changed["communication_backend"] = "nccl-gin"
+        self.assertIn(
+            "communication_backend", contracts.backend_provenance_issues("deepep-v2", changed)
+        )
+        changed = copy.deepcopy(v2)
+        changed.update(  # all three at once: the exact issue list is pinned below
+            allow_hybrid_mode=True,
+            gin_enabled=True,
+            communication_backend="nccl-gin",
+        )
+        self.assertEqual(
+            contracts.backend_provenance_issues("deepep-v2", changed),
+            ["allow_hybrid_mode", "communication_backend", "gin_enabled"],
+        )
+        for field, expected in contracts.DEEPEP_V2_V1_PROVENANCE.items():
+            with self.subTest(v2_pin_field=field):
+                changed = copy.deepcopy(v2)
+                changed[field] = not expected if type(expected) is bool else "wrong"  # flip booleans, replace any other value
+                self.assertIn(
+                    field,
+                    contracts.backend_provenance_issues("deepep-v2", changed),
+                )
+
+        schema = json.loads((ROOT / "schemas" / "raw-case-v1.schema.json").read_text())  # the raw-case schema is checked alongside the helpers
+        provenance_schema = schema["properties"]["implementation"]["properties"]["provenance"]
+        self.assertEqual(
+            provenance_schema["properties"]["realized_config"],
+            {"$ref": "#/$defs/hybrid_realized_config"},
+        )
+        self.assertFalse(schema["$defs"]["hybrid_realized_config"]["additionalProperties"])
+        self.assertEqual(provenance_schema["properties"]["jit_kernel_keys"]["minItems"], 3)
+        self.assertEqual(provenance_schema["properties"]["jit_shared_objects"]["minItems"], 3)
+
+        self.assertEqual(contracts.collective_kernel_generation("nccl"), "nccl")
+        self.assertEqual(contracts.collective_kernel_generation("rccl"), "rccl")
+        with self.assertRaises(contracts.ContractError):  # an unrecognised library name must raise
+            contracts.collective_kernel_generation("unknown")
+
+    def test_routing_control_binds_binary_but_allows_treatment_configuration(self) -> None:  # which fields move the routing control hash
+        hybrid_keys, hybrid_artifacts = hybrid_jit_provenance()
+        implementation = {
+            "kernel_generation": "hybrid",
+            "name": "deepep-hybrid",
+            "provenance": {
+                "deepep_tree": "a" * 40,
+                "loaded_libraries": [{
+                    "role": "deepep-extension", "name": "deep_ep_cpp", "sha256": "1" * 64,
+                }],
+                "local_experts": 32,
+                "num_experts": 256,
+                "num_sms": 24,
+                "jit_cache_key": "case-one",
+                "jit_cubins": [{"cache_key": "one", "cubin_sha256": "2" * 64}],
+                "jit_kernel_keys": hybrid_keys,
+                "jit_shared_objects": hybrid_artifacts,
+                "realized_config": hybrid_realized_config(),
+            },
+            "resource_profile": {"configured_units": 24},
+        }
+        baseline = contracts.routing_implementation_control_sha256(implementation)
+        treatment = copy.deepcopy(implementation)
+        treatment["provenance"].update({  # expert counts and JIT evidence differ from the baseline
+            "local_experts": 36,
+            "num_experts": 288,
+            "jit_cache_key": "case-two",
+            "jit_cubins": [{"cache_key": "two", "cubin_sha256": "3" * 64}],
+            "jit_kernel_keys": ["changed-a", "changed-b", "changed-c"],
+            "jit_shared_objects": hybrid_jit_provenance(3)[1],
+            "realized_config": {
+                **hybrid_realized_config(),
+                "num_of_experts_per_rank": 36,
+            },
+        })
+        self.assertEqual(  # those differences must not change the control hash
+            contracts.routing_implementation_control_sha256(treatment), baseline
+        )
+        changed = copy.deepcopy(implementation)
+        changed["provenance"]["loaded_libraries"][0]["sha256"] = "4" * 64
+        self.assertEqual(  # NOTE(review): a changed library sha256 leaves the hash equal; confirm this is intended given "binds_binary" in the test name
+            contracts.routing_implementation_control_sha256(changed), baseline
+        )
+        changed = copy.deepcopy(implementation)
+        changed["provenance"]["deepep_tree"] = "b" * 40
+        self.assertNotEqual(  # a different deepep_tree must change the hash
+            contracts.routing_implementation_control_sha256(changed), baseline
+        )
+        changed = copy.deepcopy(implementation)
+        changed["provenance"]["num_sms"] = 20
+        self.assertNotEqual(  # a different num_sms must change the hash
+            contracts.routing_implementation_control_sha256(changed), baseline
+        )
+
+    def test_runtime_pins_uccl_wheel_and_hybrid_source_tree(self) -> None:  # substring and ordering checks on the runtime and launcher shell sources
+        runtime = (ROOT / "runtime" / "run_in_container.sh").read_text()
+        common = (ROOT / "runtime" / "common.sh").read_text()
+        self.assertIn("cd /ix/experimental/CollectiveX", runtime)
+        for launcher_name in ("launch_single-slurm.sh", "launch_gb-nv.sh"):  # both launchers must contain the same wiring strings
+            launcher = (ROOT / "launchers" / launcher_name).read_text()
+            self.assertIn("$MOUNT_SRC:/ix", launcher)
+            self.assertIn("cx_prepare_backend_cache", launcher)
+            self.assertNotIn('$(cx_prepare_backend_cache', launcher)  # the helper must not appear inside a command substitution
+            self.assertIn('BACKEND_CACHE="$CX_PREPARED_BACKEND_CACHE"', launcher)
+            self.assertIn("$BACKEND_CACHE:/cx-cache", launcher)
+            self.assertIn("CX_BACKEND_CACHE_ROOT=/cx-cache", launcher)
+            self.assertIn("CX_BACKEND_SOURCE_ROOT=/ix/experimental/CollectiveX/.cx_sources", launcher)
+            self.assertIn('|| [ "$CX_BENCH" = deepep-hybrid ]', launcher)
+            self.assertIn("cx_prepare_backend_source", launcher)
+            cache_block = launcher[launcher.index('if [ "$CX_BENCH" = deepep-v2 ]'):]  # from the first deepep-v2 test to the end of the file
+            self.assertLess(  # the backend-setup stage is set before the cache is prepared
+                cache_block.index("cx_set_failure_stage backend-setup"),
+                cache_block.index("cx_prepare_backend_cache"),
+            )
+            self.assertLess(  # source preparation precedes the scheduler-allocation stage
+                cache_block.index("cx_prepare_backend_source"),
+                cache_block.index("cx_set_failure_stage scheduler-allocation"),
+            )
+        self.assertIn("--frandom-seed=$seed", runtime)
+        self.assertIn("DEEPEP_V2_JIT_RANDOM_SEED", runtime)
+        persisted = runtime[runtime.index("cx_persist_backend_env()") :]  # everything from that function name onward
+        self.assertIn("CUDA_HOME CPATH NVCC_PREPEND_FLAGS", persisted)
+        self.assertIn(
+            "390c1320918972206546e44d79b132988f2818ec07e23afcd0595f7183916cec",
+            runtime,
+        )
+        self.assertIn("--require-hashes", runtime)
+        self.assertIn("d77aeab7f1bb52b615666fe178d26ced41fae08e", common)
+        self.assertIn("HEAD^{tree}", runtime)
+        self.assertIn("$PWD/.cx_backend/deepep-hybrid-", runtime)
+        self.assertIn("cx_materialize_backend_source deepep-hybrid", runtime)
+        self.assertIn("cx_materialize_backend_source deepep-v2", runtime)
+        self.assertIn("cx_deepep_hybrid_marker_content_sha256", runtime)
+        self.assertIn("cx_deepep_hybrid_cache_is_valid", runtime)
+        self.assertIn("cx_extension_pair_sha256", runtime)
+        self.assertIn(".collectivex-complete.tmp.", runtime)
+        self.assertNotIn("cx_fetch_revision", runtime)  # cx_fetch_revision may appear only in common.sh
+        self.assertIn("cx_fetch_revision", common)
+        self.assertIn("third-party/fmt", common)
+        hybrid = runtime[  # text from the hybrid builder up to the "# UCCL EP" marker
+            runtime.index("cx_build_deepep_hybrid()"):
+            runtime.index("# UCCL EP")
+        ]
+        self.assertIn("cx_prepare_cuda_cccl", hybrid)
+        self.assertIn("unset NVSHMEM_DIR HYBRID_EP_MULTINODE USE_NIXL", hybrid)
+        self.assertNotIn("cx_prepare_deepep_toolchain", hybrid)
+        toolchain = runtime[  # text from the toolchain helper up to cx_probe_deepep()
+            runtime.index("cx_prepare_deepep_toolchain()"):
+            runtime.index("cx_probe_deepep()")
+        ]
+        self.assertIn('overlay="$root/nvshmem-overlay"', toolchain)
+        self.assertIn("flock 8 || exit 1", toolchain)
+        self.assertIn('mv "$temporary" "$overlay" || exit 1', toolchain)
+        self.assertNotIn("/tmp/collectivex-nvshmem", toolchain)
+        jit = runtime[  # text from the JIT reproducibility helper up to cx_probe_deepep_v2()
+            runtime.index("cx_enable_deepep_v2_jit_reproducibility()"):
+            runtime.index("cx_probe_deepep_v2()")
+        ]
+        self.assertIn('cccl="${CX_CUDA_CCCL:-}"', jit)
+        self.assertNotIn("/usr/local/cuda*", jit)
+        self.assertIn("deepep-v2-cache-v2|$cpu|sm${arch/./}", runtime)
+        self.assertNotIn("deepep-v2-cache-v1|", runtime)  # the v1 cache-key prefix must be absent
+        self.assertIn('base="${CX_BACKEND_CACHE_ROOT:-}"', runtime)
+        self.assertNotIn("${CX_BACKEND_CACHE_ROOT:-$PWD/.cx_backend}", runtime)
+        self.assertIn(
+            "recipe=aot-persistent-nvshmem-active-cuda-maxjobs16-v2", runtime
+        )
+        self.assertNotIn("recipe=aot-source-date-epoch-arch-maxjobs16-v1", runtime)
+        self.assertNotIn("recipe=$source_sha", runtime)
+        self.assertIn("pip=26.1.2|setuptools=82.0.1|wheel=0.47.0|ninja=1.13.0", runtime)
+        self.assertIn("manual-unverified", runtime)
+        self.assertIn("cx_deepep_v2_content_sha256", runtime)
+        self.assertIn("DeepEP V2 cache validation failed", runtime)
+        probe = runtime[  # text from the v2 probe up to the content-hash helper
+            runtime.index("cx_probe_deepep_v2()"):
+            runtime.index("cx_deepep_v2_content_sha256()")
+        ]
+        self.assertNotIn("torch.cuda.nccl.version", probe)
+        self.assertIn("ncclGetVersion", probe)
+        self.assertIn("runtime_version.value == 23004", probe)
+        self.assertIn("cx_nvidia_package_root nvidia-nccl-cu13 nccl", runtime)
+        self.assertIn("cx_nvidia_package_root nvidia-nvshmem-cu12 nvshmem", runtime)
+        self.assertNotIn("import os,nvidia.nccl", runtime)
+        self.assertNotIn("import os,nvidia.nvshmem", runtime)
+        self.assertIn(
+            'export EP_JIT_CACHE_DIR="$stage_root/.cx_backend/deepep-v2-jit"', runtime
+        )
+        self.assertIn('stage_root="${CX_BACKEND_SOURCE_ROOT%/.cx_sources}"', runtime)
+        self.assertNotIn('export EP_JIT_CACHE_DIR="$root/jit"', runtime)
+        self.assertIn('EP_NVSHMEM_ROOT_DIR="$NVSHMEM_DIR"', runtime)
+        reference = (HERE / "ep_nccl.py").read_text()  # the Python reference backend is checked as text too
+        self.assertIn("self.kernel_generation = contracts.collective_kernel_generation", reference)
+
+    def test_deepep_v2_cache_recovers_from_an_unpublished_partial_build(self) -> None:  # runs the real cx_build_deepep_v2 with every helper stubbed
+        runtime = ROOT / "runtime" / "run_in_container.sh"
+        with tempfile.TemporaryDirectory() as temporary:
+            cache_key = "a" * 64
+            content_hash = "b" * 64
+            root = Path(temporary) / f"deepep-v2-{cache_key}"
+            root.mkdir(mode=0o700)
+            marker = root / ".collectivex-complete"  # not created here: the cache starts without a marker
+            stale = root / "stale-partial-build"
+            stale.write_text("partial\n")  # leftover file that the build is expected to remove
+            command = r'''
+              set -euo pipefail
+              eval "$(sed -n '/^cx_build_deepep_v2()/,/^}/p' "$1")"
+              cache_root="$2"; expected_revision="$3"; expected_tree="$4"; expected_fmt="$5"
+              expected_content="$6"
+              cx_log() { :; }
+              cx_verify_backend_cache_mount() { return 0; }
+              cx_cuda_arch() { printf '9.0'; }
+              cx_deepep_v2_root() { printf '%s' "$cache_root"; }
+              cx_activate_deepep_v2() { export DEEPEP_V2_COMMIT="$expected_revision"; }
+              cx_prepare_deepep_toolchain() { export NVSHMEM_DIR=/tmp/cx-test-nvshmem; }
+              cx_probe_deepep_v2() { return 0; }
+              cx_deepep_v2_content_sha256() { printf '%s' "$expected_content"; }
+              cx_deepep_v2_cache_is_valid() {
+                test -f "$2" && test "$(wc -l < "$2" | tr -d ' ')" = 5
+              }
+              cx_enable_deepep_v2_jit_reproducibility() { return 0; }
+              cx_materialize_backend_source() { mkdir -p "$2/third-party/fmt"; }
+              flock() { return 0; }
+              python3() {
+                if [ "${1:-}" = -m ] && [ "${2:-}" = venv ]; then
+                  mkdir -p "$3/bin"
+                  printf '#!/bin/sh\nexit 0\n' > "$3/bin/python"
+                  chmod 700 "$3/bin/python"
+                fi
+                return 0
+              }
+              git() {
+                case " $* " in
+                  *' third-party/fmt rev-parse HEAD '*) printf '%s\n' "$expected_fmt" ;;
+                  *' rev-parse HEAD^{tree} '*) printf '%s\n' "$expected_tree" ;;
+                  *' show -s --format=%ct HEAD '*) printf '1\n' ;;
+                  *) return 0 ;;
+                esac
+              }
+              cx_git_in_tree() { shift; git "$@"; }
+              cx_build_deepep_v2
+            '''
+            subprocess.run(  # positional arguments after "_" become $1..$6 in the script
+                [
+                    "bash", "-c", command, "_", str(runtime), str(root),
+                    COMMIT, TREE, FMT_COMMIT, content_hash,
+                ],
+                check=True,
+            )
+            self.assertFalse(stale.exists())  # the partial build was cleared
+            self.assertEqual(  # the published marker holds five newline-terminated fields
+                marker.read_text(),
+                f"{COMMIT}\n{TREE}\n{FMT_COMMIT}\n{cache_key}\n{content_hash}\n",
+            )
+            self.assertEqual(list(root.glob(".collectivex-complete.tmp.*")), [])  # no temporary marker is left behind
+
+    def test_deepep_v2_published_cache_is_never_deleted_after_probe_failure(self) -> None:  # stubs: cache valid, probe fails
+        runtime = ROOT / "runtime" / "run_in_container.sh"
+        with tempfile.TemporaryDirectory() as temporary:
+            cache_key = "a" * 64
+            root = Path(temporary) / f"deepep-v2-{cache_key}"
+            root.mkdir(mode=0o700)
+            marker = root / ".collectivex-complete"
+            marker.write_text("published\n")  # a marker already exists before the build runs
+            sentinel = root / "active-reader"
+            sentinel.write_text("active\n")  # must survive the failed build untouched
+            command = r'''
+              set -euo pipefail
+              eval "$(sed -n '/^cx_build_deepep_v2()/,/^}/p' "$1")"
+              cache_root="$2"
+              cx_log() { :; }
+              cx_verify_backend_cache_mount() { return 0; }
+              cx_cuda_arch() { printf '9.0'; }
+              cx_deepep_v2_root() { printf '%s' "$cache_root"; }
+              cx_deepep_v2_cache_is_valid() { return 0; }
+              cx_activate_deepep_v2() { return 0; }
+              cx_prepare_deepep_toolchain() { return 0; }
+              cx_enable_deepep_v2_jit_reproducibility() { return 0; }
+              cx_probe_deepep_v2() { return 1; }
+              ! cx_build_deepep_v2
+            '''
+            subprocess.run(  # check=True plus the script's final "!" means the build itself must fail
+                ["bash", "-c", command, "_", str(runtime), str(root)],
+                check=True,
+            )
+            self.assertEqual(sentinel.read_text(), "active\n")
+            self.assertEqual(marker.read_text(), "published\n")
+
+    def test_deepep_v2_corrupt_published_cache_fails_without_reset(self) -> None:  # stub: cache validation fails while a marker exists
+        runtime = ROOT / "runtime" / "run_in_container.sh"
+        with tempfile.TemporaryDirectory() as temporary:
+            cache_key = "a" * 64
+            root = Path(temporary) / f"deepep-v2-{cache_key}"
+            root.mkdir(mode=0o700)
+            marker = root / ".collectivex-complete"
+            marker.write_text("corrupt\n")  # marker present; the validity stub below rejects the cache
+            sentinel = root / "active-reader"
+            sentinel.write_text("active\n")  # must survive the failed build untouched
+            command = r'''
+              set -euo pipefail
+              eval "$(sed -n '/^cx_build_deepep_v2()/,/^}/p' "$1")"
+              cache_root="$2"
+              cx_log() { :; }
+              cx_verify_backend_cache_mount() { return 0; }
+              cx_cuda_arch() { printf '9.0'; }
+              cx_deepep_v2_root() { printf '%s' "$cache_root"; }
+              cx_deepep_v2_cache_is_valid() { return 1; }
+              flock() { return 0; }
+              ! cx_build_deepep_v2
+            '''
+            subprocess.run(  # check=True plus the script's final "!" means the build itself must fail
+                ["bash", "-c", command, "_", str(runtime), str(root)],
+                check=True,
+            )
+            self.assertEqual(sentinel.read_text(), "active\n")
+            self.assertEqual(marker.read_text(), "corrupt\n")  # the marker was neither rewritten nor removed
+
+    def test_deepep_v2_marker_requires_private_owned_cache_objects(self) -> None:  # marker is read at mode 0600 and rejected at 0644
+        runtime = ROOT / "runtime" / "run_in_container.sh"
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary) / "cache"
+            root.mkdir(mode=0o700)
+            (root / "source").mkdir(mode=0o700)
+            (root / "venv").mkdir(mode=0o700)
+            marker = root / ".collectivex-complete"
+            cache_key = "a" * 64
+            content_hash = "b" * 64
+            marker.write_text(  # five fields; the last one is the content hash expected on stdout
+                f"{COMMIT}\n{TREE}\n{FMT_COMMIT}\n{cache_key}\n{content_hash}\n"
+            )
+            root.chmod(0o2700)  # setgid bit plus owner-only permissions
+            marker.chmod(0o600)
+            command = r'''
+              set -euo pipefail
+              eval "$(sed -n '/^cx_deepep_v2_marker_content_sha256()/,/^}/p' "$1")"
+              cx_deepep_v2_marker_content_sha256 "$2" "$3" "$4" "$5" "$6" "$7"
+            '''
+            args = [  # after "_": runtime script, cache root, marker, then the four expected marker fields
+                "bash", "-c", command, "_", str(runtime), str(root), str(marker),
+                COMMIT, TREE, FMT_COMMIT, cache_key,
+            ]
+            valid = subprocess.run(args, text=True, capture_output=True, check=True)
+            self.assertEqual(valid.stdout, content_hash)
+            marker.chmod(0o644)  # group- and world-readable marker
+            self.assertNotEqual(subprocess.run(args).returncode, 0)  # the same call must now fail
+
+    def test_deepep_hybrid_marker_requires_a_private_regular_file(self) -> None:  # marker is read at mode 0600 and rejected at 0644
+        runtime = ROOT / "runtime" / "run_in_container.sh"
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary) / "cache"
+            root.mkdir(mode=0o700)
+            marker = root / ".collectivex-complete"
+            content_hash = "b" * 64
+            marker.write_text(f"{COMMIT}\n{TREE}\n{content_hash}\n")  # three fields; the last is expected on stdout
+            root.chmod(0o2700)  # setgid bit plus owner-only permissions
+            marker.chmod(0o600)
+            command = r'''
+              set -euo pipefail
+              eval "$(sed -n '/^cx_deepep_hybrid_marker_content_sha256()/,/^}/p' "$1")"
+              cx_deepep_hybrid_marker_content_sha256 "$2" "$3" "$4" "$5"
+            '''
+            args = [  # after "_": runtime script, cache root, marker, commit, tree
+                "bash", "-c", command, "_", str(runtime), str(root), str(marker),
+                COMMIT, TREE,
+            ]
+            valid = subprocess.run(args, text=True, capture_output=True, check=True)
+            self.assertEqual(valid.stdout, content_hash)
+            marker_contract = runtime.read_text()
+            marker_contract = marker_contract[  # text of the marker helper, up to the validity helper
+                marker_contract.index("cx_deepep_hybrid_marker_content_sha256()"):
+                marker_contract.index("cx_deepep_hybrid_cache_is_valid()")
+            ]
+            self.assertIn("marker_item.st_uid != root_item.st_uid", marker_contract)  # owner is compared with the cache root's owner
+            self.assertNotIn("st_uid != os.getuid()", marker_contract)  # and not with the calling process uid
+            marker.chmod(0o644)  # group- and world-readable marker
+            self.assertNotEqual(subprocess.run(args).returncode, 0)  # the same call must now fail
+
+    def test_deepep_v2_installed_content_digest_binds_every_distribution_file(self) -> None:  # digest follows file bytes; a symlinked file fails
+        runtime = ROOT / "runtime" / "run_in_container.sh"
+        with tempfile.TemporaryDirectory() as temporary:
+            site = Path(temporary) / "venv" / "lib" / "python3.11" / "site-packages"  # fake site-packages holding a minimal deep_ep install
+            package = site / "deep_ep"
+            info = site / "deep_ep-2.0.0.dist-info"
+            package.mkdir(parents=True)
+            info.mkdir()
+            (package / "__init__.py").write_text("__version__ = '2.0.0'\n")
+            extension = package / "_C.so"
+            extension.write_bytes(b"extension-one")
+            (info / "METADATA").write_text(
+                "Metadata-Version: 2.1\nName: deep_ep\nVersion: 2.0.0\n"
+            )
+            (info / "RECORD").write_text(  # lists all four files, with empty hash and size columns
+                "deep_ep/__init__.py,,\n"
+                "deep_ep/_C.so,,\n"
+                "deep_ep-2.0.0.dist-info/METADATA,,\n"
+                "deep_ep-2.0.0.dist-info/RECORD,,\n"
+            )
+            command = r'''
+              set -euo pipefail
+              eval "$(sed -n '/^cx_deepep_v2_content_sha256()/,/^}/p' "$1")"
+              cx_deepep_v2_content_sha256
+            '''
+            env = {  # point the shell function at the fake installation
+                **os.environ,
+                "PYTHONPATH": str(site),
+                "VIRTUAL_ENV": str(Path(temporary) / "venv"),
+            }
+            first = subprocess.run(
+                ["bash", "-c", command, "_", str(runtime)],
+                text=True, capture_output=True, check=True, env=env,
+            ).stdout
+            extension.write_bytes(b"extension-two")  # change only the extension's bytes
+            second = subprocess.run(
+                ["bash", "-c", command, "_", str(runtime)],
+                text=True, capture_output=True, check=True, env=env,
+            ).stdout
+            self.assertRegex(first, r"^[0-9a-f]{64}$")
+            self.assertRegex(second, r"^[0-9a-f]{64}$")
+            self.assertNotEqual(first, second)  # different bytes give a different digest
+            extension.unlink()
+            outside = Path(temporary) / "outside.so"
+            outside.write_bytes(b"outside")
+            extension.symlink_to(outside)  # recorded file is now a symlink to a file outside the package
+            self.assertNotEqual(  # the digest helper must fail for the symlinked file
+                subprocess.run(
+                    ["bash", "-c", command, "_", str(runtime)], env=env,
+                ).returncode,
+                0,
+            )
+
+    def test_uccl_content_identity_excludes_install_generated_files(self) -> None:
+        keep = load_uccl_function(
+            "_is_uccl_runtime_payload", {"PurePosixPath": PurePosixPath}
+        )
+        self.assertTrue(keep("uccl/ep.abi3.so"))
+        self.assertTrue(keep("uccl.libs/libnuma.so"))
+        self.assertFalse(keep("uccl/__pycache__/collective.cpython-312.pyc"))
+        self.assertFalse(keep("uccl-0.1.1.dist-info/RECORD"))
+
+    def test_uccl_dependency_versions_are_exact(self) -> None:  # installed versions must equal contracts.UCCL_DEPENDENCY_VERSIONS
+        installed = dict(contracts.UCCL_DEPENDENCY_VERSIONS)  # mutable copy read by the metadata stub below
+        dependency_versions = load_uccl_function(
+            "_uccl_dependency_versions",
+            {
+                "contracts": contracts,
+                "metadata": types.SimpleNamespace(
+                    version=lambda package: installed[package]
+                ),
+            },
+        )
+        self.assertEqual(dependency_versions(), contracts.UCCL_DEPENDENCY_VERSIONS)
+        installed["intervaltree"] = "3.2.0"  # the stub closes over `installed`, so the next call sees this version
+        with self.assertRaisesRegex(RuntimeError, "differ from the v1 contract"):
+            dependency_versions()
+
+        schema = json.loads((ROOT / "schemas" / "raw-case-v1.schema.json").read_text())
+        dependency_schema = schema["properties"]["implementation"]["properties"][
+            "provenance"
+        ]["properties"]["uccl_dependency_versions"]
+        self.assertFalse(dependency_schema["additionalProperties"])
+        self.assertEqual(  # each schema property's "const" must equal the pinned version
+            {
+                package: definition["const"]
+                for package, definition in dependency_schema["properties"].items()
+            },
+            contracts.UCCL_DEPENDENCY_VERSIONS,
+        )
+
+    def test_uccl_support_dependency_content_is_path_free(self) -> None:  # evidence covers only the source file and omits the install path
+        with tempfile.TemporaryDirectory() as directory:
+            root = Path(directory)
+            source_entry = PurePosixPath("intervaltree/__init__.py")
+            cache_entry = PurePosixPath("intervaltree/__pycache__/__init__.pyc")
+            metadata_entry = PurePosixPath("intervaltree-3.1.0.dist-info/RECORD")
+            for entry in (source_entry, cache_entry, metadata_entry):
+                path = root / entry
+                path.parent.mkdir(parents=True, exist_ok=True)
+                path.write_bytes(entry.as_posix().encode())  # each file's content is its own relative path
+            distribution = types.SimpleNamespace(  # stand-in distribution exposing only files and locate_file
+                files=[source_entry, cache_entry, metadata_entry],
+                locate_file=lambda item: root / item,
+            )
+            evidence_for = load_uccl_function(
+                "_python_dependency_evidence",
+                {
+                    "Path": Path,
+                    "PurePosixPath": PurePosixPath,
+                    "contracts": contracts,
+                    "metadata": types.SimpleNamespace(
+                        distribution=lambda package: distribution
+                    ),
+                },
+            )
+            evidence = evidence_for("intervaltree", "3.1.0")
+            self.assertEqual(  # expected manifest lists the source entry only
+                evidence,
+                contracts.content_manifest_evidence(
+                    role="intervaltree-distribution",
+                    name="intervaltree-3.1.0",
+                    files=[(source_entry.as_posix(), root / source_entry)],
+                ),
+            )
+            self.assertNotIn(str(root), json.dumps(evidence))  # the temporary root path must not appear in the evidence
+
+    def test_uccl_hashes_the_mapped_pinned_libcudart_without_exposing_paths(  # mapped library in the distribution is hashed; a copy outside it is refused
+        self,
+    ) -> None:
+        with tempfile.TemporaryDirectory() as directory:
+            root = Path(directory)
+            entry = PurePosixPath("nvidia/cuda_runtime/lib/libcudart.so.12")
+            library = root / entry
+            library.parent.mkdir(parents=True)
+            library.write_bytes(b"pinned CUDA 12 runtime")
+            distribution = types.SimpleNamespace(  # stand-in distribution that lists the library file
+                files=[entry],
+                locate_file=lambda item: root / item,
+            )
+            evidence_for = load_uccl_function(
+                "_loaded_libcudart_evidence",
+                {
+                    "Path": Path,
+                    "PurePosixPath": PurePosixPath,
+                    "contracts": contracts,
+                    "metadata": types.SimpleNamespace(
+                        distribution=lambda package: distribution
+                    ),
+                },
+            )
+            maps = root / "maps"
+            maps.write_text(f"7f00-7f10 r-xp 00000000 00:00 0 {library}\n")  # presumably mimics a /proc/<pid>/maps line; confirm against the helper
+            evidence = evidence_for("12.9.79", maps)
+            self.assertEqual(  # the manifest names the file "libcudart.so", not its path
+                evidence,
+                contracts.content_manifest_evidence(
+                    role="cuda-runtime",
+                    name="nvidia-cuda-runtime-cu12-12.9.79",
+                    files=[("libcudart.so", library)],
+                ),
+            )
+            self.assertNotIn(str(root), json.dumps(evidence))  # the temporary root path must not appear in the evidence
+
+            unowned = root / "unowned" / library.name  # same bytes, but not among the distribution's files
+            unowned.parent.mkdir()
+            unowned.write_bytes(library.read_bytes())
+            maps.write_text(f"7f00-7f10 r-xp 00000000 00:00 0 {unowned}\n")
+            with self.assertRaisesRegex(RuntimeError, "not owned") as raised:
+                evidence_for("12.9.79", maps)
+            self.assertNotIn(str(root), str(raised.exception))  # the error message must not contain the path either
+
+    def test_private_runtime_logs_are_not_public_artifacts(self) -> None:
+        path = subprocess.check_output(
+            [
+                "bash", "-c", 'source "$1"; cx_private_log_path test', "_",
+                str(ROOT / "runtime" / "common.sh"),
+            ],
+            text=True,
+            env={**os.environ, "COLLECTIVEX_EXECUTION_ID": "contract-test"},
+        ).strip()
+        try:
+            log = Path(path)
+            self.assertEqual(stat.S_IMODE(log.stat().st_mode), 0o600)
+            self.assertEqual(stat.S_IMODE(log.parent.stat().st_mode), 0o700)
+            self.assertFalse(log.is_relative_to(ROOT))
+        finally:
+            shutil.rmtree(Path(path).parent, ignore_errors=True)
+
+    def test_private_runtime_logs_reject_traversal_and_symlinks(self) -> None:  # ".." identifiers and pre-planted symlinks must make cx_private_log_path fail.
+        common = str(ROOT / "runtime" / "common.sh")
+        for variable, value in (  # each pass replaces exactly one benign value with ".."
+            ("COLLECTIVEX_EXECUTION_ID", ".."),
+            ("CX_TEST_LABEL", ".."),
+        ):
+            environment = {
+                **os.environ,
+                "COLLECTIVEX_EXECUTION_ID": "contract-adversarial",
+                "CX_TEST_LABEL": "test",
+                variable: value,  # later key wins, overriding one of the two defaults above
+            }
+            result = subprocess.run(
+                ["bash", "-c", 'source "$1"; cx_private_log_path "$CX_TEST_LABEL"', "_", common],
+                text=True,
+                capture_output=True,
+                env=environment,
+            )
+            self.assertNotEqual(result.returncode, 0)
+            self.assertNotIn(value, result.stderr)  # the rejected value must not be echoed on stderr
+
+        private_root = Path(f"/tmp/inferencex-collectivex-{os.getuid()}")  # NOTE(review): presumably the per-user root common.sh uses — confirm
+        private_root.mkdir(mode=0o700, exist_ok=True)
+        self.assertFalse(private_root.is_symlink())
+        os.chmod(private_root, 0o700)  # enforce owner-only mode even when the directory already existed
+        with tempfile.TemporaryDirectory() as temporary:
+            target = Path(temporary)
+            tag = f"contract-symlink-{os.getpid()}"  # pid suffix keeps concurrent test runs from colliding
+            link = private_root / tag
+            link.symlink_to(target, target_is_directory=True)  # case 1: a directory symlink named after the execution id
+            try:
+                result = subprocess.run(
+                    ["bash", "-c", 'source "$1"; cx_private_log_path test', "_", common],
+                    text=True,
+                    capture_output=True,
+                    env={**os.environ, "COLLECTIVEX_EXECUTION_ID": tag},
+                )
+                self.assertNotEqual(result.returncode, 0)
+                self.assertEqual(list(target.iterdir()), [])  # nothing may have been created through the symlink
+            finally:
+                link.unlink(missing_ok=True)
+
+            tag = f"contract-log-symlink-{os.getpid()}"
+            directory = private_root / tag
+            directory.mkdir(mode=0o700)
+            target_file = target / "target"
+            target_file.write_text("unchanged")
+            log_link = directory / "test.log"  # NOTE(review): presumably the file name produced for label "test" — confirm
+            log_link.symlink_to(target_file)  # case 2: a real directory that contains a symlinked log file
+            try:
+                result = subprocess.run(
+                    ["bash", "-c", 'source "$1"; cx_private_log_path test', "_", common],
+                    text=True,
+                    capture_output=True,
+                    env={**os.environ, "COLLECTIVEX_EXECUTION_ID": tag},
+                )
+                self.assertNotEqual(result.returncode, 0)
+                self.assertEqual(target_file.read_text(), "unchanged")  # the symlink target must not have been written
+            finally:
+                log_link.unlink(missing_ok=True)
+                directory.rmdir()  # rmdir only succeeds on an empty directory
+
+    def test_operator_config_failure_is_value_free(self) -> None:  # A failing operator config must produce a generic error that leaks none of its content.
+        with tempfile.TemporaryDirectory() as temporary:
+            config = Path(temporary) / "operator.env"
+            config.write_text("printf 'private-config-token\\n' >&2\nfalse\n")  # shell text that would print a marker to stderr and then fail
+            config.chmod(0o600)
+            result = subprocess.run(
+                ["bash", "-c",
+                 'export COLLECTIVEX_EXECUTION_ID="operator-failure-$$"; '  # $$ (the shell pid) makes the execution id unique
+                 "trap 'cx_cleanup_private_logs 0' EXIT; source \"$1\"; "  # cleanup helper runs when the shell exits
+                 "cx_load_operator_config", "_",
+                 str(ROOT / "runtime" / "common.sh")],
+                text=True,
+                capture_output=True,
+                env={
+                    **os.environ,
+                    "CX_RUNNER": "h100-dgxc",
+                    "COLLECTIVEX_OPERATOR_CONFIG": str(config),
+                },
+            )
+            self.assertNotEqual(result.returncode, 0)
+            self.assertIn("runner-local configuration failed", result.stderr)  # the generic failure message is expected
+            self.assertNotIn("private-config-token", result.stderr)  # the marker from the config must not reach stderr
+
+    def test_ephemeral_operator_config_is_removed_after_source(self) -> None:  # With the EPHEMERAL flag set, loading must delete the config file and unset its variable.
+        with tempfile.TemporaryDirectory() as temporary:
+            config = Path(temporary) / "operator.env"
+            decoy = Path(temporary) / "decoy"  # sibling file used to prove that only the config itself is removed
+            decoy.write_text("keep")
+            config.write_text(json.dumps(operator_config(Path(temporary) / "storage")))
+            config.chmod(0o600)
+            result = subprocess.run(
+                [
+                    "bash", "-c",
+                    'export COLLECTIVEX_EXECUTION_ID="operator-ephemeral-$$"; '
+                    "trap 'cx_cleanup_private_logs 0' EXIT; "
+                    'config="$COLLECTIVEX_OPERATOR_CONFIG"; source "$1"; '  # remember the path before the loader can unset the variable
+                    'cx_load_operator_config; test ! -e "$config"; '  # the config file must be gone after loading
+                    'test "$CX_PARTITION" = test; '  # a value from the document must have been exported
+                    'test -z "${COLLECTIVEX_OPERATOR_CONFIG+x}"',  # ${VAR+x} is empty only when VAR is unset
+                    "_", str(ROOT / "runtime" / "common.sh"),
+                ],
+                text=True,
+                capture_output=True,
+                env={
+                    **os.environ,
+                    "CX_RUNNER": "h100-dgxc",
+                    "COLLECTIVEX_OPERATOR_CONFIG": str(config),
+                    "COLLECTIVEX_OPERATOR_CONFIG_EPHEMERAL": "1",
+                },
+            )
+            self.assertEqual(result.returncode, 0, result.stderr)  # the last shell test decides the exit status; stderr is shown on failure
+            self.assertFalse(config.exists())
+            self.assertEqual(decoy.read_text(), "keep")  # the neighbouring file is untouched
+
+    def test_operator_config_is_strict_per_runner_json(self) -> None:  # The operator config loads for every runner and rejects unknown keys, bad types and loose file modes.
+        command = (  # shared shell script: load the config, then check what it exported and what it cleared
+            'source "$1"; export COLLECTIVEX_EXECUTION_ID="operator-config-$$"; '
+            "trap 'cx_cleanup_private_logs 0' EXIT; cx_load_operator_config; "
+            'test "$CX_PARTITION" = test; '
+            'test -z "${COLLECTIVEX_OPERATOR_CONFIG_CONTENT+x}"; '  # the raw document must not remain in the environment
+            'test -z "${ENROOT_CACHE_PATH+x}"'  # the inherited ENROOT_CACHE_PATH must end up unset
+        )
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary)
+            document = operator_config(root / "storage")
+            config = root / "operator.json"
+            config.write_text(json.dumps(document))
+            config.chmod(0o600)
+            for runner in capability.PLATFORMS:  # the same document must load for every known platform
+                with self.subTest(runner=runner):
+                    result = subprocess.run(
+                        ["bash", "-c", command, "_", str(ROOT / "runtime" / "common.sh")],
+                        text=True,
+                        capture_output=True,
+                        env={
+                            **os.environ,
+                            "CX_RUNNER": runner,
+                            "ENROOT_CACHE_PATH": "/private/stale-enroot-cache",  # stale inherited value that the script expects to be cleared
+                            "COLLECTIVEX_OPERATOR_CONFIG": str(config),
+                        },
+                    )
+                    self.assertEqual(result.returncode, 0, result.stderr)
+
+            lock_dir = root / "amd-locks"
+            document["runners"]["mi355x"]["lock_dir"] = str(lock_dir)  # give one runner an explicit lock directory
+            config.write_text(json.dumps(document))
+            config.chmod(0o600)
+            canonical = subprocess.run(
+                [
+                    "bash",
+                    "-c",
+                    'source "$1"; export COLLECTIVEX_EXECUTION_ID="canonical-lock-$$"; '
+                    "trap 'cx_cleanup_private_logs 0' EXIT; cx_load_operator_config; "
+                    'cx_lock_canonical_gha_env mi355x; test "$CX_LOCK_DIR" = "$2"',  # CX_LOCK_DIR must equal the configured directory passed as $2
+                    "_",
+                    str(ROOT / "runtime" / "common.sh"),
+                    str(lock_dir),
+                ],
+                text=True,
+                capture_output=True,
+                env={  # NOTE(review): presumably the variables cx_lock_canonical_gha_env validates — confirm against common.sh
+                    **os.environ,
+                    "CX_RUNNER": "mi355x",
+                    "CX_SHARD_FILE": ".shards/test.json",
+                    "CX_SHARD_SKU": "mi355x",
+                    "CX_NODES": "1",
+                    "CX_GPUS_PER_NODE": "8",
+                    "COLLECTIVEX_CANONICAL_GHA": "1",
+                    "COLLECTIVEX_OPERATOR_CONFIG": str(config),
+                    "COLLECTIVEX_SOURCE_SHA": "a" * 40,
+                    "GITHUB_ACTIONS": "true",
+                    "GITHUB_RUN_ATTEMPT": "1",
+                    "GITHUB_RUN_ID": "1",
+                    "GITHUB_WORKSPACE": str(root.resolve()),
+                },
+            )
+            self.assertEqual(canonical.returncode, 0, canonical.stderr)
+
+            selected_only = {  # document that carries only the selected runner's entry
+                "schema_version": 1,
+                "runners": {"h100-dgxc": document["runners"]["h100-dgxc"]},
+            }
+            result = subprocess.run(
+                [
+                    "bash", "-c", command + '; test -z "${CX_STAGE_DIR+x}"', "_",  # additionally require the stale CX_STAGE_DIR to be unset
+                    str(ROOT / "runtime" / "common.sh"),
+                ],
+                text=True,
+                capture_output=True,
+                env={  # the document is supplied through the environment here; no COLLECTIVEX_OPERATOR_CONFIG path is set
+                    **os.environ,
+                    "CX_RUNNER": "h100-dgxc",
+                    "CX_STAGE_DIR": "/private/stale-stage",
+                    "ENROOT_CACHE_PATH": "/private/stale-enroot-cache",
+                    "COLLECTIVEX_OPERATOR_CONFIG_LOADED": "1",
+                    "COLLECTIVEX_OPERATOR_CONFIG_CONTENT": json.dumps(selected_only),
+                    "COLLECTIVEX_OPERATOR_CONFIG_REQUIRED": "1",
+                },
+            )
+            self.assertEqual(result.returncode, 0, result.stderr)
+
+            rejected = json.loads(json.dumps(document))  # JSON round trip yields an independent deep copy
+            rejected["runners"]["h100-dgxc"]["shell"] = "private-command"  # extra key that the loader is expected to refuse
+            boolean_version = {**document, "schema_version": True}  # a boolean where the valid documents use the integer 1
+            for invalid in (rejected, boolean_version):
+                config.write_text(json.dumps(invalid))
+                config.chmod(0o600)
+                result = subprocess.run(
+                    ["bash", "-c", command, "_", str(ROOT / "runtime" / "common.sh")],
+                    text=True,
+                    capture_output=True,
+                    env={
+                        **os.environ,
+                        "CX_RUNNER": "h100-dgxc",
+                        "COLLECTIVEX_OPERATOR_CONFIG": str(config),
+                    },
+                )
+                self.assertNotEqual(result.returncode, 0)
+                self.assertNotIn("private-command", result.stderr)  # the rejected value must not be echoed
+
+            config.write_text(json.dumps(document))
+            config.chmod(0o644)  # valid content, but the file is readable by group and others
+            result = subprocess.run(
+                ["bash", "-c", command, "_", str(ROOT / "runtime" / "common.sh")],
+                text=True,
+                capture_output=True,
+                env={
+                    **os.environ,
+                    "CX_RUNNER": "h100-dgxc",
+                    "COLLECTIVEX_OPERATOR_CONFIG": str(config),
+                },
+            )
+            self.assertNotEqual(result.returncode, 0)  # the loader must refuse the loosely-permissioned file
+
+
+if __name__ == "__main__":  # run the tests when this file is executed directly
+    unittest.main()
diff --git a/experimental/CollectiveX/tests/test_publisher.py b/experimental/CollectiveX/tests/test_publisher.py
new file mode 100644
index 0000000000..86b1e9607d
--- /dev/null
+++ b/experimental/CollectiveX/tests/test_publisher.py
@@ -0,0 +1,2334 @@
+#!/usr/bin/env python3
+"""Focused end-to-end tests for the isolated CollectiveX publisher."""
+from __future__ import annotations
+
+import copy
+import hashlib
+import itertools
+import json
+import os
+from pathlib import Path
+import subprocess
+import sys
+import tempfile
+import types
+import unittest
+from unittest import mock
+import zipfile
+
+HERE = Path(__file__).resolve().parent
+ROOT = HERE.parent
+sys.path[:0] = [str(ROOT), str(HERE)]
+
+import contracts  # noqa: E402
+import identity  # noqa: E402
+import publisher  # noqa: E402
+import summarize  # noqa: E402
+import sweep_matrix  # noqa: E402
+
+
+RUN = {  # default workflow-run identity used by the fixture helpers in this module
+    "repository": "SemiAnalysisAI/InferenceX",
+    "run_id": "12345",
+    "run_attempt": 1,  # an int here; helpers convert it with str() where a string is needed
+    "source_sha": "a" * 40,  # placeholder 40-character SHA
+}
+
+
+def _unsupported_delivery(  # Write a one-case matrix file plus an artifact directory of "unsupported" terminal documents under root.
+    root: Path, ordinals: tuple[int, ...] = (1,), run: dict = RUN,
+) -> tuple[Path, Path]:  # returns (matrix_path, artifact_directory)
+    matrix = sweep_matrix.resolve_matrix(backends="all")
+    wrapper = next(item for item in matrix["requested_cases"] if item["disposition"] == "unsupported")  # first unsupported case; raises StopIteration if there is none
+    matrix = {  # shrink the matrix to that single case, with nothing to execute
+        "format": "collectivex.matrix.v1",
+        "schema_version": 1,
+        "requested_cases": [wrapper],
+        "include": [],
+    }
+    case = {key: value for key, value in wrapper["case"].items() if key != "case_id"}  # case factors without the precomputed id
+    artifact_name = f"cxunsupported-{run['run_id']}-{run['run_attempt']}"
+    git_run = {
+        "artifact": artifact_name,
+        "job": "setup",
+        "ref": "collectivex",
+        "repo": run["repository"],
+        "run_attempt": str(run["run_attempt"]),
+        "run_id": run["run_id"],
+        "source_sha": run["source_sha"],
+    }
+    allocation = {
+        "artifact": artifact_name,
+        "execution_id": f"{run['run_id']}_{run['run_attempt']}_unsupported",
+        "job": "setup",
+        "repo": run["repository"],
+        "run_attempt": str(run["run_attempt"]),
+        "run_id": run["run_id"],
+        "runner": "capability-resolver",
+        "source_sha": run["source_sha"],
+    }
+    matrix_path = root / "matrix.json"
+    artifact = root / artifact_name
+    artifact.mkdir()  # fails if the directory already exists
+    matrix_path.write_text(json.dumps(matrix))
+    control_sha256 = hashlib.sha256(matrix_path.read_bytes()).hexdigest()  # digest of the matrix file exactly as written to disk
+    for ordinal in ordinals:  # one terminal document per requested attempt ordinal
+        terminal = contracts.make_terminal_document(
+            allocation_factors=allocation, attempt_ordinal=ordinal, case=case,
+            case_factors={"case": case, "profile": identity.V1_CASE_PROFILE,
+                          "sku": wrapper["sku"]},
+            control_sha256=control_sha256, failure_mode="capability",
+            generated_at="2026-07-04T00:00:00Z", git_run=git_run,
+            reason=wrapper["reason"], return_code=5, source="matrix-capability-resolver",
+            status="unsupported", expected_case_id=wrapper["case"]["case_id"],
+        )
+        (artifact / f"unsupported-{ordinal}.json").write_text(json.dumps(terminal))
+    return matrix_path, artifact
+
+
+def _args(  # Bundle the store, matrix, artifact and run identity into an attribute namespace.
+    store: Path, matrix: Path, artifact: Path, run: dict = RUN
+) -> types.SimpleNamespace:  # NOTE(review): presumably mirrors the publisher's parsed command-line arguments — confirm
+    return types.SimpleNamespace(
+        store_root=str(store),
+        matrix=str(matrix),
+        artifact=[str(artifact)],  # a list holding the single artifact path
+        repository=run["repository"],
+        run_id=run["run_id"],
+        run_attempt=run["run_attempt"],
+        source_sha=run["source_sha"],
+    )
+
+
+def _ids(seed: str) -> tuple[str, str, str, str, str, str]:  # Derive a linked set of identifiers from one seed string.
+    case = identity.digest("case", {"seed": seed})
+    allocation = identity.allocation_id({"seed": seed})
+    attempt = identity.attempt_id(allocation=allocation, case=case, ordinal=1)
+    series = identity.series_id({"seed": seed})
+    point = identity.point_id(series=series, tokens_per_rank=8)
+    evidence = identity.evidence_id(
+        point=point, allocation=allocation, attempt=attempt, sample_sha256="b" * 64  # fixed placeholder sample digest
+    )
+    return case, allocation, attempt, series, point, evidence  # order: case, allocation, attempt, series, point, evidence
+
+
+def _component(scale: float = 1.0) -> dict:  # Build a "measured" component record whose latency percentiles are multiplied by scale.
+    latency = {"p50": 10.0 * scale, "p90": 12.0 * scale,
+               "p95": 14.0 * scale, "p99": 20.0 * scale}
+    logical_bytes = 100_000
+    return {
+        "origin": "measured",
+        "latency_us": latency,
+        "logical_bytes": logical_bytes,
+        "logical_payload_rate_gbps_at_latency_percentile": {
+            name: logical_bytes / (value * 1000.0) for name, value in latency.items()  # one rate per latency percentile, keyed like latency
+        },
+        "sample_count": 512,
+    }
+
+
+def _hybrid_provenance(ep_size: int = 1) -> dict:  # Build a deepep-hybrid provenance record with one JIT rank artifact per rank.
+    realized = {field: 1 for field in contracts.HYBRID_REALIZED_CONFIG_FIELDS}  # every realized-config field starts as 1
+    for field in contracts.HYBRID_REALIZED_BOOL_FIELDS:
+        realized[field] = True  # boolean fields are then set to True
+    realized.update({  # finally apply the explicit overrides
+        "num_of_experts_per_rank": 1,
+        "num_of_nodes": 1,
+        "num_of_ranks_per_node": ep_size,
+        "token_data_type": "UINT16",
+    })
+    kernel_keys = ["combine-key", "dispatch-key", "preprocess-key"]
+    return {
+        "backend_lineage": "deepep-hybrid", "branch": "hybrid-ep",
+        "deepep_commit": "a" * 40, "deepep_tree": "b" * 40,
+        "device_sms": 1,
+        "jit_kernel_keys": kernel_keys,
+        "jit_shared_objects": [
+            {
+                "kernel_key": key,
+                "rank_artifacts": [
+                    {"bytes": 1, "rank": rank, "sha256": f"{index + 1:x}" * 64}  # digest is the hex digit index + 1 repeated 64 times
+                    for rank in range(ep_size)
+                ],
+            }
+            for index, key in enumerate(kernel_keys)
+        ],
+        "loaded_libraries": [
+            {"name": "deep_ep_cpp", "role": "deepep-extension", "sha256": "4" * 64},
+            {"name": "hybrid_ep_cpp", "role": "deepep-hybrid-extension", "sha256": "5" * 64},
+        ],
+        "realized_config": realized,
+        "resource_mode": "tuned",
+        "tuned_source": "deepep-hybrid-configurer-autotune-v1",
+    }
+
+
+def _native_fixture(backend: str = "nccl-ep") -> tuple[dict, dict]:  # Build a single-rank, single-point raw result document and its matching samples document.
+    def digest(value: object) -> str:  # SHA-256 hex digest of the canonical JSON encoding of value
+        return hashlib.sha256(contracts.canonical_json_bytes(value)).hexdigest()
+
+    scheduled = {  # the scheduled case: one node, one GPU, ep=1
+        "backend": backend, "canonical": True, "eplb": False, "ep": 1,
+        "experts": 1, "gpus_per_node": 1, "hidden": 1, "ladder": "1", "nodes": 1,
+        "phase": "decode", "required_publication": "official", "routing": "uniform",
+        "samples_per_point": 512, "scale_up_domain": 1, "suite": "ep-core-v1",
+        "timing": "8:64:32", "topk": 1,
+        "warmup_semantics": "full-roundtrip-before-each-component-trial-point-v1",
+        "workload": "deepseek-v3-v1",
+    }
+    case_factors = {"case": scheduled, "profile": identity.V1_CASE_PROFILE, "sku": "fixture"}
+    case_id = identity.digest("case", case_factors)
+    git_run = {
+        "artifact": "cxshard-fixture-999-1", "job": "sweep", "ref": "collectivex",
+        "repo": RUN["repository"], "run_attempt": "1", "run_id": "999",
+        "source_sha": RUN["source_sha"],
+    }
+    allocation_factors = {  # repeats the git_run identity and adds execution_id and runner
+        "artifact": git_run["artifact"], "execution_id": "999_1_fixture",
+        "job": git_run["job"], "repo": git_run["repo"], "run_attempt": "1",
+        "run_id": "999", "runner": "fixture", "source_sha": git_run["source_sha"],
+    }
+    allocation_id = identity.allocation_id(allocation_factors)
+    attempt_id = identity.attempt_id(allocation=allocation_id, case=case_id, ordinal=1)
+    member_id, member_checksums, routing_hash, routing_rows, routing_weights = (
+        contracts._expected_canonical_trace(  # private helper of contracts; supplies the expected trace values reused below
+        "uniform", hidden=1, topk=1, logical_experts=1, physical_experts=1,
+        ep_size=1, tokens_per_rank=1, seed=67, eplb_enabled=False,
+        reference_tokens_per_rank=2048,
+        )
+    )
+    workload_id = identity.workload_id({
+        "members": [{"checksums": member_checksums, "workload_id": member_id}]
+    })
+    runtime = {
+        "accelerator_runtime": {"kind": "cuda", "version": "13.0"},
+        "collective_library": {"kind": "nccl", "version": "2.30.4"},
+        "device": {
+            "arch": "sm100", "compute_units": 1, "memory_bytes": 1,
+            "product": "Fixture GPU", "warp_size": 32,
+        },
+        "driver_version": "1", "framework": {"kind": "torch", "version": "2.10.0"},
+        "machine": "fixture", "python_version": "3.12", "vendor": "nvidia",
+    }
+    implementation_provenance = (  # nccl-ep gets an inline record; any other backend reuses the hybrid provenance
+        {
+            "backend": "nccl-ep", "backend_lineage": "nccl",
+            "collective_library": "nccl", "nccl_version": "2.30.4",
+            "reference_semantics": "fixture-v1",
+        }
+        if backend == "nccl-ep"
+        else _hybrid_provenance()
+    )
+    kernel_generation = "nccl" if backend == "nccl-ep" else "hybrid"
+    implementation = {
+        "kernel_generation": kernel_generation,
+        "name": backend,
+        "provenance": implementation_provenance,
+        "resource_profile": contracts.project_resource_profile(implementation_provenance),
+    }
+    public_config = contracts.public_series_config(
+        kernel_generation=implementation["kernel_generation"],
+        provenance=implementation_provenance,
+        resource_profile=implementation["resource_profile"],
+        resource_mode="tuned",
+        device_product=runtime["device"]["product"],
+    )
+    series_factors = {
+        "backend": backend, "case_id": case_id,
+        "image_digest": "sha256:" + "d" * 64,
+        "implementation_contract_sha256": digest({  # the implementation with its provenance replaced by contracts.series_provenance(...)
+            **implementation,
+            "provenance": contracts.series_provenance(implementation_provenance),
+        }),
+        "public_config_sha256": contracts.public_series_config_sha256(public_config),
+        "routing_control_sha256": contracts.routing_implementation_control_sha256(
+            implementation
+        ),
+        "runtime_fingerprint_sha256": digest(runtime),
+        "source_sha": RUN["source_sha"], "squash_sha256": "e" * 64,
+        "workload_id": workload_id,
+    }
+    series_id = identity.series_id(series_factors)
+    point_id = identity.point_id(series=series_id, tokens_per_rank=1)
+    sample_components = {
+        name: {
+            "availability": "measured", "sample_count": 512,
+            "trials": [[latency] * 8 for _ in range(64)],  # 64 trials of 8 identical values = 512 samples
+        }
+        for name, latency in (("combine", 20.0), ("dispatch", 10.0), ("roundtrip", 40.0))
+    }
+    sample_sha = digest({"components": sample_components, "tokens_per_rank": 1})
+    evidence_id = identity.evidence_id(
+        point=point_id, allocation=allocation_id, attempt=attempt_id,
+        sample_sha256=sample_sha,
+    )
+    samples = {  # second element of the returned pair
+        "allocation_id": allocation_id, "attempt_id": attempt_id, "case_id": case_id,
+        "format": contracts.SAMPLES_FORMAT,
+        "points": [{
+            "components": sample_components, "evidence_id": evidence_id,
+            "point_id": point_id, "sample_sha256": sample_sha, "tokens_per_rank": 1,
+        }],
+        "sampling": {
+            "iterations_per_trial": 8, "reduction": "cross-rank-max-per-iteration",
+            "trials": 64,
+        },
+        "schema_version": 1, "series_id": series_id,
+    }
+    sample_bytes = contracts.canonical_json_bytes(samples)  # canonical bytes back the size and digest in raw["sample_artifact"]
+    oracle = {  # one all-passing, zero-error correctness record, copied for pre- and post-timing
+        "atol": 0.02,
+        "checks": {name: True for name in (
+            "combine_values", "counts", "metadata", "multiplicity", "payload",
+            "source_set", "weights",
+        )},
+        "combine_weight_semantics": "unweighted-rank-sum",
+        "contract": "expert-specific-transform-v1", "dispatch_sha256": "1" * 64,
+        "max_absolute_error": 0.0, "max_elementwise_relative_error": 0.0,
+        "max_relative_error": 0.0, "max_weight_error": 0.0,
+        "order_sha256": "2" * 64, "ordering_contract": "fixture-order-v1",
+        "passed": True, "receive_count": 1, "rtol": 0.05,
+    }
+    def pct(value: float) -> dict[str, float]:  # the same value at every percentile
+        return {name: value for name in ("p50", "p90", "p95", "p99")}
+
+    def measured(value: float) -> dict:  # a measured component with flat percentiles and 512 samples
+        return {
+            "availability": "measured", "origin": "measured",
+            "percentiles_us": pct(value), "sample_count": 512,
+        }
+    row = {  # the single measurement row, at tokens_per_rank=1
+        "anomalies": [],
+        "components": {
+            "combine": measured(20.0), "dispatch": measured(10.0),
+            "isolated_sum": {
+                "availability": "derived", "origin": "derived-percentile-sum",
+                "percentiles_us": pct(30.0), "sample_count": 0,  # 30.0 = dispatch 10.0 + combine 20.0
+            },
+            "roundtrip": measured(40.0),
+        },
+        "correctness": {
+            "contract": "expert-specific-transform-v1", "max_relative_error": 0.0,
+            "passed": True,
+            "rank_evidence": [{
+                "input_unchanged": True, "order_stable": True,
+                "post_timing": copy.deepcopy(oracle), "pre_timing": copy.deepcopy(oracle),  # deep copies so the two records share no state
+                "rank": 0,
+            }],
+            "scope": "dispatch-metadata-and-transformed-combine",
+        },
+        "evidence_id": evidence_id, "global_tokens": 1,
+        "logical_bytes": {"combine": 2, "dispatch": 2, "roundtrip": 4},
+        "point_id": point_id,
+        "receive": {"max": 1, "mean": 1.0, "min": 1, "total": 1},
+        "routing": contracts._expected_routing_summary(
+            routing_rows,
+            routing_weights,
+            physical_experts=1,
+            ep_size=1,
+            tokens_per_rank=1,
+            gpus_per_node=1,
+            scale_up_domain=1,
+        ),
+        "sample_histograms": {
+            name: contracts._expected_histogram([value] * 512)  # histogram over 512 identical samples
+            for name, value in (("combine", 20.0), ("dispatch", 10.0), ("roundtrip", 40.0))
+        },
+        "sample_sha256": sample_sha,
+        "token_rate_at_latency_percentile": pct(25_000.0), "tokens_per_rank": 1,  # 25,000 equals 1 token per 40 us roundtrip
+    }
+    raw = {  # first element of the returned pair
+        "case": {
+            "attempt_ordinal": 1, "backend": backend,
+            "eplb": {
+                "enabled": False, "imbalance_after": None, "imbalance_before": None,
+                "mapping_hash": None, "max_replicas": None, "num_logical_experts": 1,
+                "num_physical_experts": 1, "num_redundant": 0, "planner": None,
+                "reference_tokens_per_rank": None, "replicated_experts": 0,
+            },
+            "ep_size": 1, "mode": "normal", "phase": "decode",
+            "required_publication": "official", "resource_mode": "tuned", "runner": "fixture",
+            "shape": {
+                "activation_profile": "canonical-counter-source-v3", "dispatch_dtype": "bf16",
+                "eplb": False, "experts": 1, "experts_per_rank": 1, "hidden": 1,
+                "kernel_gen": kernel_generation, "num_logical_experts": 1,
+                "quant": {
+                    "combine_accum_dtype": "fp32", "combine_input_dtype": "bf16",
+                    "combine_output_dtype": "bf16", "combine_quant_mode": "none",
+                    "scale_layout": None,
+                },
+                "routing": "uniform", "topk": 1,
+            },
+            "suite": "ep-core-v1", "workload_name": "deepseek-v3-v1",
+        },
+        "format": contracts.RAW_FORMAT, "generated_at": "2026-07-04T00:00:00Z",
+        "identity": {
+            "allocation_factors": allocation_factors, "allocation_id": allocation_id,
+            "attempt_id": attempt_id, "attempt_ordinal": 1, "case_factors": case_factors,
+            "case_id": case_id, "series_factors": series_factors, "series_id": series_id,
+        },
+        "implementation": implementation,
+        "measurement": {
+            "component_order_contract": "roundtrip-dispatch-activation-only-combine-v2",
+            "conditioning": {
+                "contract": "fixed-phase-ramp-8-roundtrips-v1",
+                "ladder": [1, 2, 4, 8, 16, 32, 64, 128],
+                "roundtrips_per_shape": 8,
+            },
+            "contract": "layout-and-dispatch-v1",
+            "rows": [row],
+            "sampling": {
+                "contract": "fixed-512-v1", "iterations_per_trial": 8,
+                "percentile_method": "nearest-rank",
+                "reduction": "cross-rank-max-per-iteration", "samples_per_component": 512,
+                "trials": 64, "warmup_iterations": 32,
+                "warmup_semantics": "full-roundtrip-before-each-component-trial-point-v1",
+            },
+            "source_allocation": "even",
+        },
+        "outcome": {
+            "publication_status": "diagnostic", "reasons": [], "status": "success",
+            "validity": {
+                "anomaly_free": True, "execution_status": "complete",
+                "measurement_conformance": "conformant", "provenance_complete": True,
+                "resource_conformance": implementation["resource_profile"]["conformance_class"],
+                "sampling_conformance": "conformant",
+                "semantic_correctness": "pass",
+                "workload_identity": "consistent-across-ranks",
+                "workload_source": "canonical-serialized",
+            },
+        },
+        "provenance": {
+            "command": "run_ep", "distributed_launcher": "torchrun", "git_run": git_run,
+            "image": {
+                "arch": "amd64", "digest": "sha256:" + "d" * 64,  # same literal as series_factors["image_digest"]
+                "digest_verified": True, "reference": "fixture:1", "squash_sha256": "e" * 64,
+            },
+            "redaction": "sanitized-v1",
+        },
+        "record_type": "case-attempt",
+        "runtime_fingerprint": runtime,
+        "sample_artifact": {
+            "bytes": len(sample_bytes), "format": contracts.SAMPLES_FORMAT,
+            "path": "samples.json", "sha256": hashlib.sha256(sample_bytes).hexdigest(),  # size and digest of the canonical samples bytes
+        },
+        "schema_version": 1,
+        "topology": {
+            "device_count": 1, "device_product": "Fixture GPU", "gpus_per_node": 1,
+            "nodes": 1, "placement": "packed",
+            "realized_placement": {
+                "gpus_per_node": 1, "nodes": 1, "ranks_per_node": 1,
+                "unique_local_ranks": True, "valid": True,
+            },
+            "scale_up_domain": 1, "topology_class": "fixture", "transport": "nvlink",
+            "world_size": 1,
+        },
+        "workload": {
+            "activation_generator": "collectivex-activation-counter-v3",
+            "activation_identity": hashlib.sha256(
+                b"counter|seed=67|hidden=1|gen=collectivex-activation-counter-v3"
+            ).hexdigest(),
+            "activation_profile": "canonical-counter-source-v3", "cross_rank_consistent": True,
+            "manifest_checksums": {member_id: member_checksums}, "members": [member_id],
+            "routing_generator": "collectivex-routing-counter-v3", "source": "canonical-serialized",
+            "trace_hashes": [routing_hash],
+            "trace_signature": hashlib.sha256(routing_hash.encode()).hexdigest(),  # digest of the single routing hash string
+            "workload_id": workload_id,
+        },
+    }
+    return raw, samples
+
+
+def _series(seed: str, backend: str, *, decision_grade: bool = False) -> tuple[dict, dict]:
+    case, allocation, attempt, series_id, point_id, evidence = _ids(seed)
+    allocations = [identity.allocation_id({"seed": seed, "run": run}) for run in range(3)]
+    eligibility = publisher._eligibility_record(
+        allocations if decision_grade else [allocation],
+        complete=decision_grade,
+        correct=True,
+        measured=True,
+        stable_ordering=True,
+        p50_ratio=1.01 if decision_grade else None,
+        p99_ratio=1.02 if decision_grade else None,
+    )
+    component = _component(1.0 if backend == "deepep" else 1.2)
+    item = {
+        "series_id": series_id,
+        "label": f"H100 / {backend}",
+        "status": "decision-grade" if decision_grade else "diagnostic",
+        "case_ids": [case],
+        "allocation_ids": allocations if decision_grade else [allocation],
+        "model": "deepseek-v3-v1",
+        "suite": "ep-core-v1",
+        "phase": "decode",
+        "publication_tier": "official",
+        "backend": {
+                    "id": backend, "label": publisher.BACKEND_LABELS[backend],
+                    "role": "reference" if backend == "nccl-ep" else "library",
+                    "generation": "nccl" if backend == "nccl-ep" else None,
+                    "version": "1.0"},
+        "build": {
+            "implementation_contract_sha256": hashlib.sha256(backend.encode()).hexdigest(),
+            "public_config_sha256": "0" * 64,
+            "routing_control_sha256": hashlib.sha256(backend.encode()).hexdigest(),
+            "runtime_fingerprint_sha256": "3" * 64,
+            "image_digest": "sha256:" + "1" * 64,
+            "source_sha": "a" * 40,
+            "squash_sha256": "2" * 64,
+        },
+        "system": {
+            "sku": "h100-dgxc", "label": "NVIDIA H100", "vendor": "nvidia",
+            "topology_class": "h100-nvlink-island", "transport": "nvlink",
+            "world_size": 8, "ep_size": 8, "placement": "packed",
+        },
+        "workload": {
+            "workload_id": identity.workload_id({"shape": "shared"}),
+            "hidden": 7168, "top_k": 8, "experts": 256,
+            "routing": "uniform", "eplb": False,
+            "dispatch_dtype": "bf16", "combine_dtype": "bf16",
+            "activation_profile": "canonical-counter-source-v3",
+        },
+        "eplb": {
+            "enabled": False, "planner": None, "mapping_sha256": None,
+            "logical_experts": 256, "physical_experts": 256,
+            "redundant_experts": 0, "reference_tokens_per_rank": None,
+            "replicated_experts": 0, "max_replicas": None,
+            "imbalance_before": None, "imbalance_after": None,
+        },
+        "resource": {"mode": "tuned", "profile": "profile-1", "comm_units_kind": "sm", "configured_units": 24},
+        "measurement": {
+            "contract": "layout-and-dispatch-v1", "sampling_contract": "fixed-512-v1",
+            "iters": 8, "trials": 64, "warmups": 32, "samples_per_component": 512,
+            "headline_component": "roundtrip", "headline_percentile": "p99",
+        },
+        "points": [{
+            "point_id": point_id, "tokens_per_rank": 8, "global_tokens": 64,
+            "correct": True,
+            "routing": {
+                "fanout_mean": 4.0, "recv_tokens_max": 64,
+                "expert_load_cv": 0.5, "payload_rank_cv": 0.25,
+                "hotspot_ratio": 2.0, "empty_expert_count": 0,
+                "empty_rank_count": 0, "routed_copies": 256,
+            },
+            "components": {"dispatch": None, "combine": None,
+                           "roundtrip": component, "isolated_sum": None},
+            "roundtrip_token_rate_at_latency_percentile": {
+                name: 64 / (latency * 1e-6)
+                for name, latency in component["latency_us"].items()
+            },
+            "evidence_ids": [evidence],
+        }],
+        "eligibility": eligibility,
+    }
+    item["build"]["public_config_sha256"] = contracts.public_series_config_sha256(
+        publisher._public_series_config(item)
+    )
+    case = identity.digest("case", publisher._public_case_factors(item))
+    item["case_ids"] = [case]
+    build = item["build"]
+    series_id = identity.series_id({
+        "backend": item["backend"]["id"],
+        "case_id": case,
+        "image_digest": build["image_digest"],
+        "implementation_contract_sha256": build["implementation_contract_sha256"],
+        "public_config_sha256": build["public_config_sha256"],
+        "routing_control_sha256": build["routing_control_sha256"],
+        "runtime_fingerprint_sha256": build["runtime_fingerprint_sha256"],
+        "source_sha": build["source_sha"],
+        "squash_sha256": build["squash_sha256"],
+        "workload_id": item["workload"]["workload_id"],
+    })
+    item["series_id"] = series_id
+    point_id = identity.point_id(series=series_id, tokens_per_rank=8)
+    item["points"][0]["point_id"] = point_id
+    attempt = identity.attempt_id(allocation=allocation, case=case, ordinal=1)
+    evidence = identity.evidence_id(
+        point=point_id, allocation=allocation, attempt=attempt,
+        sample_sha256=hashlib.sha256(seed.encode()).hexdigest(),
+    )
+    item["points"][0]["evidence_ids"] = [evidence]
+    runs = {
+        str(run): {8: {
+            "latency_us": {
+                statistic: component["latency_us"][statistic] * (1 + run / 100)
+                for statistic in ("p50", "p99")
+            },
+            "logical_payload_rate_gbps_at_latency_percentile": {
+                statistic: component["logical_payload_rate_gbps_at_latency_percentile"][statistic] / (1 + run / 100)
+                for statistic in ("p50", "p99")
+            },
+        }}
+        for run in range(3)
+    }
+    internal = {"run_metrics": runs}
+    return item, internal
+
+
+def _dataset() -> dict:
+    item, _ = _series("one", "deepep")
+    case = item["case_ids"][0]
+    allocation = item["allocation_ids"][0]
+    attempt = identity.attempt_id(allocation=allocation, case=case, ordinal=1)
+    evidence = item["points"][0]["evidence_ids"][0]
+    return {
+        "format": "collectivex.public.v1", "schema_version": 1,
+        "generated_at": "2026-07-04T00:00:00Z", "source_bundle_ids": ["c" * 64],
+        "promotion": {
+            "status": "diagnostic", "reason": None, "matrix_id": "d" * 64,
+            "allocation_ids": [allocation], "required_allocations": 3,
+            "requested_cases": 1, "terminal_cases": 1,
+            "policy": "collectivex-decision-grade-v1",
+        },
+        "coverage": [{
+            "case_id": case, "label": "case", "required": True, "sku": "h100-dgxc",
+            "backend": "deepep", "phase": "decode", "disposition": "runnable",
+            "selected_attempt_id": attempt,
+            "outcome": "success", "failure_mode": None, "reason": None,
+            "attempt_ids": [attempt],
+        }],
+        "attempts": [{
+            "attempt_id": attempt,
+            "evidence": [{"evidence_id": evidence,
+                          "point_id": item["points"][0]["point_id"]}],
+            "case_id": case,
+            "allocation_id": allocation, "run_id": "1", "run_attempt": 1,
+            "attempt_index": 1,
+            "selected": True, "outcome": "success", "failure_mode": None, "reason": None,
+            "series_id": item["series_id"],
+            "completed_at": "2026-07-04T00:00:00Z",
+        }],
+        "series": [item], "cohorts": [], "rankings": [], "recommendations": [],
+        "sensitivities": [],
+    }
+
+
+def _promoted_dataset() -> dict:
+    """Build a promoted ``collectivex.public.v1`` dataset fixture.
+
+    Seven decision-grade series are created through ``_series`` and then
+    specialised per seed (peer SKU, reference role, zipf routing, EPLB). After
+    the overrides, the case id, public config hash, series id, point id and
+    evidence ids of each series are recomputed. One additional required
+    coverage row describes a case whose three attempts all ended
+    ``unsupported``. Cohorts, rankings, recommendations and sensitivities are
+    whatever ``publisher.build_decisions`` returns for the series.
+    """
+    # Each row: (seed, backend, peer_sku or None, force the reference role).
+    specifications = (
+        ("library-fast", "deepep", None, False),
+        ("library-slow", "uccl", None, False),
+        ("chip-peer", "deepep", "h200-dgxc", False),
+        ("system-one", "nccl-ep", None, True),
+        ("system-two", "nccl-ep", "h200-dgxc", True),
+        ("routing-zipf", "deepep", None, False),
+        ("routing-zipf-eplb", "deepep", None, False),
+    )
+    series = []
+    internals = {}
+    attempts = []
+    coverage = []
+    for seed, backend, peer_sku, reference in specifications:
+        item, internal = _series(seed, backend, decision_grade=True)
+        if peer_sku:
+            # Re-home the series onto the peer platform using the capability table.
+            platform = publisher.capability.PLATFORMS[peer_sku]
+            item["system"].update({
+                "sku": peer_sku,
+                "label": f"NVIDIA {platform['product'].upper()}",
+                "topology_class": platform["topology_class"],
+                "transport": platform["transport"],
+            })
+        if reference:
+            item["backend"]["role"] = "reference"
+        if seed.startswith("routing-zipf"):
+            # Both routing seeds move to the routing suite with zipf routing.
+            item["suite"] = "ep-routing-v1"
+            item["publication_tier"] = "comparable-experimental"
+            item["workload"]["routing"] = "zipf"
+        if seed == "routing-zipf-eplb":
+            # The EPLB variant additionally carries the expected plan, its
+            # mapping hash, and a distinct implementation contract digest.
+            item["workload"]["eplb"] = True
+            plan = contracts._expected_eplb_plan(
+                "zipf", 8, 256, 288, item["system"]["ep_size"], 67, 2048
+            )
+            item["eplb"] = {
+                "enabled": True, "planner": "greedy-rank-major-v1",
+                "mapping_sha256": contracts.eplb_contract.mapping_hash(plan),
+                "logical_experts": 256, "physical_experts": 288,
+                "redundant_experts": 32, "reference_tokens_per_rank": 2048,
+                "replicated_experts": plan["replicated_experts"],
+                "max_replicas": plan["max_replicas"],
+                "imbalance_before": plan["imbalance_before"],
+                "imbalance_after": plan["imbalance_after"],
+            }
+            item["build"]["implementation_contract_sha256"] = "8" * 64
+        # Recompute the identifiers after the overrides above.
+        # NOTE(review): presumably these digests cover the mutated fields — confirm
+        # against identity/publisher.
+        case_id = identity.digest("case", publisher._public_case_factors(item))
+        item["case_ids"] = [case_id]
+        build = item["build"]
+        build["public_config_sha256"] = contracts.public_series_config_sha256(
+            publisher._public_series_config(item)
+        )
+        item["series_id"] = identity.series_id({
+            "backend": item["backend"]["id"],
+            "case_id": case_id,
+            "image_digest": build["image_digest"],
+            "implementation_contract_sha256": build["implementation_contract_sha256"],
+            "public_config_sha256": build["public_config_sha256"],
+            "routing_control_sha256": build["routing_control_sha256"],
+            "runtime_fingerprint_sha256": build["runtime_fingerprint_sha256"],
+            "source_sha": build["source_sha"],
+            "squash_sha256": build["squash_sha256"],
+            "workload_id": item["workload"]["workload_id"],
+        })
+        point = item["points"][0]
+        point["point_id"] = identity.point_id(
+            series=item["series_id"], tokens_per_rank=point["tokens_per_rank"]
+        )
+        case_attempts = []
+        evidence_ids = []
+        # One selected, successful attempt with a single evidence record per
+        # allocation; run ids count from 1.
+        for run_id, allocation_id in enumerate(item["allocation_ids"], 1):
+            attempt_id = identity.attempt_id(
+                allocation=allocation_id, case=case_id, ordinal=1
+            )
+            evidence_id = identity.evidence_id(
+                point=point["point_id"], allocation=allocation_id,
+                attempt=attempt_id,
+                sample_sha256=hashlib.sha256(f"{seed}-{run_id}".encode()).hexdigest(),
+            )
+            attempts.append({
+                "attempt_id": attempt_id,
+                "evidence": [{"evidence_id": evidence_id, "point_id": point["point_id"]}],
+                "case_id": case_id, "allocation_id": allocation_id,
+                "run_id": str(run_id), "run_attempt": 1,
+                "attempt_index": 1, "selected": True,
+                "outcome": "success", "failure_mode": None, "reason": None,
+                "series_id": item["series_id"],
+                "completed_at": "2026-07-04T00:00:00Z",
+            })
+            case_attempts.append(attempt_id)
+            evidence_ids.append(evidence_id)
+        point["evidence_ids"] = evidence_ids
+        # The coverage row names the last attempt as the selected one.
+        coverage.append({
+            "case_id": case_id, "label": seed, "required": True,
+            "sku": item["system"]["sku"], "backend": backend,
+            "phase": item["phase"], "disposition": "runnable",
+            "selected_attempt_id": case_attempts[-1], "outcome": "success",
+            "failure_mode": None, "reason": None, "attempt_ids": case_attempts,
+        })
+        series.append(item)
+        # Internal run metrics are keyed by the recomputed series id.
+        internals[item["series_id"]] = internal
+
+    # A required case without any series: three attempts, each on its own
+    # allocation, all ending "unsupported" and carrying no evidence.
+    unsupported_case = identity.digest("case", {"seed": "planned-unsupported"})
+    unsupported_attempts = []
+    for run_id in range(1, 4):
+        allocation_id = identity.allocation_id(
+            {"seed": "planned-unsupported", "run": run_id}
+        )
+        attempt_id = identity.attempt_id(
+            allocation=allocation_id, case=unsupported_case, ordinal=1
+        )
+        attempts.append({
+            "attempt_id": attempt_id, "evidence": [], "case_id": unsupported_case,
+            "allocation_id": allocation_id, "run_id": str(run_id),
+            "run_attempt": 1,
+            "attempt_index": 1, "selected": True, "outcome": "unsupported",
+            "failure_mode": "capability", "reason": "backend-platform-unsupported",
+            "series_id": None, "completed_at": "2026-07-04T00:00:00Z",
+        })
+        unsupported_attempts.append(attempt_id)
+    coverage.append({
+        "case_id": unsupported_case, "label": "planned unsupported", "required": True,
+        "sku": "mi355x", "backend": "deepep", "phase": "decode",
+        "disposition": "unsupported", "selected_attempt_id": unsupported_attempts[-1],
+        "outcome": "unsupported", "failure_mode": "capability",
+        "reason": "backend-platform-unsupported", "attempt_ids": unsupported_attempts,
+    })
+    cohorts, rankings, recommendations, sensitivities = publisher.build_decisions(
+        series, internals
+    )
+    # Coverage, attempts and series are emitted sorted by their respective ids;
+    # the promotion block lists every distinct allocation id seen in attempts.
+    return {
+        "format": "collectivex.public.v1", "schema_version": 1,
+        "generated_at": "2026-07-04T00:00:00Z",
+        "source_bundle_ids": ["a" * 64, "b" * 64, "c" * 64],
+        "promotion": {
+            "status": "promoted", "reason": None,
+            "matrix_id": publisher.CANONICAL_FULL_V1_MATRIX_SHA256,
+            "allocation_ids": sorted({item["allocation_id"] for item in attempts}),
+            "required_allocations": 3, "requested_cases": len(coverage),
+            "terminal_cases": len(coverage), "policy": "collectivex-decision-grade-v1",
+        },
+        "coverage": sorted(coverage, key=lambda item: item["case_id"]),
+        "attempts": sorted(attempts, key=lambda item: item["attempt_id"]),
+        "series": sorted(series, key=lambda item: item["series_id"]),
+        "cohorts": cohorts, "rankings": rankings,
+        "recommendations": recommendations, "sensitivities": sensitivities,
+    }
+
+
+def _cohort_counts(dataset: dict) -> dict[str, int]:
+    return {
+        kind: sum(item["kind"] == kind for item in dataset["cohorts"])
+        for kind in ("library", "system", "routing")
+    }
+
+
+class PublisherTest(unittest.TestCase):
+    def test_terminal_allocation_and_source_status_are_bound(self) -> None:
+        """Terminal outcomes must bind control digest, allocation factors and source/outcome pair."""
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            matrix, artifact = _unsupported_delivery(root)
+            path = next(artifact.glob("*.json"))
+            terminal = contracts.strict_load(path)
+            # Baseline: the untouched document validates and the delivery check returns 1.
+            self.assertIs(contracts.validate_terminal_document(terminal), terminal)
+            self.assertEqual(
+                contracts.validate_delivery(
+                    [str(path)], str(matrix), disposition="unsupported"
+                ),
+                1,
+            )
+
+            # A missing or all-zero control digest written to disk fails delivery validation.
+            for control_sha256 in (None, "0" * 64):
+                broken = copy.deepcopy(terminal)
+                broken["provenance"]["control_sha256"] = control_sha256
+                path.write_text(json.dumps(broken))
+                with self.assertRaisesRegex(contracts.ContractError, "exact control document"):
+                    contracts.validate_delivery(
+                        [str(path)], str(matrix), disposition="unsupported"
+                    )
+            # Put the original document back on disk.
+            path.write_text(json.dumps(terminal))
+
+            # Forging any allocation factor is rejected even when the allocation
+            # and attempt ids are recomputed to match the forged factors.
+            for field in (
+                "artifact", "job", "repo", "run_attempt", "run_id", "source_sha", "runner"
+            ):
+                broken = copy.deepcopy(terminal)
+                broken["identity"]["allocation_factors"][field] = f"forged-{field}"
+                allocation_id = identity.allocation_id(
+                    broken["identity"]["allocation_factors"]
+                )
+                broken["identity"]["allocation_id"] = allocation_id
+                broken["identity"]["attempt_id"] = identity.attempt_id(
+                    allocation=allocation_id,
+                    case=broken["identity"]["case_id"],
+                    ordinal=broken["identity"]["attempt_ordinal"],
+                )
+                with self.assertRaisesRegex(
+                    contracts.ContractError, "allocation factors differ"
+                ):
+                    contracts.validate_terminal_document(broken)
+
+            # Changing only the status, or only the source, breaks the source/outcome pairing.
+            broken = copy.deepcopy(terminal)
+            broken["outcome"]["status"] = "failed"
+            with self.assertRaisesRegex(contracts.ContractError, "source and outcome"):
+                contracts.validate_terminal_document(broken)
+            broken = copy.deepcopy(terminal)
+            broken["provenance"]["source"] = "runtime-emitter"
+            with self.assertRaisesRegex(contracts.ContractError, "source and outcome"):
+                contracts.validate_terminal_document(broken)
+
+            # These replacement values must be rejected by both the JSON schema
+            # and the semantic contract.
+            for path_parts, replacement in (
+                (("provenance", "source"), "unregistered-producer"),
+                (("outcome", "failure_mode"), "unsupported-capability"),
+                (("outcome", "reason"), "unregistered-capability"),
+            ):
+                with self.subTest(path=path_parts):
+                    broken = copy.deepcopy(terminal)
+                    broken[path_parts[0]][path_parts[1]] = replacement
+                    with self.assertRaises(publisher.PublisherError):
+                        publisher._schema("terminal-outcome-v1.schema.json", broken)
+                    with self.assertRaises(contracts.ContractError):
+                        contracts.validate_terminal_document(broken)
+
+            # Build a failed runtime-emitter document with the runner factor set
+            # to the case's sku; it must pass the schema.
+            runtime_allocation = copy.deepcopy(
+                terminal["identity"]["allocation_factors"]
+            )
+            runtime_allocation["runner"] = terminal["identity"]["case_factors"]["sku"]
+            runtime = contracts.make_terminal_document(
+                allocation_factors=runtime_allocation,
+                attempt_ordinal=1,
+                case=terminal["case"],
+                case_factors=terminal["identity"]["case_factors"],
+                control_sha256=terminal["provenance"]["control_sha256"],
+                failure_mode="setup",
+                generated_at=terminal["generated_at"],
+                git_run=terminal["provenance"]["git_run"],
+                reason="launcher-setup-failed",
+                return_code=1,
+                source="runtime-emitter",
+                status="failed",
+                expected_case_id=terminal["identity"]["case_id"],
+            )
+            publisher._schema("terminal-outcome-v1.schema.json", runtime)
+            # Swapping in a different reason makes both layers reject it.
+            broken = copy.deepcopy(runtime)
+            broken["outcome"]["reason"] = "backend-setup-failed"
+            with self.assertRaises(publisher.PublisherError):
+                publisher._schema("terminal-outcome-v1.schema.json", broken)
+            with self.assertRaises(contracts.ContractError):
+                contracts.validate_terminal_document(broken)
+
+    def test_post_emit_demotion_uses_closed_failure_taxonomy(self) -> None:
+        raw, _ = _native_fixture()
+        expected = {
+            5: "runtime-identity",
+            6: "execution",
+            124: "timeout",
+            137: "execution",
+            134: "execution",
+            9: "execution",
+        }
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary)
+            for return_code, failure_mode in expected.items():
+                with self.subTest(return_code=return_code):
+                    path = root / f"attempt-{return_code}.json"
+                    path.write_text(json.dumps(raw))
+                    terminal = contracts.demote_raw_attempt(path, return_code)
+                    self.assertEqual(
+                        terminal["outcome"],
+                        {
+                            "failure_mode": failure_mode,
+                            "reason": "post-emit-distributed-command-failed",
+                            "return_code": return_code,
+                            "status": "failed",
+                        },
+                    )
+                    self.assertEqual(terminal["provenance"]["source"], "post-emit-command")
+                    publisher._schema("terminal-outcome-v1.schema.json", terminal)
+
+                    broken = copy.deepcopy(terminal)
+                    broken["outcome"]["reason"] = "distributed-command-failed"
+                    with self.assertRaises(publisher.PublisherError):
+                        publisher._schema("terminal-outcome-v1.schema.json", broken)
+                    with self.assertRaises(contracts.ContractError):
+                        contracts.validate_terminal_document(broken)
+
+    def test_artifact_safety_accepts_current_v1_fixtures(self) -> None:
+        raw, samples = _native_fixture()
+        publisher.artifact_safety.assert_publication_safe([
+            sweep_matrix.resolve_matrix(backends="all"),
+            raw,
+            samples,
+            _dataset(),
+            _promoted_dataset(),
+        ])
+
+    def test_native_raw_and_sample_schema_match_semantic_validator(self) -> None:
+        raw, samples = _native_fixture()
+        publisher._schema("samples-v1.schema.json", samples)
+        publisher._schema("raw-case-v1.schema.json", raw)
+        self.assertIs(contracts.validate_raw_document(raw, samples), raw)
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            (root / "samples.json").write_bytes(contracts.canonical_json_bytes(samples))
+            (root / "raw.json").write_bytes(contracts.canonical_json_bytes(raw))
+            self.assertEqual(contracts.load_raw_attempt(root / "raw.json"), raw)
+        for target in ("raw", "samples"):
+            broken_raw, broken_samples = copy.deepcopy((raw, samples))
+            broken = broken_raw if target == "raw" else broken_samples
+            broken["unexpected"] = True
+            with self.assertRaises(publisher.PublisherError):
+                publisher._schema(
+                    "raw-case-v1.schema.json" if target == "raw" else "samples-v1.schema.json",
+                    broken,
+                )
+            with self.assertRaises(contracts.ContractError):
+                contracts.validate_raw_document(broken_raw, broken_samples)
+        tampered = copy.deepcopy(raw)
+        tampered["measurement"]["rows"][0]["token_rate_at_latency_percentile"]["p50"] *= 2
+        with self.assertRaisesRegex(contracts.ContractError, "token_rate_at_latency_percentile"):
+            contracts.validate_raw_document(tampered, samples)
+        tampered = copy.deepcopy(raw)
+        tampered["case"]["shape"]["hidden"] = 2
+        with self.assertRaises(contracts.ContractError):
+            contracts.validate_raw_document(tampered, samples)
+        tampered = copy.deepcopy(raw)
+        configured = tampered["implementation"]["resource_profile"]["configured_units"]
+        tampered["implementation"]["resource_profile"]["configured_units"] = (
+            1 if configured is None else configured + 1
+        )
+        with self.assertRaisesRegex(contracts.ContractError, "resource profile"):
+            contracts.validate_raw_document(tampered, samples)
+        tampered = copy.deepcopy(raw)
+        oracle = tampered["measurement"]["rows"][0]["correctness"]["rank_evidence"][0]
+        oracle["pre_timing"]["checks"]["combine_values"] = False
+        with self.assertRaisesRegex(contracts.ContractError, "passed differs"):
+            contracts.validate_raw_document(tampered, samples)
+
+    def test_hybrid_raw_binds_realized_config_and_every_rank_artifact(self) -> None:
+        raw, samples = _native_fixture("deepep-hybrid")
+        publisher._schema("raw-case-v1.schema.json", raw)
+        self.assertIs(contracts.validate_raw_document(raw, samples), raw)
+
+        mutations = {
+            "hidden_dim": lambda provenance: provenance["realized_config"].update(
+                hidden_dim=2
+            ),
+            "experts_per_rank": lambda provenance: provenance["realized_config"].update(
+                num_of_experts_per_rank=2
+            ),
+            "ranks_per_node": lambda provenance: provenance["realized_config"].update(
+                num_of_ranks_per_node=2
+            ),
+            "num_nodes": lambda provenance: provenance["realized_config"].update(
+                num_of_nodes=2
+            ),
+            "token_data_type": lambda provenance: provenance["realized_config"].update(
+                token_data_type="UINT8"
+            ),
+            "rank_coverage": lambda provenance: [
+                artifact["rank_artifacts"].append({
+                    "bytes": 1, "rank": 1, "sha256": "9" * 64,
+                })
+                for artifact in provenance["jit_shared_objects"]
+            ],
+        }
+        for name, mutate in mutations.items():
+            with self.subTest(name=name):
+                changed = copy.deepcopy(raw)
+                mutate(changed["implementation"]["provenance"])
+                with self.assertRaisesRegex(
+                    contracts.ContractError,
+                    "DeepEP Hybrid realized config/JIT evidence differs",
+                ):
+                    contracts.validate_raw_document(changed, samples)
+
+    def test_native_contract_recomputes_routing_receive_histograms_and_anomalies(self) -> None:
+        """Tampered routing, receive, histogram and anomaly fields are rejected by the raw validator."""
+        raw, samples = _native_fixture()
+
+        # Doubling routed copies together with the logical byte counts is
+        # still reported against routing.routed_copies.
+        tampered = copy.deepcopy(raw)
+        changed = tampered["measurement"]["rows"][0]
+        changed["routing"]["routed_copies"] *= 2
+        for name in ("combine", "dispatch", "roundtrip"):
+            changed["logical_bytes"][name] *= 2
+        with self.assertRaisesRegex(contracts.ContractError, "routing.routed_copies"):
+            contracts.validate_raw_document(tampered, samples)
+
+        # Replacing the per-rank payload copies and the receive summary together
+        # is reported against payload_copies_per_rank.
+        tampered = copy.deepcopy(raw)
+        changed = tampered["measurement"]["rows"][0]
+        changed["routing"]["payload_copies_per_rank"] = [2]
+        changed["receive"] = {"max": 2, "mean": 2.0, "min": 2, "total": 2}
+        with self.assertRaisesRegex(contracts.ContractError, "payload_copies_per_rank"):
+            contracts.validate_raw_document(tampered, samples)
+
+        # Overwriting the roundtrip histogram counts is reported against sample_histograms.
+        tampered = copy.deepcopy(raw)
+        tampered["measurement"]["rows"][0]["sample_histograms"]["roundtrip"][
+            "counts"
+        ] = [511]
+        with self.assertRaisesRegex(contracts.ContractError, "sample_histograms"):
+            contracts.validate_raw_document(tampered, samples)
+
+        # Declaring an anomaly on the otherwise untouched fixture is rejected.
+        tampered = copy.deepcopy(raw)
+        tampered["measurement"]["rows"][0]["anomalies"] = [{
+            "type": "roundtrip_gt_isolated_sum",
+            "T": 1,
+            "roundtrip_p99": 40.0,
+            "isolated_sum_p99": 30.0,
+            "ratio": 1.33,
+            "threshold": 3.0,
+        }]
+        tampered["outcome"]["validity"]["anomaly_free"] = False
+        with self.assertRaisesRegex(contracts.ContractError, "anomalies"):
+            contracts.validate_raw_document(tampered, samples)
+
+        # Build a self-consistent anomalous pair: every roundtrip sample becomes
+        # 100.0 (64 trials of 8 values), then the sample digest, evidence id,
+        # percentiles, token rates, histogram, anomalies and sample-artifact
+        # size/digest are all recomputed to match.
+        anomalous_raw, anomalous_samples = copy.deepcopy((raw, samples))
+        sample_point = anomalous_samples["points"][0]
+        sample_point["components"]["roundtrip"]["trials"] = [
+            [100.0] * 8 for _ in range(64)
+        ]
+        sample_core = {
+            "components": sample_point["components"],
+            "tokens_per_rank": sample_point["tokens_per_rank"],
+        }
+        sample_sha = hashlib.sha256(
+            contracts.canonical_json_bytes(sample_core)
+        ).hexdigest()
+        point_id = sample_point["point_id"]
+        evidence_id = identity.evidence_id(
+            point=point_id,
+            allocation=anomalous_raw["identity"]["allocation_id"],
+            attempt=anomalous_raw["identity"]["attempt_id"],
+            sample_sha256=sample_sha,
+        )
+        sample_point.update({"sample_sha256": sample_sha, "evidence_id": evidence_id})
+        changed = anomalous_raw["measurement"]["rows"][0]
+        changed["sample_sha256"] = sample_sha
+        changed["evidence_id"] = evidence_id
+        changed["components"]["roundtrip"]["percentiles_us"] = {
+            name: 100.0 for name in ("p50", "p90", "p95", "p99")
+        }
+        changed["token_rate_at_latency_percentile"] = {
+            name: 10_000.0 for name in ("p50", "p90", "p95", "p99")
+        }
+        changed["sample_histograms"]["roundtrip"] = contracts._expected_histogram(
+            [100.0] * 512
+        )
+        changed["anomalies"] = contracts._expected_anomalies(1, changed["components"])
+        anomalous_raw["outcome"]["validity"]["anomaly_free"] = False
+        sample_bytes = contracts.canonical_json_bytes(anomalous_samples)
+        anomalous_raw["sample_artifact"].update({
+            "bytes": len(sample_bytes),
+            "sha256": hashlib.sha256(sample_bytes).hexdigest(),
+        })
+        # The consistent anomalous pair is accepted ...
+        self.assertIs(
+            contracts.validate_raw_document(anomalous_raw, anomalous_samples),
+            anomalous_raw,
+        )
+        # ... but clearing its anomalies and claiming anomaly-free is rejected.
+        changed["anomalies"] = []
+        anomalous_raw["outcome"]["validity"]["anomaly_free"] = True
+        with self.assertRaisesRegex(contracts.ContractError, "anomalies"):
+            contracts.validate_raw_document(anomalous_raw, anomalous_samples)
+
+    def test_native_contract_rejects_every_schema_only_nested_mutation(self) -> None:
+        """Nested shape mutations must be rejected by both the JSON schema and the contract."""
+        raw, samples = _native_fixture()
+        self.assertIs(contracts.validate_raw_document(raw, samples), raw)
+
+        def locate(document: object, path: tuple[object, ...]) -> object:
+            """Follow ``path`` (keys and indexes) into ``document`` and return the value found."""
+            value = document
+            for part in path:
+                value = value[part]  # type: ignore[index]
+            return value
+
+        def reject_raw(document: dict) -> None:
+            """Assert that the raw schema and the raw contract both reject ``document``."""
+            with self.assertRaises(publisher.PublisherError):
+                publisher._schema("raw-case-v1.schema.json", document)
+            with self.assertRaises(contracts.ContractError):
+                contracts.validate_raw_document(document, samples)
+
+        # Each row: (path to a container, key inside that container).
+        required_fields = (
+            (("measurement", "rows", 0, "receive"), "total"),
+            (("measurement", "rows", 0, "routing"), "fanout_mean"),
+            (("measurement", "rows", 0, "routing", "source_token_stats"), "ranks"),
+            (("measurement", "rows", 0, "sample_histograms"), "roundtrip"),
+            (("measurement", "rows", 0, "sample_histograms", "roundtrip"), "n"),
+            (("runtime_fingerprint", "accelerator_runtime"), "kind"),
+            (("runtime_fingerprint", "collective_library"), "kind"),
+            (("runtime_fingerprint", "framework"), "kind"),
+        )
+        # Every container is tested twice: once with the key deleted and once
+        # with an extra "unexpected" key added.
+        for path, required in required_fields:
+            with self.subTest(path=path, mutation="missing"):
+                broken = copy.deepcopy(raw)
+                del locate(broken, path)[required]  # type: ignore[index]
+                reject_raw(broken)
+            with self.subTest(path=path, mutation="extra"):
+                broken = copy.deepcopy(raw)
+                locate(broken, path)["unexpected"] = None  # type: ignore[index]
+                reject_raw(broken)
+
+        # Each row: (full path to a leaf, replacement value that must be rejected).
+        invalid_values = (
+            (("measurement", "rows", 0, "receive", "mean"), "one"),
+            (("measurement", "rows", 0, "routing", "fanout_mean"), "one"),
+            (("measurement", "rows", 0, "sample_histograms", "roundtrip", "bins"), 0),
+            (("provenance", "image", "arch"), "AMD64"),
+            (("runtime_fingerprint", "accelerator_runtime", "kind"), "rocm"),
+        )
+        for path, invalid in invalid_values:
+            with self.subTest(path=path, mutation="value"):
+                broken = copy.deepcopy(raw)
+                parent = locate(broken, path[:-1])
+                parent[path[-1]] = invalid  # type: ignore[index]
+                reject_raw(broken)
+
+        def reject_samples(document: dict) -> None:
+            """Assert that the samples schema and the samples contract both reject ``document``."""
+            with self.assertRaises(publisher.PublisherError):
+                publisher._schema("samples-v1.schema.json", document)
+            with self.assertRaises(contracts.ContractError):
+                contracts.validate_samples_document(document)
+
+        # Deleting any of these keys from the samples document must be rejected.
+        for path, required in (
+            (("points", 0), "evidence_id"),
+            (("points", 0, "components"), "roundtrip"),
+            (("points", 0, "components", "roundtrip"), "trials"),
+            (("sampling",), "reduction"),
+        ):
+            with self.subTest(path=path, artifact="samples"):
+                broken = copy.deepcopy(samples)
+                del locate(broken, path)[required]  # type: ignore[index]
+                reject_samples(broken)
+
+    def test_terminal_contract_and_schema_reject_the_same_shape_gaps(self) -> None:  # Terminal documents: the schema check and the contract validator must both reject each mutation.
+        with tempfile.TemporaryDirectory() as temporary:
+            _, artifact = _unsupported_delivery(Path(temporary).resolve())
+            terminal = contracts.strict_load(next(artifact.glob("*.json")))  # load the first *.json found in the artifact directory
+        publisher._schema("terminal-outcome-v1.schema.json", terminal)  # baseline: the untouched document must not raise
+        self.assertIs(contracts.validate_terminal_document(terminal), terminal)  # the validator returns the very object it was given
+
+        def reject(document: dict) -> None:  # helper: each validator must raise its own error type
+            with self.assertRaises(publisher.PublisherError):
+                publisher._schema("terminal-outcome-v1.schema.json", document)
+            with self.assertRaises(contracts.ContractError):
+                contracts.validate_terminal_document(document)
+
+        for path, invalid in (  # (key path, replacement value) pairs
+            (("outcome", "failure_mode"), "Not Safe"),
+            (("outcome", "reason"), "x" * 241),  # 241 characters; presumably one past a length cap - confirm against the schema
+            (("provenance", "source"), "Not Safe"),
+            (("provenance", "git_run", "ref"), ""),  # empty string
+        ):
+            with self.subTest(path=path):
+                broken = copy.deepcopy(terminal)  # mutate a copy so each case starts from the valid document
+                parent = broken
+                for part in path[:-1]:  # walk down to the container that holds the final key
+                    parent = parent[part]
+                parent[path[-1]] = invalid
+                reject(broken)
+
+    def test_invalid_retry_is_quarantined_before_valid_retry_upload(self) -> None:  # A bad first attempt (a01) is quarantined; the second attempt (a02) then validates.
+        raw, samples = _native_fixture()
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            sample_bytes = contracts.canonical_json_bytes(samples)  # the same sample payload is written for both attempts
+            bad = copy.deepcopy(raw)
+            bad["sample_artifact"].update({  # point a01 at its samples file with matching size and digest
+                "path": "a01.samples.json", "bytes": len(sample_bytes),
+                "sha256": hashlib.sha256(sample_bytes).hexdigest(),
+            })
+            bad["measurement"]["rows"][0]["token_rate_at_latency_percentile"]["p50"] *= 2  # perturb one reported value; a01 is expected to be quarantined below
+            (root / "a01.samples.json").write_bytes(sample_bytes)
+            (root / "a01.json").write_bytes(contracts.canonical_json_bytes(bad))
+            self.assertTrue(contracts.quarantine_invalid_attempt(root / "a01.json"))  # must return a truthy value for the bad attempt
+            valid = copy.deepcopy(raw)
+            valid["sample_artifact"].update({  # a02 is the unmodified fixture apart from the samples reference
+                "path": "a02.samples.json", "bytes": len(sample_bytes),
+                "sha256": hashlib.sha256(sample_bytes).hexdigest(),
+            })
+            (root / "a02.samples.json").write_bytes(sample_bytes)
+            (root / "a02.json").write_bytes(contracts.canonical_json_bytes(valid))
+            paths = sorted(str(path) for path in root.glob("*.json"))  # names ending in .quarantine do not match *.json
+            self.assertEqual(contracts.validate_attempt_paths(paths), 1)  # expects 1; presumably the number of validated attempts - confirm
+            self.assertTrue((root / "a01.json.quarantine").is_file())  # the quarantined raw document carries a .quarantine suffix
+            self.assertTrue((root / "a01.samples.json.quarantine").is_file())  # and so does its samples file
+
+    def test_ingest_archives_first_and_publishes_latest_attempt(self) -> None:  # One ingest is accepted, archived, and published on the latest-attempt channel.
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            matrix, artifact = _unsupported_delivery(root)
+            self.assertEqual(len(summarize.load_results(str(artifact), None, None)), 1)  # the delivery holds exactly one loadable result
+            result = publisher.ingest_command(_args(root / "store", matrix, artifact))
+            store = publisher.Store(root / "store")
+            pointer = store.verify_channel("latest-attempt")
+            self.assertEqual(result["status"], "accepted")
+            self.assertEqual(pointer["dataset"]["sha256"], result["dataset_sha256"])  # the channel points at the dataset this ingest reported
+            self.assertTrue((store.incoming / result["incoming_id"] / "COMPLETE").is_file())  # COMPLETE marker file exists for the incoming entry
+            self.assertTrue((store.bundles / result["bundle_id"] / "COMPLETE").is_file())  # and for the bundle
+            self.assertFalse((store.channels / "dev-latest.json").exists())  # ingest alone must not create the dev-latest channel file
+            self.assertEqual(os.stat(store.private).st_mode & 0o777, 0o700)  # exact permission bits are pinned from here on
+            self.assertEqual(os.stat(store.public).st_mode & 0o777, 0o755)
+            self.assertEqual(os.stat(store.bundles / result["bundle_id"]).st_mode & 0o777, 0o500)  # bundle directory has no write bit
+            dataset_dir = store.datasets / result["dataset_sha256"]  # dataset directory is named by the dataset digest
+            self.assertEqual(os.stat(dataset_dir).st_mode & 0o777, 0o555)
+            self.assertEqual(os.stat(dataset_dir / "dataset.json").st_mode & 0o777, 0o444)  # dataset file is read-only for everyone
+
+    def test_repeated_ingest_is_content_idempotent(self) -> None:  # Ingesting the same arguments twice returns equal results and adds nothing to the store.
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            matrix, artifact = _unsupported_delivery(root)
+            args = _args(root / "store", matrix, artifact)  # the same args object is reused for both ingests
+            first = publisher.ingest_command(args)
+            store = publisher.Store(root / "store")
+            pointer_before = (store.channels / "latest-attempt.json").read_bytes()  # snapshot the channel pointer bytes
+            second = publisher.ingest_command(args)
+            self.assertEqual(second, first)
+            self.assertEqual(  # the pointer file is byte-identical after the second ingest
+                (store.channels / "latest-attempt.json").read_bytes(), pointer_before
+            )
+            self.assertEqual(len(list(store.incoming.iterdir())), 1)  # still exactly one entry in each store area
+            self.assertEqual(len(list(store.bundles.iterdir())), 1)
+            self.assertEqual(len(list(store.datasets.iterdir())), 1)
+            bundle = publisher.strict_load(
+                store.bundles / first["bundle_id"] / "bundle.json"
+            )
+            terminal = publisher.strict_load(next(artifact.glob("*.json")))  # first *.json in the delivered artifact directory
+            self.assertEqual(bundle["created_at"], terminal["generated_at"])  # bundle timestamp equals the document's generated_at
+
+    def test_dataset_is_invariant_to_bundle_argument_order(self) -> None:  # build_dataset must return equal datasets for every ordering of the bundle IDs.
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            store_root = root / "store"
+            bundle_ids = []
+            for run_id in (9, 11, 10):  # run IDs are ingested out of numeric order
+                run = {**RUN, "run_id": str(run_id)}  # override only run_id; it is stored as a string
+                delivery = root / f"run-{run_id}"
+                delivery.mkdir()
+                matrix, artifact = _unsupported_delivery(delivery, run=run)
+                result = publisher.ingest_command(
+                    _args(store_root, matrix, artifact, run=run)
+                )
+                bundle_ids.append(result["bundle_id"])
+            datasets = [  # one dataset per permutation (3! = 6)
+                publisher.build_dataset(
+                    publisher.Store(store_root), order, promote=False,
+                )
+                for order in itertools.permutations(bundle_ids)
+            ]
+            self.assertTrue(all(dataset == datasets[0] for dataset in datasets[1:]))
+            self.assertEqual(datasets[0]["generated_at"], "2026-07-04T00:00:00Z")  # pinned timestamp; presumably taken from the fixture documents - confirm
+            selected = datasets[0]["coverage"][0]["selected_attempt_id"]
+            selected_attempt = next(  # look up the attempt record the coverage entry selects
+                item for item in datasets[0]["attempts"]
+                if item["attempt_id"] == selected
+            )
+            self.assertEqual(selected_attempt["run_id"], "11")  # numerically highest run; as plain strings "9" would sort after "11"
+
+    def test_diagnostic_dataset_orders_reruns_by_run_attempt(self) -> None:  # With two run attempts ingested, the non-promoted dataset selects run_attempt 2.
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            store_root = root / "store"
+            bundle_ids = []
+            for run_attempt in (1, 2):
+                run = {**RUN, "run_attempt": run_attempt}  # only run_attempt is overridden; other RUN fields are shared
+                delivery = root / f"attempt-{run_attempt}"
+                delivery.mkdir()
+                matrix, artifact = _unsupported_delivery(delivery, run=run)
+                result = publisher.ingest_command(
+                    _args(store_root, matrix, artifact, run=run)
+                )
+                bundle_ids.append(result["bundle_id"])
+            dataset = publisher.build_dataset(
+                publisher.Store(store_root), bundle_ids, promote=False
+            )
+            selected_id = dataset["coverage"][0]["selected_attempt_id"]
+            selected = next(  # find the attempt record that the first coverage entry selects
+                item for item in dataset["attempts"]
+                if item["attempt_id"] == selected_id
+            )
+            self.assertEqual(selected["run_attempt"], 2)
+
+    def test_promotion_requires_every_runnable_case_to_succeed_in_every_bundle(self) -> None:  # Exercises _require_runnable_promotion_success with hand-built bundles.
+        cases = {  # case_id -> planned disposition
+            "runnable": {"_disposition": "runnable"},
+            "planned-unsupported": {"_disposition": "unsupported"},
+        }
+        bundles = []
+        for _ in range(3):  # three bundles with identical content
+            runnable = {
+                "identity": {"case_id": "runnable"},
+                "outcome": {"status": "success"},
+            }
+            unsupported = {
+                "identity": {"case_id": "planned-unsupported"},
+                "outcome": {"status": "unsupported"},
+            }
+            bundles.append({  # the same dict objects appear under both "selected" and "documents"
+                "selected": {"runnable": runnable, "planned-unsupported": unsupported},
+                "documents": {"runnable": runnable, "planned-unsupported": unsupported},
+            })
+        publisher._require_runnable_promotion_success(bundles, cases)  # baseline: must not raise
+
+        for status in ("failed", "invalid", "unsupported", "diagnostic"):  # every non-success status must be rejected
+            with self.subTest(status=status):
+                broken = copy.deepcopy(bundles)
+                broken[1]["selected"]["runnable"]["outcome"]["status"] = status  # change only the middle bundle
+                with self.assertRaisesRegex(
+                    publisher.PublisherError, "every runnable matrix case"
+                ):
+                    publisher._require_runnable_promotion_success(broken, cases)
+
+        broken = copy.deepcopy(bundles)
+        broken[1]["documents"]["retry"] = {  # extra failed document for the runnable case; "selected" is left untouched
+            "identity": {"case_id": "runnable"},
+            "outcome": {"status": "failed"},
+        }
+        with self.assertRaisesRegex(publisher.PublisherError, "rejects runnable cases"):  # a different message from the loop above
+            publisher._require_runnable_promotion_success(broken, cases)
+
+    def test_promoted_public_dataset_rejects_failed_retry_history(self) -> None:  # A promoted dataset with a failed earlier attempt in its history must be rejected.
+        dataset = _promoted_dataset()
+        successful = next(  # first attempt whose outcome is "success"
+            item for item in dataset["attempts"]
+            if item["outcome"] == "success"
+        )
+        failed = copy.deepcopy(successful)  # this clone becomes the failed first attempt
+        old_attempt_id = successful["attempt_id"]
+        successful["attempt_index"] = 2  # the success is relabelled as attempt 2 ...
+        successful["attempt_id"] = identity.attempt_id(  # ... with an ID re-derived for ordinal 2
+            allocation=successful["allocation_id"], case=successful["case_id"], ordinal=2
+        )
+        failed.update({  # the clone takes over the original ID as attempt 1 and is marked failed
+            "attempt_id": old_attempt_id,
+            "attempt_index": 1,
+            "outcome": "failed",
+            "failure_mode": "execution",
+            "reason": "execution-failed",
+            "series_id": None,
+            "selected": False,
+            "evidence": [],
+        })
+        dataset["attempts"].append(failed)
+        dataset["attempts"].sort(key=lambda item: item["attempt_id"])  # keep attempts ordered by attempt_id
+        coverage = next(  # coverage entry for the affected case
+            item for item in dataset["coverage"]
+            if item["case_id"] == failed["case_id"]
+        )
+        coverage["attempt_ids"] = [  # replace the old ID with the success's new ID
+            successful["attempt_id"] if value == old_attempt_id else value
+            for value in coverage["attempt_ids"]
+        ]
+        coverage["attempt_ids"].append(failed["attempt_id"])  # then list the failed attempt, which now owns the old ID
+        coverage["attempt_ids"].sort()
+        if coverage["selected_attempt_id"] == old_attempt_id:  # keep the selection on the successful attempt
+            coverage["selected_attempt_id"] = successful["attempt_id"]
+
+        fixture_catalog = publisher._case_disposition_catalog_sha256(dataset["coverage"])
+        with mock.patch.object(  # use this fixture's catalog digest; presumably so the catalog check passes - confirm
+            publisher, "CANONICAL_FULL_V1_CASE_CATALOG_SHA256", fixture_catalog
+        ), self.assertRaisesRegex(publisher.PublisherError, "rejects runnable cases"):
+            publisher.validate_public_dataset(dataset)
+
+    def test_unselected_success_does_not_reference_an_unpublished_series(self) -> None:  # _public_attempt keeps series_id only when selected=True.
+        raw, _ = _native_fixture()  # the samples half of the fixture is not needed
+        retained = publisher._public_attempt(raw, selected=False)
+        selected = publisher._public_attempt(raw, selected=True)
+        self.assertEqual(retained["outcome"], "success")  # the unselected attempt still reports success
+        self.assertIsNone(retained["series_id"])  # but carries no series_id
+        self.assertEqual(selected["series_id"], raw["identity"]["series_id"])  # the selected attempt keeps the raw document's series_id
+
+    def test_public_dataset_selects_latest_derived_retry(self) -> None:  # validate_public_dataset rejects a stale selection and a retry ID that is not derived.
+        dataset = _dataset()
+        first = dataset["attempts"][0]
+        second = copy.deepcopy(first)
+        second.update({  # a second attempt (ordinal 2) that is not marked as selected
+            "attempt_id": identity.attempt_id(
+                allocation=first["allocation_id"], case=first["case_id"], ordinal=2
+            ),
+            "attempt_index": 2,
+            "selected": False,
+            "series_id": None,
+            "evidence": [],
+        })
+        dataset["attempts"].append(second)
+        dataset["attempts"].sort(key=lambda item: item["attempt_id"])
+        dataset["coverage"][0]["attempt_ids"].append(second["attempt_id"])
+        dataset["coverage"][0]["attempt_ids"].sort()
+        with self.assertRaisesRegex(publisher.PublisherError, "select the latest retry"):  # coverage selection was not moved to the new attempt
+            publisher.validate_public_dataset(dataset)
+
+        second["attempt_id"] = identity.digest("attempt", {"not": "derived"})  # second is the object stored in dataset["attempts"], so this edits the dataset
+        dataset["attempts"].sort(key=lambda item: item["attempt_id"])
+        dataset["coverage"][0]["attempt_ids"] = [  # rebuild the coverage ID list from the re-sorted attempts
+            item["attempt_id"] for item in dataset["attempts"]
+        ]
+        with self.assertRaisesRegex(publisher.PublisherError, "retry identity differs"):
+            publisher.validate_public_dataset(dataset)
+
+    def test_promotion_requires_an_eligible_cohort_for_every_comparison_kind(self) -> None:  # _require_promotion_cohorts must raise when a required kind has no decision-grade cohort.
+        stable_fast, stable_fast_internal = _series(  # each _series call yields a (series, internal) pair
+            "stable-fast", "deepep", decision_grade=True
+        )
+        stable_slow, stable_slow_internal = _series(
+            "stable-slow", "uccl", decision_grade=True
+        )
+        unstable_fast, unstable_fast_internal = _series(
+            "unstable-fast", "deepep", decision_grade=True
+        )
+        unstable_slow, unstable_slow_internal = _series(
+            "unstable-slow", "uccl", decision_grade=True
+        )
+        unstable_fast["phase"] = unstable_slow["phase"] = "prefill"  # move the second pair to the prefill phase
+        unstable_fast["series_id"] = identity.series_id({"test": "unstable-fast"})  # assign fresh series IDs to the second pair
+        unstable_slow["series_id"] = identity.series_id({"test": "unstable-slow"})
+        for statistic in ("p50", "p99"):  # in run "1", entry 8: halve latency and double payload rate relative to unstable_fast
+            unstable_slow_internal["run_metrics"]["1"][8]["latency_us"][statistic] = (
+                unstable_fast_internal["run_metrics"]["1"][8]["latency_us"][statistic]
+                / 2
+            )
+            unstable_slow_internal["run_metrics"]["1"][8]["logical_payload_rate_gbps_at_latency_percentile"][statistic] = (
+                unstable_fast_internal["run_metrics"]["1"][8]["logical_payload_rate_gbps_at_latency_percentile"][statistic]
+                * 2
+            )
+        series = [stable_fast, stable_slow, unstable_fast, unstable_slow]
+        internals = {  # series_id -> internal data for that series
+            stable_fast["series_id"]: stable_fast_internal,
+            stable_slow["series_id"]: stable_slow_internal,
+            unstable_fast["series_id"]: unstable_fast_internal,
+            unstable_slow["series_id"]: unstable_slow_internal,
+        }
+        cohorts, _, _, _ = publisher.build_decisions(series, internals)  # only the first of the four returned values is used
+        eligible = [item for item in cohorts if item["eligibility"]["decision_grade"]]
+        ineligible = [item for item in cohorts if not item["eligibility"]["decision_grade"]]
+        self.assertEqual({item["kind"] for item in eligible}, {"library"})  # only library cohorts are decision grade here
+        self.assertTrue(ineligible)  # and at least one cohort is not decision grade
+        anchor_series = [  # three minimal series: uniform, zipf, and zipf with EPLB
+            {
+                "series_id": name,
+                "workload": {"routing": routing, "eplb": eplb},
+                "build": {"implementation_contract_sha256": "1" * 64},
+            }
+            for name, routing, eplb in (
+                ("uniform", "uniform", False),
+                ("zipf", "zipf", False),
+                ("zipf-eplb", "zipf", True),
+            )
+        ]
+        required = eligible + [  # add one synthetic eligible cohort for every non-library kind
+            {
+                "kind": kind,
+                "eligibility": {"decision_grade": True},
+                **({"series_ids": [item["series_id"] for item in anchor_series]}  # only the routing cohort references the anchor series
+                   if kind == "routing" else {}),
+            }
+            for kind in publisher.REQUIRED_COHORT_KINDS
+            if kind != "library"
+        ]
+        with mock.patch.object(
+            publisher, "REQUIRED_PROMOTION_COHORT_COUNTS", {}  # replace the required-count table with an empty one
+        ), mock.patch.object(
+            publisher, "_expected_chip_cohort_count", return_value=1  # stub the expected chip cohort count to 1
+        ):
+            publisher._require_promotion_cohorts(  # baseline: the full set must not raise
+                required + ineligible, anchor_series
+            )
+            for kind in publisher.REQUIRED_COHORT_KINDS:
+                with self.subTest(missing_kind=kind), self.assertRaisesRegex(
+                    publisher.PublisherError, rf"cohort kinds:.*{kind}"  # the error must name the missing kind
+                ):
+                    publisher._require_promotion_cohorts([  # drop eligible cohorts of this kind; ineligible ones stay
+                        item for item in required + ineligible
+                        if item["kind"] != kind or not item["eligibility"]["decision_grade"]
+                    ], anchor_series)
+
+    def test_promotion_requires_exact_counts_and_routing_anchors(self) -> None:  # _require_promotion_cohorts checks routing anchors and exact cohort counts.
+        dataset = _promoted_dataset()
+        counts = _cohort_counts(dataset)  # computed once and reused after the dataset is rebuilt below
+        with mock.patch.object(
+            publisher, "REQUIRED_PROMOTION_COHORT_COUNTS", counts
+        ):
+            publisher._require_promotion_cohorts(  # baseline: the untouched fixture must not raise
+                dataset["cohorts"], dataset["series"]
+            )
+            routing = next(  # first routing cohort
+                item for item in dataset["cohorts"] if item["kind"] == "routing"
+            )
+            eplb = next(  # the series in that cohort that has EPLB enabled
+                item for item in dataset["series"]
+                if item["series_id"] in routing["series_ids"]
+                and item["workload"]["eplb"]
+            )
+            eplb["workload"]["eplb"] = False  # turn EPLB off on that series
+            with self.assertRaisesRegex(publisher.PublisherError, "exact uniform"):
+                publisher._require_promotion_cohorts(
+                    dataset["cohorts"], dataset["series"]
+                )
+
+        dataset = _promoted_dataset()  # fresh fixture for the next scenario
+        routing = next(item for item in dataset["cohorts"] if item["kind"] == "routing")
+        zipf = next(  # the zipf series without EPLB in the routing cohort
+            item for item in dataset["series"]
+            if item["series_id"] in routing["series_ids"]
+            and item["workload"]["routing"] == "zipf"
+            and not item["workload"]["eplb"]
+        )
+        zipf["build"]["implementation_contract_sha256"] = "f" * 64  # give that series a different implementation contract digest
+        with mock.patch.object(
+            publisher, "REQUIRED_PROMOTION_COHORT_COUNTS", counts
+        ), self.assertRaisesRegex(publisher.PublisherError, "identical off-EPLB"):
+            publisher._require_promotion_cohorts(dataset["cohorts"], dataset["series"])
+
+        wrong_counts = {**counts, "library": counts["library"] + 1}  # require one more library cohort than the fixture has
+        with mock.patch.object(
+            publisher, "REQUIRED_PROMOTION_COHORT_COUNTS", wrong_counts
+        ), self.assertRaisesRegex(publisher.PublisherError, "exactly"):
+            publisher._require_promotion_cohorts(  # the dataset still carries the altered zipf digest from above
+                dataset["cohorts"], dataset["series"]
+            )
+
+    def test_promotion_requires_every_derived_chip_cohort_to_be_stable(self) -> None:  # Every expected chip cohort must be present and decision grade.
+        dataset = _promoted_dataset()
+        chip = next(item for item in dataset["cohorts"] if item["kind"] == "chip")  # first chip cohort in the fixture
+        self.assertEqual(  # the derived expectation matches the number of chip cohorts in the fixture
+            publisher._expected_chip_cohort_count(dataset["series"]),
+            sum(item["kind"] == "chip" for item in dataset["cohorts"]),
+        )
+        with mock.patch.object(
+            publisher, "REQUIRED_PROMOTION_COHORT_COUNTS", _cohort_counts(dataset)
+        ):
+            missing = [item for item in dataset["cohorts"] if item is not chip]  # every cohort except that chip cohort
+            with self.assertRaisesRegex(publisher.PublisherError, "derived chip cohorts"):
+                publisher._require_promotion_cohorts(missing, dataset["series"])
+
+            chip["eligibility"]["decision_grade"] = False  # cohort is present but no longer decision grade
+            with self.assertRaisesRegex(publisher.PublisherError, "derived chip cohorts"):
+                publisher._require_promotion_cohorts(
+                    dataset["cohorts"], dataset["series"]
+                )
+
+    def test_promotion_rejects_more_than_three_bundles(self) -> None:  # Four bundles are rejected by build_dataset(promote=True) and by validate_public_dataset.
+        bundles = {  # stub bundles keyed by ID
+            str(run_id): {
+                "id": str(run_id), "cases": [],
+                "manifest": {
+                    "matrix": {"sha256": publisher.CANONICAL_FULL_V1_MATRIX_SHA256},  # each stub carries the canonical matrix digest
+                    "run": {"run_id": str(run_id), "run_attempt": 1},
+                },
+            }
+            for run_id in range(1, 5)  # four bundles: "1" through "4"
+        }
+        with mock.patch.object(
+            publisher, "load_bundle", side_effect=lambda _, bundle_id: bundles[bundle_id]  # serve the stubs; the first argument is ignored
+        ), self.assertRaisesRegex(publisher.PublisherError, "three independent"):
+            publisher.build_dataset(object(), list(bundles), promote=True)  # object() is a placeholder store argument
+
+        dataset = _promoted_dataset()
+        dataset["source_bundle_ids"].append("d" * 64)  # add one extra source bundle ID to the promoted fixture
+        counts = _cohort_counts(dataset)
+        with mock.patch.object(
+            publisher,
+            "CANONICAL_FULL_V1_CASE_CATALOG_SHA256",
+            publisher._case_disposition_catalog_sha256(dataset["coverage"]),  # catalog digest computed from this fixture's coverage
+        ), mock.patch.object(
+            publisher, "REQUIRED_PROMOTION_COHORT_COUNTS", counts
+        ), self.assertRaisesRegex(publisher.PublisherError, "complete coverage"):
+            publisher.validate_public_dataset(dataset)
+
+    def test_standalone_promotion_binds_matrix_and_requested_dispositions(self) -> None:  # validate_public_dataset ties a promoted dataset to the canonical matrix, catalog and dispositions.
+        dataset = _promoted_dataset()
+        fixture_catalog = publisher._case_disposition_catalog_sha256(dataset["coverage"])
+        with self.assertRaisesRegex(  # with nothing patched, the fixture fails the catalog check
+            publisher.PublisherError, "canonical case/disposition catalog"
+        ):
+            publisher.validate_public_dataset(dataset)
+        with mock.patch.object(
+            publisher, "CANONICAL_FULL_V1_CASE_CATALOG_SHA256", fixture_catalog
+        ), mock.patch.object(
+            publisher,
+            "REQUIRED_PROMOTION_COHORT_COUNTS",
+            _cohort_counts(dataset),
+        ):
+            publisher.validate_public_dataset(dataset)  # with both constants patched to the fixture, validation must not raise
+
+        diagnostic = copy.deepcopy(dataset)
+        item = diagnostic["series"][0]
+        item["status"] = "diagnostic"  # downgrade the first series
+        item["eligibility"].update({  # and mark it as not decision grade because of an unstable p50
+            "decision_grade": False,
+            "stable_p50": False,
+            "p50_max_min_ratio": 1.20,
+            "reasons": ["unstable-p50"],
+        })
+        with mock.patch.object(
+            publisher, "CANONICAL_FULL_V1_CASE_CATALOG_SHA256", fixture_catalog
+        ), mock.patch.object(
+            publisher,
+            "REQUIRED_PROMOTION_COHORT_COUNTS",
+            _cohort_counts(dataset),  # counts come from the original dataset, not from the diagnostic copy
+        ), self.assertRaisesRegex(
+            publisher.PublisherError, "unstable or incomplete required series"
+        ):
+            publisher.validate_public_dataset(diagnostic)
+
+        broken = copy.deepcopy(dataset)
+        broken["promotion"]["matrix_id"] = "d" * 64  # replace the promotion's matrix ID with a placeholder
+        with self.assertRaisesRegex(publisher.PublisherError, "canonical full-v1 matrix"):  # expected to raise with nothing patched
+            publisher.validate_public_dataset(broken)
+
+        for original, replacement in (("runnable", "unsupported"),  # flip one coverage entry in each direction
+                                      ("unsupported", "runnable")):
+            with self.subTest(original=original):
+                broken = copy.deepcopy(dataset)
+                item = next(  # first coverage entry with the original disposition
+                    coverage for coverage in broken["coverage"]
+                    if coverage["disposition"] == original
+                )
+                item["disposition"] = replacement
+                with mock.patch.object(
+                    publisher,
+                    "CANONICAL_FULL_V1_CASE_CATALOG_SHA256",
+                    publisher._case_disposition_catalog_sha256(broken["coverage"]),  # digest of the already-flipped coverage
+                ), self.assertRaisesRegex(
+                    publisher.PublisherError, "requested dispositions"
+                ):
+                    publisher.validate_public_dataset(broken)
+
+    def test_workflow_matrix_and_catalog_digests_do_not_drift(self) -> None:  # Regenerates the full matrix and compares it with the digests and counts pinned in publisher.
+        with tempfile.TemporaryDirectory() as temporary:
+            matrix_path = Path(temporary) / "matrix_full.json"
+            result = subprocess.run(  # run sweep_matrix.py with the current interpreter
+                [
+                    sys.executable, str(ROOT / "sweep_matrix.py"),
+                    "--suites", "all", "--max-cases", "128",
+                    "--backends", "all", "--out", str(matrix_path),
+                ],
+                text=True,
+                capture_output=True,
+            )
+            self.assertEqual(result.returncode, 0, result.stderr)  # stderr is used as the failure message
+            self.assertEqual(  # digest of the generated file bytes must equal the pinned matrix digest
+                hashlib.sha256(matrix_path.read_bytes()).hexdigest(),
+                publisher.CANONICAL_FULL_V1_MATRIX_SHA256,
+            )
+            matrix = contracts.strict_load(matrix_path)  # parsed before the temporary directory is removed
+        coverage = [  # (case_id, disposition) for every requested case
+            {
+                "case_id": item["case"]["case_id"],
+                "disposition": item["disposition"],
+            }
+            for item in matrix["requested_cases"]
+        ]
+        self.assertEqual(  # catalog digest of that coverage must equal the pinned catalog digest
+            publisher._case_disposition_catalog_sha256(coverage),
+            publisher.CANONICAL_FULL_V1_CASE_CATALOG_SHA256,
+        )
+        self.assertEqual(
+            (
+                len(matrix["include"]), len(coverage),  # include entries, then requested cases
+                sum(item["disposition"] == "runnable" for item in coverage),
+                sum(item["disposition"] == "unsupported" for item in coverage),
+                sum(  # whitespace-separated ladder entries over all cases
+                    len(item["case"]["ladder"].split())
+                    for item in matrix["requested_cases"]
+                ),
+                sum(  # ladder entries of runnable cases
+                    len(item["case"]["ladder"].split())
+                    for item in matrix["requested_cases"]
+                    if item["disposition"] == "runnable"
+                ),
+                sum(  # ladder entries of unsupported cases
+                    len(item["case"]["ladder"].split())
+                    for item in matrix["requested_cases"]
+                    if item["disposition"] == "unsupported"
+                ),
+            ),
+            (38, 360, 228, 132, 840, 532, 308),  # pinned values, in the same order as the tuple above
+        )
+        library: dict[tuple, set[str]] = {}  # (sku, shape, route) -> backends
+        system: dict[tuple, set[str]] = {}  # (shape, route) -> skus
+        routing: dict[tuple, list[tuple[str, bool]]] = {}  # (sku, backend, shape) -> routes
+        for requested in matrix["requested_cases"]:
+            if requested["disposition"] != "runnable":  # only runnable cases count towards cohorts
+                continue
+            case = requested["case"]
+            shape = tuple(
+                case[field]
+                for field in ("workload", "hidden", "topk", "experts", "ep", "phase")
+            )
+            route = (case["routing"], case["eplb"])
+            if case["backend"] != "nccl-ep":  # non-nccl-ep backends are grouped per SKU
+                library.setdefault((requested["sku"], shape, route), set()).add(
+                    case["backend"]
+                )
+            else:  # nccl-ep cases are grouped across SKUs
+                system.setdefault((shape, route), set()).add(requested["sku"])
+            routing.setdefault(  # routes are collected for every backend
+                (requested["sku"], case["backend"], shape), []
+            ).append(route)
+        anchors = {("uniform", False), ("zipf", False), ("zipf", True)}  # the three (routing, eplb) combinations a routing group must have
+        self.assertEqual(
+            {
+                "library": sum(len(variants) >= 2 for variants in library.values()),  # groups with at least two backends
+                "system": sum(len(variants) >= 2 for variants in system.values()),  # groups with at least two SKUs
+                "routing": sum(  # groups with exactly three routes that are exactly the anchors
+                    len(variants) == 3 and set(variants) == anchors
+                    for variants in routing.values()
+                ),
+            },
+            publisher.REQUIRED_PROMOTION_COHORT_COUNTS,
+        )
+
+    def test_build_promotion_requires_canonical_full_matrix(self) -> None:  # build_dataset(promote=True) must raise for these bundles with a placeholder matrix digest.
+        bundles = {  # stub bundles keyed by ID
+            str(run_id): {
+                "id": str(run_id), "cases": [],
+                "manifest": {
+                    "matrix": {"sha256": "d" * 64},  # placeholder digest; expected to differ from the canonical constant
+                    "run": {"run_id": str(run_id), "run_attempt": 1},
+                },
+            }
+            for run_id in range(1, 4)  # three bundles: "1" through "3"
+        }
+        with mock.patch.object(
+            publisher, "load_bundle", side_effect=lambda _, bundle_id: bundles[bundle_id]  # serve the stubs; the first argument is ignored
+        ), self.assertRaisesRegex(publisher.PublisherError, "canonical full-v1 matrix"):
+            publisher.build_dataset(object(), list(bundles), promote=True)  # object() is a placeholder store argument
+
+    def test_rejection_updates_latest_but_never_dev_latest(self) -> None:  # A rejected ingest leaves dev-latest untouched and points latest-attempt at a quarantined dataset.
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            matrix, artifact = _unsupported_delivery(root)
+            store = publisher.Store(root / "store")
+            sentinel = b"existing-promoted-pointer\n"
+            (store.channels / "dev-latest.json").write_bytes(sentinel)  # pre-existing dev-latest content to detect overwrites
+            (artifact / "unknown.json").write_text('{"format":"unknown"}')  # extra document added to the delivery; the ingest below is expected to raise
+            with self.assertRaises(publisher.PublisherError):
+                publisher.ingest_command(_args(store.root, matrix, artifact))
+            self.assertEqual((store.channels / "dev-latest.json").read_bytes(), sentinel)  # dev-latest is byte-identical
+            pointer = store.verify_channel("latest-attempt")
+            dataset = publisher.strict_load(store.public / pointer["dataset"]["path"])  # the dataset path is relative to store.public
+            self.assertEqual(dataset["promotion"]["status"], "quarantined")
+            self.assertTrue(any(store.quarantine.iterdir()))  # the quarantine area has at least one entry
+
+    def test_repeated_rejection_is_content_idempotent(self) -> None:  # Rejecting the same delivery twice leaves the pointer and the store entry counts unchanged.
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            matrix, artifact = _unsupported_delivery(root)
+            store = publisher.Store(root / "store")
+            (artifact / "unknown.json").write_text('{"format":"unknown"}')  # extra document added to the delivery; both ingests are expected to raise
+            with self.assertRaises(publisher.PublisherError):
+                publisher.ingest_command(_args(store.root, matrix, artifact))
+            pointer = (store.channels / "latest-attempt.json").read_bytes()  # snapshot the pointer after the first rejection
+            counts = tuple(  # entry counts per store area after the first rejection
+                len(list(path.iterdir()))
+                for path in (store.incoming, store.quarantine, store.datasets)
+            )
+            with self.assertRaises(publisher.PublisherError):
+                publisher.ingest_command(_args(store.root, matrix, artifact))
+            self.assertEqual((store.channels / "latest-attempt.json").read_bytes(), pointer)  # the pointer is byte-identical
+            self.assertEqual(  # and no area gained or lost entries
+                tuple(
+                    len(list(path.iterdir()))
+                    for path in (store.incoming, store.quarantine, store.datasets)
+                ),
+                counts,
+            )
+
+    def test_distinct_rejections_advance_latest_attempt(self) -> None:  # Two rejections with different content produce two datasets and a changed pointer.
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            matrix, artifact = _unsupported_delivery(root)
+            store = publisher.Store(root / "store")
+            unknown = artifact / "unknown.json"
+            unknown.write_text('{"format":"unknown-one"}')  # content for the first rejected delivery
+            with self.assertRaises(publisher.PublisherError):
+                publisher.ingest_command(_args(store.root, matrix, artifact))
+            first = (store.channels / "latest-attempt.json").read_bytes()
+            unknown.write_text('{"format":"unknown-two"}')  # same file, different content
+            with self.assertRaises(publisher.PublisherError):
+                publisher.ingest_command(_args(store.root, matrix, artifact))
+            second = (store.channels / "latest-attempt.json").read_bytes()
+            self.assertNotEqual(second, first)  # the pointer changed between the two rejections
+            self.assertEqual(len(list(store.datasets.iterdir())), 2)  # one dataset entry per distinct rejection
+
+    def test_zip_traversal_is_rejected(self) -> None:  # extract_archive must refuse a member whose name starts with "../".
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            archive = root / "bad.zip"
+            with zipfile.ZipFile(archive, "w") as handle:
+                handle.writestr("../escape.json", "{}")  # member name that points above the extraction directory
+            with self.assertRaisesRegex(publisher.PublisherError, "escapes"):
+                publisher.extract_archive(archive, root / "out")
+
+    def test_store_and_directory_archive_reject_symlinks(self) -> None:  # Store rejects a symlinked parent; _archive_download_directory rejects a symlinked file.
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            real = root / "real"
+            real.mkdir()
+            alias = root / "alias"
+            alias.symlink_to(real, target_is_directory=True)  # alias is a symlink to the real directory
+            with self.assertRaisesRegex(publisher.PublisherError, "symlinked parent"):
+                publisher.Store(alias / "store")
+            self.assertFalse((real / "store").exists())  # nothing was created through the symlink
+            artifact = root / f"cxunsupported-{RUN['run_id']}-{RUN['run_attempt']}"  # directory named from the RUN fixture's run_id and run_attempt
+            artifact.mkdir()
+            target = root / "target.json"
+            target.write_text("{}")
+            (artifact / "linked.json").symlink_to(target)  # a symlinked file inside the artifact directory
+            with self.assertRaisesRegex(publisher.PublisherError, "symlink"):
+                publisher._archive_download_directory(artifact, root / "artifact.zip")
+
+    def test_offline_caller_metadata_is_validated_before_store_creation(self) -> None:
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            matrix, artifact = _unsupported_delivery(root)
+            store_root = root / "store"
+            args = _args(store_root, matrix, artifact)
+            args.run_id = "0"
+            with self.assertRaisesRegex(publisher.PublisherError, "run-id"):
+                publisher.ingest_command(args)
+            self.assertFalse(store_root.exists())
+
+            promote = types.SimpleNamespace(
+                store_root=str(store_root), bundle=["not-a-digest"]
+            )
+            with self.assertRaisesRegex(publisher.PublisherError, "bundle IDs"):
+                publisher.promote_command(promote)
+            self.assertFalse(store_root.exists())
+            with self.assertRaisesRegex(publisher.PublisherError, "absolute path"):
+                publisher._store_from_args(types.SimpleNamespace(store_root="relative-store"))
+
+    def test_store_rejects_group_or_world_writable_root(self) -> None:
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve() / "unsafe-store"
+            root.mkdir()
+            root.chmod(0o772)
+            with self.assertRaisesRegex(publisher.PublisherError, "group/world writable"):
+                publisher.Store(root)
+
+    def test_retry_ordinals_must_be_contiguous_from_one(self) -> None:
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            matrix, artifact = _unsupported_delivery(root, (1, 3))
+            with self.assertRaisesRegex(publisher.PublisherError, "contiguous ordinals"):
+                publisher.ingest_command(_args(root / "store", matrix, artifact))
+
+    def test_delivery_rejects_extra_archive_and_non_native_member(self) -> None:
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            matrix, artifact = _unsupported_delivery(root)
+            extra = root / f"cxshard-extra-{RUN['run_id']}-{RUN['run_attempt']}"
+            extra.mkdir()
+            (extra / "extra.json").write_text("{}")
+            args = _args(root / "store-extra", matrix, artifact)
+            args.artifact.append(str(extra))
+            with self.assertRaisesRegex(publisher.PublisherError, "archive set"):
+                publisher.ingest_command(args)
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            matrix, artifact = _unsupported_delivery(root)
+            (artifact / "notes.txt").write_text("not native evidence")
+            with self.assertRaisesRegex(publisher.PublisherError, "unconsumed"):
+                publisher.ingest_command(_args(root / "store-member", matrix, artifact))
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            matrix, artifact = _unsupported_delivery(root)
+            path = next(artifact.glob("*.json"))
+            terminal = json.loads(path.read_text())
+            terminal["outcome"]["reason"] = next(
+                reason for reason in contracts.CAPABILITY_FAILURE_REASONS
+                if reason != terminal["outcome"]["reason"]
+            )
+            path.write_text(json.dumps(terminal))
+            with self.assertRaisesRegex(publisher.PublisherError, "reason differs"):
+                publisher.ingest_command(_args(root / "store-reason", matrix, artifact))
+
+    def test_rates_invert_latency_and_global_tokens_use_ep_size(self) -> None:  # one mutation per validator check
+        dataset = _dataset()  # baseline fixture; every `broken` below is a fresh deep copy of it
+        publisher.validate_public_dataset(dataset)  # the unmodified fixture must validate
+        rates = dataset["series"][0]["points"][0]["components"]["roundtrip"]["logical_payload_rate_gbps_at_latency_percentile"]
+        self.assertGreater(rates["p50"], rates["p99"])  # rate at the p50 latency exceeds rate at the p99 latency
+        broken = copy.deepcopy(dataset)
+        broken["series"][0]["points"][0]["global_tokens"] = 128  # expected to trip the "EP size" check
+        with self.assertRaisesRegex(publisher.PublisherError, "EP size"):
+            publisher.validate_public_dataset(broken)
+        broken = copy.deepcopy(dataset)
+        broken["series"][0]["points"][0]["roundtrip_token_rate_at_latency_percentile"]["p99"] *= 2  # doubled p99 token rate
+        with self.assertRaisesRegex(publisher.PublisherError, "token throughput"):
+            publisher.validate_public_dataset(broken)
+        broken = copy.deepcopy(dataset)
+        broken["attempts"][0]["evidence"][0]["point_id"] = identity.point_id(  # point id built for tokens_per_rank=16
+            series=broken["series"][0]["series_id"], tokens_per_rank=16
+        )
+        with self.assertRaisesRegex(publisher.PublisherError, "point evidence"):
+            publisher.validate_public_dataset(broken)
+        broken = copy.deepcopy(dataset)
+        broken["attempts"][0]["series_id"] = None  # drop the attempt's series reference
+        with self.assertRaisesRegex(publisher.PublisherError, "present exactly for selected success"):
+            publisher.validate_public_dataset(broken)
+        broken = copy.deepcopy(dataset)
+        component = broken["series"][0]["points"][0]["components"]["roundtrip"]
+        component["logical_bytes"] = None  # remove both logical-bandwidth fields together
+        component["logical_payload_rate_gbps_at_latency_percentile"] = None
+        with self.assertRaisesRegex(publisher.PublisherError, "logical bandwidth is missing"):
+            publisher.validate_public_dataset(broken)
+
+        for mutate in (  # each lambda changes one series field; all must raise "frozen v1"
+            lambda item: item.update({"model": "different-model"}),
+            lambda item: item["workload"].update({"hidden": 4096}),
+            lambda item: item["workload"].update({"top_k": 4}),
+            lambda item: item["workload"].update({"experts": 128}),
+        ):
+            broken = copy.deepcopy(dataset)
+            mutate(broken["series"][0])
+            with self.assertRaisesRegex(publisher.PublisherError, "frozen v1"):
+                publisher.validate_public_dataset(broken)
+
+        broken = copy.deepcopy(dataset)
+        broken["series"][0]["eplb"]["mapping_sha256"] = "f" * 64  # set an EPLB mapping hash on the first series
+        with self.assertRaisesRegex(publisher.PublisherError, "claims a plan"):
+            publisher.validate_public_dataset(broken)
+
+        broken = copy.deepcopy(dataset)
+        broken["series"][0]["backend"].update({  # relabel the series backend as an nccl-ep reference
+            "id": "nccl-ep", "label": publisher.BACKEND_LABELS["nccl-ep"],
+            "role": "reference", "generation": "rccl",
+        })
+        broken["coverage"][0]["backend"] = "nccl-ep"  # keep the first coverage row in step with the relabel
+        with self.assertRaisesRegex(publisher.PublisherError, "configuration"):
+            publisher.validate_public_dataset(broken)
+
+    def test_routing_and_eplb_facts_must_match_across_repeats(self) -> None:
+        raw, _ = _native_fixture()
+        descriptor = publisher._eplb_descriptor(raw)
+        facts = publisher._routing_facts(raw["measurement"]["rows"][0])
+        self.assertEqual(
+            publisher._exact_repeat_value([descriptor, copy.deepcopy(descriptor)], "EPLB"),
+            descriptor,
+        )
+        self.assertEqual(
+            publisher._exact_repeat_value([facts, copy.deepcopy(facts)], "routing"),
+            facts,
+        )
+        changed = copy.deepcopy(facts)
+        changed["hotspot_ratio"] += 0.1
+        with self.assertRaisesRegex(publisher.PublisherError, "routing differs"):
+            publisher._exact_repeat_value([facts, changed], "routing")
+
+        dataset = _promoted_dataset()
+        dataset["promotion"]["status"] = "diagnostic"
+        eplb = next(item for item in dataset["series"] if item["eplb"]["enabled"])
+        eplb["points"][0]["routing"]["empty_expert_count"] = 280
+        publisher.validate_public_dataset(dataset)
+        eplb["points"][0]["routing"]["empty_expert_count"] = 288
+        with self.assertRaisesRegex(publisher.PublisherError, "routing/load facts"):
+            publisher.validate_public_dataset(dataset)
+
+        for field, value in (
+            ("mapping_sha256", "0" * 64),
+            ("redundant_experts", 31),
+            ("replicated_experts", 1),
+            ("max_replicas", 2),
+            ("replicated_experts", 257),
+            ("max_replicas", 999),
+            ("imbalance_after", 0.4),
+            ("planner", "different-planner"),
+            ("reference_tokens_per_rank", 1024),
+        ):
+            broken = _promoted_dataset()
+            broken["promotion"]["status"] = "diagnostic"
+            descriptor = next(
+                item["eplb"] for item in broken["series"] if item["eplb"]["enabled"]
+            )
+            descriptor[field] = value
+            with self.subTest(eplb_field=field), self.assertRaisesRegex(
+                publisher.PublisherError, "EPLB descriptor"
+            ):
+                publisher.validate_public_dataset(broken)
+
+    def test_publisher_owns_stable_rankings_and_recommendations(self) -> None:
+        fast, fast_internal = _series("fast", "deepep", decision_grade=True)
+        slow, slow_internal = _series("slow", "uccl", decision_grade=True)
+        reference, reference_internal = _series("reference", "nccl-ep", decision_grade=True)
+        reference_peer, reference_peer_internal = _series(
+            "reference-peer", "nccl-ep", decision_grade=True
+        )
+        reference["backend"]["role"] = "reference"
+        reference_peer["backend"]["role"] = "reference"
+        reference_peer["system"].update({"sku": "h200-dgxc", "label": "NVIDIA H200"})
+        cohorts, rankings, recommendations, _ = publisher.build_decisions(
+            [fast, slow, reference, reference_peer], {
+                fast["series_id"]: fast_internal,
+                slow["series_id"]: slow_internal,
+                reference["series_id"]: reference_internal,
+                reference_peer["series_id"]: reference_peer_internal,
+            }
+        )
+        library = next(item for item in cohorts if item["kind"] == "library")
+        ranking = next(item for item in rankings if item["cohort_id"] == library["cohort_id"]
+                       and item["metric"]["measure"] == "latency_us"
+                       and item["metric"]["statistic"] == "p99")
+        self.assertTrue(library["eligibility"]["decision_grade"])
+        self.assertEqual(ranking["entries"][0]["series_id"], fast["series_id"])
+        self.assertTrue(any(item["series_id"] == fast["series_id"] for item in recommendations))
+        self.assertFalse(any(
+            entry["series_id"] == reference["series_id"]
+            for item in rankings if item["cohort_id"] == library["cohort_id"]
+            for entry in item["entries"]
+        ))
+        self.assertTrue(any(
+            item["kind"] == "system" and reference["series_id"] in item["series_ids"]
+            for item in cohorts
+        ))
+
+    def test_routing_evidence_is_experimental_and_not_a_configuration_recommendation(self) -> None:
+        dataset = _promoted_dataset()
+        routing = next(item for item in dataset["cohorts"] if item["kind"] == "routing")
+        members = [
+            item for item in dataset["series"]
+            if item["series_id"] in routing["series_ids"]
+        ]
+        self.assertEqual(
+            {(item["workload"]["routing"], item["workload"]["eplb"]) for item in members},
+            {("uniform", False), ("zipf", False), ("zipf", True)},
+        )
+        self.assertIn("implementation-static-build", routing["controlled_factors"])
+        self.assertIn("resource", routing["controlled_factors"])
+        self.assertEqual(
+            routing["varying_factors"],
+            ["workload.routing", "workload.eplb", "implementation-config"],
+        )
+        self.assertEqual(
+            len({item["build"]["routing_control_sha256"] for item in members}),
+            1,
+        )
+        self.assertGreater(
+            len({item["build"]["implementation_contract_sha256"] for item in members}),
+            1,
+        )
+        self.assertEqual(len({json.dumps(item["resource"], sort_keys=True) for item in members}), 1)
+        self.assertEqual(routing["publication_tier"], "comparable-experimental")
+        self.assertTrue(any(
+            item["cohort_id"] == routing["cohort_id"] for item in dataset["rankings"]
+        ))
+        self.assertFalse(any(
+            item["cohort_id"] == routing["cohort_id"] for item in dataset["recommendations"]
+        ))
+        self.assertTrue(all(
+            item["publication_tier"] == "official"
+            for item in dataset["recommendations"]
+        ))
+        self.assertFalse(any(
+            dataset_cohort["publication_tier"] == "comparable-experimental"
+            and item["cohort_id"] == dataset_cohort["cohort_id"]
+            for item in dataset["recommendations"]
+            for dataset_cohort in dataset["cohorts"]
+        ))
+        self.assertTrue(all(
+            item["publication_tier"] == "comparable-experimental"
+            for item in dataset["sensitivities"]
+            if item["cohort_id"] == routing["cohort_id"]
+        ))
+
+    def test_routing_implementation_mismatch_blocks_all_decisions(self) -> None:
+        dataset = _promoted_dataset()
+        published = next(item for item in dataset["cohorts"] if item["kind"] == "routing")
+        members = [
+            item for item in dataset["series"]
+            if item["series_id"] in published["series_ids"]
+        ]
+        zipf = next(
+            item for item in members
+            if item["workload"]["routing"] == "zipf" and not item["workload"]["eplb"]
+        )
+        zipf["build"]["implementation_contract_sha256"] = "f" * 64
+        internals = {}
+        for member in members:
+            point = member["points"][0]
+            roundtrip = point["components"]["roundtrip"]
+            metrics = {
+                "latency_us": {
+                    name: roundtrip["latency_us"][name] for name in ("p50", "p99")
+                },
+                "logical_payload_rate_gbps_at_latency_percentile": {
+                    name: roundtrip[
+                        "logical_payload_rate_gbps_at_latency_percentile"
+                    ][name]
+                    for name in ("p50", "p99")
+                },
+            }
+            internals[member["series_id"]] = {
+                "run_metrics": {
+                    str(run): {point["tokens_per_rank"]: metrics}
+                    for run in range(3)
+                }
+            }
+        cohorts, rankings, recommendations, sensitivities = publisher.build_decisions(
+            members, internals
+        )
+        routing = next(item for item in cohorts if item["kind"] == "routing")
+        self.assertFalse(routing["eligibility"]["decision_grade"])
+        self.assertIn(
+            "implementation-config-mismatch", routing["eligibility"]["reasons"]
+        )
+        self.assertEqual((rankings, recommendations, sensitivities), ([], [], []))
+
+    def test_promoted_series_fields_are_bound_to_case_and_series_identities(self) -> None:  # edited series fields must fail validation
+        dataset = _promoted_dataset()  # baseline; each scenario mutates a deep copy
+        changed = copy.deepcopy(dataset)
+        series = next(
+            item for item in changed["series"]
+            if item["system"]["sku"] == "h100-dgxc"
+        )
+        series["system"].update({  # relabel an h100 series as h200 ...
+            "sku": "h200-dgxc", "label": "NVIDIA H200",
+            "topology_class": "h200-nvlink-island",
+        })
+        for case_id in series["case_ids"]:  # ... and give its coverage rows the same sku
+            next(
+                item for item in changed["coverage"] if item["case_id"] == case_id
+            )["sku"] = "h200-dgxc"
+        with self.assertRaisesRegex(publisher.PublisherError, "configuration|case identity"):
+            publisher.validate_public_dataset(changed)
+
+        for field, value in (  # replace one build field at a time on the first series
+            ("source_sha", "b" * 40),
+            ("image_digest", "sha256:" + "4" * 64),
+            ("squash_sha256", "5" * 64),
+            ("runtime_fingerprint_sha256", "6" * 64),
+            ("implementation_contract_sha256", "7" * 64),
+            ("public_config_sha256", "9" * 64),
+            ("routing_control_sha256", "8" * 64),
+        ):
+            changed = copy.deepcopy(dataset)
+            changed["series"][0]["build"][field] = value
+            with self.subTest(build_field=field), self.assertRaisesRegex(
+                publisher.PublisherError, "commit"
+            ):
+                publisher.validate_public_dataset(changed)
+        changed = copy.deepcopy(dataset)
+        changed["series"][0]["workload"]["workload_id"] = identity.workload_id(  # id derived from unrelated factors
+            {"changed": True}
+        )
+        with self.assertRaisesRegex(publisher.PublisherError, "committed factors"):
+            publisher.validate_public_dataset(changed)
+
+        for mutate, message in (  # (mutation of the first series, expected error pattern)
+            (lambda item: item["backend"].update({
+                "generation": "fabricated", "version": "fabricated-999",
+            }), "configuration"),
+            (lambda item: item["resource"].update({
+                "profile": "profile-fabricated", "configured_units": 99,
+            }), "configuration"),
+            (lambda item: item["system"].update({"label": "Fabricated H100"}), "projection"),
+        ):
+            changed = copy.deepcopy(dataset)
+            mutate(changed["series"][0])
+            with self.assertRaisesRegex(publisher.PublisherError, message):
+                publisher.validate_public_dataset(changed)
+
+        diagnostic = _dataset()  # same source_sha edit, this time on the _dataset() fixture
+        diagnostic["series"][0]["build"]["source_sha"] = "b" * 40
+        with self.assertRaisesRegex(publisher.PublisherError, "committed factors"):
+            publisher.validate_public_dataset(diagnostic)
+
+    def test_all_decision_metrics_require_stable_repeat_ordering(self) -> None:
+        fast, fast_internal = _series("ordering-fast", "deepep", decision_grade=True)
+        slow, slow_internal = _series("ordering-slow", "uccl", decision_grade=True)
+        internals = {
+            fast["series_id"]: fast_internal,
+            slow["series_id"]: slow_internal,
+        }
+
+        cohorts, rankings, recommendations, _ = publisher.build_decisions(
+            [fast, slow], internals
+        )
+        library = next(item for item in cohorts if item["kind"] == "library")
+        self.assertTrue(library["eligibility"]["decision_grade"])
+        self.assertEqual(
+            len([item for item in rankings if item["cohort_id"] == library["cohort_id"]]),
+            4,
+        )
+        self.assertEqual(
+            len([
+                item for item in recommendations
+                if item["cohort_id"] == library["cohort_id"]
+            ]),
+            4,
+        )
+
+        for statistic in ("p50", "p99"):
+            slow_internal["run_metrics"]["1"][8]["logical_payload_rate_gbps_at_latency_percentile"][statistic] = (
+                fast_internal["run_metrics"]["1"][8]["logical_payload_rate_gbps_at_latency_percentile"][statistic] * 2
+            )
+        cohorts, rankings, recommendations, _ = publisher.build_decisions(
+            [fast, slow], internals
+        )
+        library = next(item for item in cohorts if item["kind"] == "library")
+        self.assertFalse(library["eligibility"]["decision_grade"])
+        self.assertIn("unstable-ordering", library["eligibility"]["reasons"])
+        self.assertFalse(any(
+            item["cohort_id"] == library["cohort_id"] for item in rankings
+        ))
+        self.assertFalse(any(
+            item["cohort_id"] == library["cohort_id"] for item in recommendations
+        ))
+
+    def test_extra_eligibility_reason_blocks_decision_grade(self) -> None:
+        allocations = [identity.allocation_id({"run": run}) for run in range(3)]
+        eligibility = publisher._eligibility_record(
+            allocations, complete=True, correct=True, measured=True,
+            stable_ordering=True, p50_ratio=1.01, p99_ratio=1.02,
+            extra_reasons=["incomplete-provenance"],
+        )
+        self.assertFalse(eligibility["decision_grade"])
+        self.assertEqual(eligibility["reasons"], ["incomplete-provenance"])
+        self.assertIs(publisher._eligibility(eligibility, "fixture"), eligibility)
+        broken = {**eligibility, "decision_grade": True}
+        with self.assertRaisesRegex(publisher.PublisherError, "promotion gates"):
+            publisher._eligibility(broken, "fixture")
+
+    def test_schema_is_strict_and_channel_target_must_be_complete(self) -> None:  # schema, size limit, then on-disk channel checks
+        dataset = _dataset()
+        dataset["unexpected"] = True  # a top-level key the fixture does not normally carry
+        with self.assertRaises(publisher.PublisherError):
+            publisher.validate_public_dataset(dataset)
+        with mock.patch.object(publisher, "MAX_PUBLIC_DATASET_BYTES", 1), self.assertRaisesRegex(  # shrink the limit to one byte
+            publisher.PublisherError, "serving size limit"
+        ):
+            publisher.validate_public_dataset(_dataset())
+        with tempfile.TemporaryDirectory() as temporary:
+            store = publisher.Store(Path(temporary).resolve())
+            dataset = _dataset()
+            digest, size = store.install_dataset(dataset)
+            store.update_channel("latest-attempt", digest, size, dataset["generated_at"])
+            self.assertEqual(store.verify_channel("latest-attempt")["dataset"]["sha256"], digest)
+            channel_path = store.channels / "latest-attempt.json"
+            pointer = publisher.strict_load(channel_path)
+            pointer["generated_at"] = "2099-01-01T00:00:00Z"  # rewrite the pointer with a different timestamp
+            channel_path.write_bytes(contracts.canonical_json_bytes(pointer))
+            with self.assertRaisesRegex(publisher.PublisherError, "metadata differs"):
+                store.verify_channel("latest-attempt")
+            store.update_channel("latest-attempt", digest, size, dataset["generated_at"])  # republish with the original values
+            with self.assertRaisesRegex(publisher.PublisherError, "metadata differs"):
+                store.update_channel(
+                    "latest-attempt", digest, size + 1, dataset["generated_at"]
+                )
+            with self.assertRaisesRegex(publisher.PublisherError, "metadata differs"):
+                store.update_channel(
+                    "latest-attempt", digest, size, "2026-07-05T00:00:00Z"
+                )
+            os.chmod(channel_path, 0o666)  # loosen the pointer file mode
+            with self.assertRaisesRegex(publisher.PublisherError, "regular 644"):
+                store.verify_channel("latest-attempt")
+            os.chmod(channel_path, 0o644)  # put the pointer file mode back
+            dataset_dir = store.datasets / digest
+            os.chmod(dataset_dir, 0o755)  # writable dataset directory must be rejected
+            with self.assertRaisesRegex(publisher.PublisherError, "mode differs"):
+                store.verify_channel("latest-attempt")
+            os.chmod(dataset_dir, 0o555)  # presumably the mode install_dataset leaves; confirm
+            os.chmod(dataset_dir / "dataset.json", 0o644)  # writable dataset file must be rejected
+            with self.assertRaisesRegex(publisher.PublisherError, "mode differs"):
+                store.verify_channel("latest-attempt")
+            os.chmod(dataset_dir / "dataset.json", 0o444)
+            os.chmod(dataset_dir, 0o755)  # make the directory writable so COMPLETE can be removed
+            (dataset_dir / "COMPLETE").unlink()
+            os.chmod(dataset_dir, 0o555)  # read-only again before verifying the missing marker
+            with self.assertRaisesRegex(publisher.PublisherError, "incomplete"):
+                store.verify_channel("latest-attempt")
+
+    def test_store_modes_do_not_depend_on_process_umask(self) -> None:
+        with tempfile.TemporaryDirectory() as temporary:
+            previous = os.umask(0o077)
+            try:
+                store = publisher.Store(Path(temporary).resolve())
+                dataset = _dataset()
+                digest, size = store.install_dataset(dataset)
+                store.update_channel(
+                    "latest-attempt", digest, size, dataset["generated_at"]
+                )
+                with store.locked():
+                    pass
+            finally:
+                os.umask(previous)
+            self.assertEqual(
+                store.root.stat().st_mode & 0o777,
+                0o750,
+            )
+            self.assertEqual(
+                (store.channels / "latest-attempt.json").stat().st_mode & 0o777,
+                0o644,
+            )
+            self.assertEqual(
+                (store.datasets / digest / "dataset.json").stat().st_mode & 0o777,
+                0o444,
+            )
+            self.assertEqual(
+                (store.locks / "publisher.lock").stat().st_mode & 0o777,
+                0o600,
+            )
+
+    def test_verify_requires_bootstrap_but_dev_latest_is_optional(self) -> None:
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            args = types.SimpleNamespace(
+                store_root=str(root / "store"), channel=None, bundle=[]
+            )
+            with self.assertRaises(publisher.PublisherError):
+                publisher.verify_command(args)
+            store = publisher.Store(args.store_root)
+            dataset = publisher._quarantine_dataset(
+                "awaiting-v1-runs", "2026-07-04T00:00:00Z"
+            )
+            digest, size = store.install_dataset(dataset)
+            store.update_channel(
+                "latest-attempt", digest, size, "2026-07-04T00:00:00Z"
+            )
+            result = publisher.verify_command(args)
+            self.assertEqual(set(result["channels"]), {"latest-attempt"})
+            explicit = types.SimpleNamespace(
+                store_root=args.store_root, channel=["dev-latest"], bundle=[]
+            )
+            with self.assertRaises(publisher.PublisherError):
+                publisher.verify_command(explicit)
+            dev_pointer = copy.deepcopy(store.verify_channel("latest-attempt"))
+            dev_pointer["channel"] = "dev-latest"
+            (store.channels / "dev-latest.json").write_bytes(
+                contracts.canonical_json_bytes(dev_pointer)
+            )
+            with self.assertRaisesRegex(publisher.PublisherError, "non-promoted"):
+                publisher.verify_command(args)
+
+
+if __name__ == "__main__":  # only when this file is executed directly, not imported
+    unittest.main()
diff --git a/experimental/CollectiveX/tests/test_sampling_contract.py b/experimental/CollectiveX/tests/test_sampling_contract.py
new file mode 100644
index 0000000000..fa4b4005ea
--- /dev/null
+++ b/experimental/CollectiveX/tests/test_sampling_contract.py
@@ -0,0 +1,2287 @@
+#!/usr/bin/env python3
+"""CPU-only behavioral tests for the CollectiveX v1 execution contract."""
+from __future__ import annotations
+
+import argparse
+import ast
+import copy
+import hashlib
+import io
+import json
+import os
+from pathlib import Path
+import re
+import subprocess
+import sys
+import tempfile
+import types
+import unittest
+from unittest import mock
+
+import numpy as np
+
+HERE = Path(__file__).resolve().parent  # directory containing this test module
+ROOT = HERE.parent  # parent directory of HERE
+sys.path[:0] = [str(ROOT), str(HERE)]  # prepend both so they are searched first by the imports below
+
+import artifact_safety  # noqa: E402
+import capability  # noqa: E402
+import contracts  # noqa: E402
+import eplb  # noqa: E402
+import ep_harness  # noqa: E402
+import identity  # noqa: E402
+import run_ep  # noqa: E402
+import summarize  # noqa: E402
+import sweep_matrix  # noqa: E402
+import workload  # noqa: E402
+
+
+class SamplingContractTest(unittest.TestCase):
+    def test_identity_and_fixed_sampling_profile(self) -> None:
+        identity.verify_test_vector()
+        self.assertTrue(identity.is_typed_id(identity.IDENTITY_TEST_VECTOR["series_id"], "series"))
+        self.assertEqual(ep_harness.SAMPLING_CONTRACT, "fixed-512-v1")
+        self.assertEqual(
+            (
+                ep_harness.TIMED_ITERS_PER_TRIAL,
+                ep_harness.TRIALS_PER_POINT,
+                ep_harness.TIMED_SAMPLES_PER_POINT,
+                ep_harness.WARMUP_ITERS_PER_TRIAL,
+            ),
+            (8, 64, 512, 32),
+        )
+        self.assertEqual(identity.V1_CASE_PROFILE["activation_profile"], "canonical-counter-source-v3")
+        self.assertEqual(
+            identity.V1_CASE_PROFILE["activation_generator"],
+            "collectivex-activation-counter-v3",
+        )
+        self.assertEqual(identity.V1_CASE_PROFILE["sampling_contract"], "fixed-512-v1")
+        self.assertEqual(identity.V1_CASE_PROFILE["percentile_method"], "nearest-rank")
+        self.assertEqual(
+            identity.V1_CASE_PROFILE["rank_reduction"],
+            "cross-rank-max-per-iteration",
+        )
+        self.assertEqual(
+            identity.V1_CASE_PROFILE["oracle_contract"],
+            "expert-specific-transform-v1",
+        )
+        parser = argparse.ArgumentParser()
+        ep_harness.add_common_args(parser)
+        args = parser.parse_args(
+            ["--runner", "test", "--topology-class", "test", "--out", "result.json"]
+        )
+        self.assertEqual((args.iters, args.trials, args.warmup), (8, 64, 32))
+        for profile in ((8, 64, 32), (128, 4, 32), (8, 1, 4), (0, 64, 32)):
+            with self.subTest(profile=profile):
+                self.assertEqual(
+                    ep_harness.sampling_contract_error(*profile) is None,
+                    profile == (8, 64, 32),
+                )
+
+    def test_nearest_rank_percentiles_use_all_512_samples(self) -> None:
+        samples = list(range(1, 513))
+        self.assertEqual(ep_harness.percentile(samples, 50), 256)
+        self.assertEqual(ep_harness.percentile(samples, 99), 507)
+
+    def test_terminal_summary_uses_bound_sku_and_route(self) -> None:
+        terminal = {
+            "format": contracts.TERMINAL_FORMAT,
+            "case": {
+                "backend": "deepep", "phase": "prefill", "ep": 8,
+                "suite": "ep-routing-v1", "routing": "zipf", "eplb": True,
+                "required_publication": "comparable-experimental",
+            },
+            "identity": {"case_factors": {"sku": "h100-dgxc"}},
+        }
+        self.assertEqual(
+            summarize._identity(terminal),
+            (
+                "h100-dgxc", "ep-routing-v1", "zipf", "prefill", True,
+                "comparable-experimental", 8,
+            ),
+        )
+
+    def test_matrix_cases_and_shards_are_identity_bound(self) -> None:  # counts, ordering, case IDs, then rejection of tampered documents
+        matrix = sweep_matrix.validate_matrix_document(
+            sweep_matrix.resolve_matrix(backends="all")
+        )
+        requested = {item["case"]["case_id"]: item for item in matrix["requested_cases"]}
+        assigned = [case_id for shard in matrix["include"] for case_id in shard["case_ids"]]
+        runnable = {
+            case_id for case_id, item in requested.items()
+            if item["disposition"] == "runnable"
+        }
+        self.assertEqual(
+            (
+                len(matrix["include"]),
+                len(matrix["requested_cases"]),
+                sum(
+                    len(item["case"]["ladder"].split())
+                    for item in matrix["requested_cases"]
+                ),
+            ),
+            (38, 360, 840),  # shards, requested cases, ladder entries summed over all cases
+        )
+        routing_points = {
+            phase: {
+                int(point)
+                for item in matrix["requested_cases"]
+                if item["case"]["suite"] == "ep-routing-v1"
+                and item["case"]["phase"] == phase
+                for point in item["case"]["ladder"].split()
+            }
+            for phase in ("decode", "prefill")
+        }
+        self.assertEqual(routing_points, {"decode": {128}, "prefill": {512}})  # one ladder value per phase for ep-routing-v1
+        skus = sorted({shard["sku"] for shard in matrix["include"]})
+        self.assertEqual(  # the first len(skus) shards name each SKU once, in sorted order
+            [shard["sku"] for shard in matrix["include"][:len(skus)]],
+            skus,
+        )
+        self.assertEqual(set(assigned), runnable)  # assigned IDs are exactly the runnable IDs
+        self.assertEqual(len(assigned), len(set(assigned)))  # and no ID is assigned twice
+        excluded = {
+            "uccl": {"b200-dgxc", "b300"},
+        }
+        for backend, skus in excluded.items():  # note: rebinds the `skus` name used above
+            for sku in skus:
+                with self.subTest(backend=backend, sku=sku):
+                    self.assertFalse(capability.resolve(sku, backend)[0])
+        for case_id, item in requested.items():
+            case = {key: value for key, value in item["case"].items() if key != "case_id"}
+            self.assertEqual(  # each stored case_id must equal the ID recomputed from its other fields
+                case_id,
+                identity.case_id(sku=item["sku"], profile=identity.V1_CASE_PROFILE, case=case),
+            )
+            self.assertEqual(case["timing"], "8:64:32")
+            self.assertEqual(case["samples_per_point"], 512)
+
+        bad_matrix = copy.deepcopy(matrix)
+        bad_matrix["schema_version"] = True  # a bool in place of the version must be rejected
+        with self.assertRaises(sweep_matrix.MatrixError):
+            sweep_matrix.validate_matrix_document(bad_matrix)
+
+        bad_catalog = copy.deepcopy(matrix)
+        wrapper = next(
+            item for item in bad_catalog["requested_cases"]
+            if item["disposition"] == "runnable"
+        )
+        old_id = wrapper["case"]["case_id"]
+        wrapper["case"]["hidden"] = 1  # change one factor of the first runnable case...
+        factors = {key: value for key, value in wrapper["case"].items() if key != "case_id"}
+        new_id = identity.case_id(  # ...and re-derive a case_id that matches the changed factors
+            sku=wrapper["sku"], profile=identity.V1_CASE_PROFILE, case=factors
+        )
+        wrapper["case"]["case_id"] = new_id
+        for shard in bad_catalog["include"]:  # keep shard references pointing at the renamed case
+            shard["case_ids"] = [new_id if value == old_id else value for value in shard["case_ids"]]
+        with self.assertRaisesRegex(sweep_matrix.MatrixError, "frozen v1"):
+            sweep_matrix.validate_matrix_document(bad_catalog)
+
+        shard_meta = matrix["include"][0]
+        requested_cases = {item["case"]["case_id"]: item["case"] for item in matrix["requested_cases"]}
+        shard = {
+            "schema_version": True,  # bool again, this time in a shard control document
+            "id": shard_meta["id"],
+            "sku": shard_meta["sku"],
+            "backend": shard_meta["backend"],
+            "nodes": shard_meta["nodes"],
+            "n": shard_meta["n"],
+            "cases": [requested_cases[value] for value in shard_meta["case_ids"]],
+        }
+        with self.assertRaises(sweep_matrix.MatrixError):
+            sweep_matrix.validate_shard_control(
+                shard, sku=shard_meta["sku"], backend=shard_meta["backend"],
+                nodes=shard_meta["nodes"],
+            )
+
+    def test_matrix_yaml_and_config_validation_are_strict(self) -> None:  # every mutation below must make validation exit
+        suites = sweep_matrix._load("suites.yaml")
+        workloads = sweep_matrix._load("workloads.yaml")
+        invalid = (  # (label, mutator taking the suites and workloads copies)
+            ("unknown top", lambda s, _w: s.update({"typo": True})),
+            (
+                "unknown suite field",
+                lambda s, _w: s["suites"]["ep-core-v1"].update({"modes": ["normal"]}),
+            ),
+            (
+                "unknown workload field",
+                lambda _s, w: w["model_derived"]["deepseek-v3-v1"].update({"unused": 1}),
+            ),
+            (
+                "string phases",
+                lambda s, _w: s["suites"]["ep-core-v1"].update({"phases": "decode"}),
+            ),
+            (
+                "unknown routing",
+                lambda s, _w: s["suites"]["ep-core-v1"].update({"routings": ["random"]}),
+            ),
+            (
+                "integer EPLB",
+                lambda s, _w: s["suites"]["ep-routing-v1"].update({"eplb": [0, 1]}),
+            ),
+            (
+                "duplicate platform",
+                lambda s, _w: s["suites"]["ep-core-v1"]["platforms"].append("h100-dgxc"),
+            ),
+            ("missing top field", lambda s, _w: s.pop("schema_version")),
+            (
+                "string dimension",
+                lambda _s, w: w["model_derived"]["deepseek-v3-v1"].update({"hidden": "7168"}),
+            ),
+            (
+                "unreachable phase ladder",
+                lambda s, _w: s["suites"]["ep-routing-v1"].update({"phases": ["prefill"]}),
+            ),
+        )
+        for label, mutate in invalid:
+            with self.subTest(label=label), self.assertRaises(SystemExit):  # copy, mutate and validate all run inside the expectation
+                bad_suites, bad_workloads = copy.deepcopy(suites), copy.deepcopy(workloads)
+                mutate(bad_suites, bad_workloads)
+                sweep_matrix.validate_config_documents(bad_suites, bad_workloads)
+
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary)
+            (root / "configs").mkdir()
+            (root / "configs" / "duplicate.yaml").write_text(
+                "schema_version: 1\nsuites:\n  same: 1\n  same: 2\n"  # the key "same" appears twice under suites
+            )
+            with mock.patch.object(sweep_matrix, "HERE", root), self.assertRaisesRegex(  # presumably _load reads HERE/configs — confirm
+                SystemExit, "duplicate YAML key"
+            ):
+                sweep_matrix._load("duplicate.yaml")
+
+    def test_semantically_duplicate_suite_points_are_rejected(self) -> None:
+        suites = sweep_matrix._load("suites.yaml")
+        workloads = sweep_matrix._load("workloads.yaml")
+        suites["suites"]["ep-core-copy-v1"] = copy.deepcopy(
+            suites["suites"]["ep-core-v1"]
+        )
+
+        def load(name: str) -> dict[str, object]:
+            return workloads if name == "workloads.yaml" else suites
+
+        with mock.patch.object(sweep_matrix, "_load", side_effect=load), self.assertRaisesRegex(
+            SystemExit, "duplicate semantic point"
+        ):
+            sweep_matrix.resolve_matrix()
+
+    def test_only_three_shared_launchers_are_registered(self) -> None:  # launcher files and capability.PLATFORMS must agree
+        expected = {
+            "launch_single-slurm.sh",
+            "launch_gb-nv.sh",
+            "launch_mi-amds.sh",
+        }
+        self.assertEqual({path.name for path in (ROOT / "launchers").glob("launch_*.sh")}, expected)
+        self.assertEqual(
+            {platform["launcher"] for platform in capability.PLATFORMS.values()},
+            {"single-slurm", "gb-nv", "mi-amds"},
+        )
+        for platform in capability.PLATFORMS.values():  # a launcher shared by several platforms is re-checked each time
+            launcher = ROOT / "launchers" / f"launch_{platform['launcher']}.sh"
+            self.assertTrue(launcher.is_file())
+            source = launcher.read_text()
+            self.assertNotIn("RUNNER_NAME", source)
+            self.assertIn("cx_preflight_allocation", source)
+            lock_environment = 'cx_lock_canonical_gha_env "$RUNNER"'
+            self.assertIn(lock_environment, source)
+            self.assertLess(  # first cx_load_operator_config occurs before the environment lock
+                source.index("cx_load_operator_config"),
+                source.index(lock_environment),
+            )
+            validate = 'cx_validate_shard_control "$CX_DIR"'
+            stage = 'MOUNT_SRC="$(cx_stage_repo '
+            self.assertIn(validate, source)
+            self.assertLess(source.index(validate), source.index(stage))  # validation text precedes repo staging
+            self.assertLess(source.index(validate), source.index("cx_require_vars"))  # ...and the first cx_require_vars
+
+        common = (ROOT / "runtime" / "common.sh").read_text()
+        workflow = (ROOT.parent.parent / ".github" / "workflows" / "collectivex-sweep.yml").read_text()
+        self.assertNotIn("RUNNER_NAME", common)
+        self.assertNotIn("RUNNER_NAME:", workflow)  # only the "RUNNER_NAME:" form (with colon) is checked here
+        self.assertNotIn("flashinfer", capability.BACKENDS)
+        self.assertFalse((HERE / "ep_flashinfer.py").exists())
+
+    def test_image_pinned_deepep_and_input_integrity_order_are_explicit(self) -> None:  # text checks on run_in_container.sh and ep_harness.py
+        runtime = (ROOT / "runtime" / "run_in_container.sh").read_text()
+        probe = runtime[runtime.index("cx_probe_deepep()"):
+                        runtime.index("cx_activate_deepep_v2()")]  # text between the two function-name markers
+        self.assertIn('expected_version="1.2.1"', probe)
+        self.assertIn('expected_version="1.1.0+814e508"', probe)
+        self.assertNotIn("pip install", probe)  # the probe text must contain no install
+        self.assertNotIn("cx_fetch_revision", probe)  # ...and no fetch call
+        self.assertIn("Path(deep_ep.__file__).resolve() in recorded_files", probe)
+        self.assertIn("Path(buffer_module.__file__).resolve() in recorded_files", probe)
+
+        harness = (HERE / "ep_harness.py").read_text()
+        pass_one = harness[harness.index("# ---- Pass 1"):
+                           harness.index("# ---- Pass 2")]  # text between the Pass 1 and Pass 2 comment markers
+        self.assertLess(  # the snapshot assignment appears before the oracle call within Pass 1
+            pass_one.index("input_snapshots[T] ="),
+            pass_one.index("oracle = _run_expert_oracle"),
+        )
+        self.assertIn("pre_input_unchanged", pass_one)
+        self.assertIn("hh = prep()\n                    torch.cuda.synchronize()", harness)  # exact indentation is part of the match
+
+    def test_squash_imports_are_reproducible_and_use_a_fresh_cache_key(self) -> None:
+        common = (ROOT / "runtime" / "common.sh").read_text()
+        amd = (ROOT / "launchers" / "launch_mi-amds.sh").read_text()
+        self.assertIn('CX_SQUASH_FORMAT_VERSION="repro-v1"', common)
+        self.assertIn("SOURCE_DATE_EPOCH=\"$CX_SQUASH_SOURCE_DATE_EPOCH\"", common)
+        self.assertIn("${COLLECTIVEX_IMAGE_DIGEST#sha256:}", common)
+        self.assertIn("cx_ensure_squash_on_job", amd)
+        self.assertIn('"${CX_LOCK_DIR:-}"', amd)
+        self.assertNotIn('"${CX_LOCK_DIR:-/tmp}"', amd)
+        self.assertIn('[ -n "$lock_dir" ] || lock_dir="$squash_dir/.locks"', common)
+        self.assertGreaterEqual(common.count("--chdir=/tmp"), 2)
+        self.assertGreaterEqual(amd.count("--chdir=/tmp"), 2)
+        self.assertIn('ENROOT_CACHE_PATH="$compute_home/enroot-cache"', common)
+        self.assertIn('ENROOT_RUNTIME_PATH="$compute_home/enroot-run"', common)
+        self.assertEqual(common.count('cx_reverify_registry_image "$image"'), 2)
+        result = subprocess.run(
+            [
+                "bash",
+                "-c",
+                f'source "{ROOT / "runtime" / "common.sh"}"; '
+                'COLLECTIVEX_IMAGE_DIGEST="sha256:$(printf b%.0s {1..64})"; '
+                'CX_IMAGE_PLATFORM=linux/amd64; cx_squash_path /cache repo/image:tag; '
+                'printf "\\n"; CX_IMAGE_PLATFORM=linux/arm64; '
+                'cx_squash_path /cache repo/image:tag',
+            ],
+            text=True,
+            capture_output=True,
+        )
+        self.assertEqual(result.returncode, 0, result.stderr)
+        digest = "b" * 64
+        self.assertEqual(
+            result.stdout.splitlines(),
+            [
+                f"/cache/repro-v1_{digest}_repo_image_tag.sqsh",
+                f"/cache/repro-v1_linux_arm64_{digest}_repo_image_tag.sqsh",
+            ],
+        )
+
+    def test_launchers_preserve_platform_specific_runtime_requirements(self) -> None:  # exact text fragments expected in each launcher
+        single = (ROOT / "launchers" / "launch_single-slurm.sh").read_text()
+        gb = (ROOT / "launchers" / "launch_gb-nv.sh").read_text()
+        amd = (ROOT / "launchers" / "launch_mi-amds.sh").read_text()
+        common = (ROOT / "runtime" / "common.sh").read_text()
+        self.assertIn("ALLOC_EXTRA=(--mem=0)", single)
+        self.assertIn("ALLOC_EXTRA=(-N 1 --mem=0)", single)
+        self.assertIn("SRUN_EXTRA=(--mpi=none --container-remap-root)", single)
+        self.assertIn("CX_ENROOT_LOCAL_IMPORT=1", single)
+        self.assertIn('PRODUCT="${CX_SHARD_SKU:-${CX_GB_PRODUCT:-', gb)
+        self.assertIn("cx_ensure_squash_on_job", gb)
+        self.assertIn("--mem=0 --cpus-per-task=72", gb)
+        self.assertIn("--mem=0 --cpus-per-task=35", gb)
+        self.assertIn("--container-writable", gb)
+        self.assertIn("--container-remap-root", gb)
+        workload_stage = gb[  # from the first "workload_args=(" up to the next "workload_log="
+            gb.index("workload_args=("):gb.index("workload_log=", gb.index("workload_args=("))
+        ]
+        self.assertNotIn("--workload", workload_stage)
+        self.assertIn("mi325x) CPUS_PER_TASK=256", amd)
+        self.assertIn("/dev/kfd:/dev/kfd,/dev/dri:/dev/dri", amd)
+        collect = common[common.index("cx_collect_results()"):
+                         common.index("cx_cleanup_stage()")]  # text between these two function-name markers
+        cleanup = common[common.index("cx_launcher_cleanup()"):
+                         common.index("cx_install_launcher_fail_safe()")]  # likewise for the launcher cleanup function
+        self.assertNotIn("cx_cleanup_stage", collect)  # the collection text must not mention stage cleanup
+        self.assertLess(cleanup.index("cx_cancel_job"), cleanup.index("cx_cleanup_stage"))  # cancel is named before stage cleanup
+        runtime = (ROOT / "runtime" / "run_in_container.sh").read_text()
+        self.assertIn('distribution.read_text("direct_url.json")', runtime)
+        self.assertIn("6548e9c504a12b2471af4b7f4d9546321210a57a456b5dc55bd4a8dad0f932ac", runtime)  # 64-hex literal; its meaning is not shown here
+        self.assertIn("2671cff7baf8c2c214ff4bac721af875d513130670bec57601998bd1aae82882", runtime)
+
+    def test_deferred_backend_provenance_resolves_before_measurement(self) -> None:
+        harness = (ROOT / "tests" / "ep_harness.py").read_text()
+        conditioning = harness.index("for wt in conditioning_ladder")
+        provenance = harness.index("# Setup may materialize deferred provenance")
+        measurement = harness.index("# ---- Pass 1: build each deterministic problem")
+        self.assertLess(conditioning, provenance)
+        self.assertLess(provenance, measurement)
+
+    def test_backend_specific_routing_contracts_are_explicit(self) -> None:  # exact source fragments expected in the backend modules
+        hybrid = (ROOT / "tests" / "ep_deepep_hybrid.py").read_text()
+        self.assertIn("self.domain_rank = int(self.buffer.local_rank)", hybrid)
+        self.assertIn(
+            "probability_columns = self.domain_rank * self.local_experts + local_expert_ids",
+            hybrid,
+        )
+        self.assertIn("h.recv_probs[:count][rows, probability_columns]", hybrid)
+
+        mori = (ROOT / "tests" / "ep_mori.py").read_text()
+        self.assertIn("topk_idx=indices", mori)
+        self.assertIn("indices=indices", mori)
+        self.assertIn(
+            "combine_indices = p.indices if self._async_ll else h.dispatch_indices",
+            mori,
+        )
+        self.assertIn("h.combine_input,\n            None,\n            combine_indices", mori)  # line breaks and indentation are part of the match
+        self.assertIn('"use_external_inp_buf": self._async_ll', mori)
+        self.assertIn("self.block_num = self._block_target = 64", mori)
+        self.assertIn('config_kwargs["block_num"] = self.block_num', mori)
+        self.assertIn(
+            'config_kwargs["warp_num_per_block"] = self.dispatch_warps', mori
+        )
+        self.assertIn("count > tensor.size(0)", mori)
+        self.assertIn("return combined[:p.T]", mori)
+        self.assertNotIn("return combined\n", mori)  # a bare `return combined` line must not appear
+        self.assertIn(
+            "raw_expert_ids < local_start + experts_per_rank",
+            mori,
+        )
+        self.assertNotIn("MoRI returned a non-local expert", mori)
+        harness = (ROOT / "tests" / "ep_harness.py").read_text()
+        self.assertIn("problem.recv_tokens = backend.recv_tokens(handle)", harness)
+
+    def test_mori_masks_global_topk_metadata_to_the_local_rank(self) -> None:  # runs helpers lifted out of ep_mori.py against NumPy data
+        path = HERE / "ep_mori.py"
+        tree = ast.parse(path.read_text(), str(path))
+        helper = next(
+            node
+            for node in tree.body
+            if isinstance(node, ast.FunctionDef) and node.name == "_project_local_metadata"
+        )
+        namespace: dict[str, object] = {}
+        exec(compile(ast.Module(body=[helper], type_ignores=[]), str(path), "exec"), namespace)  # only this one function is compiled and run
+        raw_ids = np.array([[0, 32, 63, -1], [64, 95, 7, 96]], dtype=np.int64)
+        raw_weights = np.arange(8, dtype=np.float32).reshape(2, 4)
+        torch_module = types.SimpleNamespace(  # NumPy-backed stand-in passed as the helper's first argument
+            where=np.where,
+            full_like=np.full_like,
+            zeros_like=np.zeros_like,
+        )
+        ids, weights, local_ids = namespace["_project_local_metadata"](
+            torch_module, raw_ids, raw_weights, 1, 32  # NOTE(review): 1 and 32 look like rank and experts-per-rank — confirm
+        )
+        np.testing.assert_array_equal(
+            ids,
+            np.array([[-1, 32, 63, -1], [-1, -1, -1, -1]], dtype=np.int64),  # only 32 and 63 are kept; the rest become -1
+        )
+        np.testing.assert_array_equal(
+            weights,
+            np.array([[0, 1, 2, 0], [0, 0, 0, 0]], dtype=np.float32),  # weights survive only where an id was kept
+        )
+        counts = np.bincount(local_ids, minlength=32)
+        self.assertEqual((counts[0], counts[31], int(counts.sum())), (1, 1, 2))
+        commit_helper = next(
+            node for node in tree.body
+            if isinstance(node, ast.FunctionDef) and node.name == "_mori_source_commit"
+        )
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary)
+            module = root / "python" / "mori" / "__init__.py"
+            module.parent.mkdir(parents=True)
+            module.touch()
+            git = root / ".git"
+            git.mkdir()
+            (git / "HEAD").write_text("a" * 40 + "\n")  # HEAD holding a bare 40-character id
+            commit_namespace = {  # names made available to the extracted helper
+                "Path": Path,
+                "re": re,
+                "mori": types.SimpleNamespace(__file__=str(module)),
+            }
+            exec(
+                compile(ast.Module(body=[commit_helper], type_ignores=[]), str(path), "exec"),
+                commit_namespace,
+            )
+            self.assertEqual(commit_namespace["_mori_source_commit"](), "a" * 40)
+            (git / "HEAD").write_text("ref: refs/heads/main\n")  # a symbolic ref instead of a bare id
+            with self.assertRaisesRegex(RuntimeError, "detached commit"):
+                commit_namespace["_mori_source_commit"]()
+
+        profile = contracts.project_resource_profile(
+            {
+                "block_num": 64,
+                "device_cus": 304,
+                "kernel_type": "AsyncLL",
+                "tuned_source": "upstream-asyncll-64x8-external-input",
+            }
+        )
+        self.assertIsNone(profile["comm_units_kind"])  # both projected fields are expected to be None for this input
+        self.assertIsNone(profile["configured_units"])
+
+    def test_squash_identity_rehashes_instead_of_trusting_a_sidecar(self) -> None:  # the exported hash must come from the image bytes
+        with tempfile.TemporaryDirectory() as temporary:
+            image = Path(temporary) / "image.sqsh"
+            image.write_bytes(b"current squash bytes")
+            sidecar = Path(f"{image}.sha256")
+            sidecar.write_text("a" * 64)  # a digest-shaped value that is not the image's SHA-256
+            os.utime(sidecar, (image.stat().st_mtime + 10, image.stat().st_mtime + 10))  # sidecar timestamps set 10 s after the image's mtime
+            result = subprocess.run(
+                [
+                    "bash", "-c",
+                    'source "$1"; COLLECTIVEX_EXECUTION_ID="squash-hash-$$"; '
+                    'cx_export_squash_identity "$2"; cx_cleanup_private_logs 0; '
+                    'printf "%s" "$COLLECTIVEX_SQUASH_SHA256"',
+                    "_", str(ROOT / "runtime" / "common.sh"), str(image),  # $0 placeholder, $1 script, $2 image
+                ],
+                text=True,
+                capture_output=True,
+            )
+            self.assertEqual(result.returncode, 0, result.stderr)
+            self.assertEqual(result.stdout, hashlib.sha256(image.read_bytes()).hexdigest())  # not the sidecar's contents
+
+    def test_salloc_job_id_parser_uses_the_portable_grant_message(self) -> None:
+        with tempfile.TemporaryDirectory() as temporary:
+            directory = Path(temporary)
+            arguments = directory / "arguments"
+            salloc = directory / "salloc"
+            salloc.write_text(
+                "#!/usr/bin/env bash\n"
+                "printf '%s\\n' \"$@\" > \"$CX_TEST_SALLOC_ARGUMENTS\"\n"
+                "printf 'salloc: Granted job allocation 4242\\n' >&2\n"
+            )
+            salloc.chmod(0o700)
+            result = subprocess.run(
+                [
+                    "bash",
+                    "-c",
+                    f'source "{ROOT / "runtime" / "common.sh"}"; '
+                    'COLLECTIVEX_EXECUTION_ID="scheduler-parser-$$"; '
+                    'JOB_ID=""; cx_salloc_jobid --partition=compute; '
+                    'cx_cleanup_private_logs 0; printf "%s:%s" "$JOB_ID" "$CX_ALLOCATION_REQUESTED"',
+                ],
+                text=True,
+                capture_output=True,
+                env={
+                    **os.environ,
+                    "PATH": f"{directory}:{os.environ['PATH']}",
+                    "CX_TEST_SALLOC_ARGUMENTS": str(arguments),
+                },
+            )
+            self.assertEqual(result.returncode, 0, result.stderr)
+            self.assertEqual(result.stdout, "4242:1")
+            self.assertEqual(
+                arguments.read_text().splitlines(),
+                ["--partition=compute", "--no-shell"],
+            )
+
+    def test_allocation_cleanup_fails_closed_when_scheduler_queries_fail(self) -> None:  # stubbed scheduler run, then workflow text checks
+        with tempfile.TemporaryDirectory() as temporary:
+            directory = Path(temporary)
+            for name, body in {  # stub commands, placed first on PATH below
+                "scancel": "exit 0",
+                "squeue": "exit 2",  # the queue query always fails
+                "sleep": "exit 0",  # returns immediately
+            }.items():
+                command = directory / name
+                command.write_text(f"#!/usr/bin/env bash\n{body}\n")
+                command.chmod(0o700)
+            result = subprocess.run(
+                [
+                    "bash", "-c",
+                    'source "$1"; cx_cancel_job 4242',
+                    "_", str(ROOT / "runtime" / "common.sh"),
+                ],
+                text=True,
+                capture_output=True,
+                env={**os.environ, "PATH": f"{directory}:{os.environ['PATH']}"},
+            )
+            self.assertNotEqual(result.returncode, 0)  # cancellation must be reported as failed
+            self.assertIn("did not terminate", result.stderr)
+
+        workflow = (ROOT.parent.parent / ".github" / "workflows" / "collectivex-sweep.yml").read_text()
+        self.assertIn("cleanup-unsafe", workflow)
+        self.assertIn("cleanup-safe", workflow)
+        self.assertIn("Confirm allocation cleanup", workflow)
+        self.assertIn("Prepare pinned backend source archive", workflow)
+        self.assertIn("Install pinned backend source seed", workflow)
+        self.assertIn("CX_BACKEND_SOURCE_SEED_ROOT", workflow)
+        self.assertIn("steps.gen.outputs.source_backends", workflow)
+        self.assertIn("with tarfile.open", workflow)
+        artifact_validation = workflow[workflow.index("- name: Validate shard artifact safety"):]  # from this step name to the end of the file
+        self.assertIn("steps.allocation_cleanup.outcome == 'success'", artifact_validation)
+        sweep_workflow = workflow[workflow.index("  sweep:"):]  # from the sweep job key to the end of the file
+        self.assertNotIn("GITHUB_WORKSPACE", sweep_workflow)
+        self.assertNotIn("RUNNER_WORKSPACE", sweep_workflow)
+        self.assertIn('CX_SOURCE_ROOT: /tmp/inferencex-collectivex-', sweep_workflow)
+        source_step = sweep_workflow[:sweep_workflow.index("- uses: actions/download-artifact")]  # sweep text before the first artifact download
+        self.assertNotIn("unsafe_guards=", source_step)
+        self.assertIn("cutoff = time.time() - 86400", source_step)
+        self.assertIn("stat.S_IMODE(metadata.st_mode) != 0o700", source_step)
+        self.assertIn('for marker_name in ("cleanup-safe", "cleanup-unsafe")', source_step)
+        self.assertIn("stat.S_IMODE(marker.st_mode) == 0o600", source_step)
+        self.assertIn("shutil.rmtree(entry.path)", source_step)
+        self.assertLess(  # the rev-parse text comes before the prepared=true output
+            source_step.index('rev-parse HEAD'),
+            source_step.index("echo 'prepared=true'"),
+        )
+        upload = workflow[workflow.index("- name: Stage shard artifact"):]
+        self.assertIn("id: stage_artifact", upload)
+        self.assertIn("id: upload_artifact", upload)
+        self.assertIn("steps.stage_artifact.outcome == 'success'", upload)
+        cleanup = workflow[workflow.index("- name: Cleanup isolated workspace"):]
+        for step in (  # every listed step's outcome must be referenced in the cleanup text
+            "sweep_shard", "allocation_cleanup", "artifact_safety",
+            "delivery_contracts", "stage_artifact", "upload_artifact",
+        ):
+            self.assertIn(f"steps.{step}.outcome", cleanup)
+        self.assertLess(  # the cleanup-safe test text precedes the recursive removal
+            cleanup.index('cleanup-safe" ]'),
+            cleanup.index('rm -rf -- "$CX_JOB_ROOT"'),
+        )
+
+    def test_runtime_identity_and_realized_placement_are_behavioral(self) -> None:
+        self.assertFalse(capability.runtime_identity_issues(
+            "mi325x", vendor="amd", arch="gfx942", machine="amd64",
+            device_name="AMD Instinct MI325X", device_count=8, world_size=8,
+        ))
+        self.assertTrue(capability.runtime_identity_issues(
+            "mi355x", vendor="amd", arch="gfx942", machine="amd64",
+            device_name="AMD Instinct MI325X", device_count=8, world_size=8,
+        ))
+        records = [("private-a", rank) for rank in range(4)] + [
+            ("private-b", rank) for rank in range(4)
+        ]
+        self.assertEqual(
+            run_ep._summarize_realized_placement(
+                records, expected_nodes=2, expected_gpus_per_node=4, expected_world_size=8
+            ),
+            {
+                "gpus_per_node": 4,
+                "nodes": 2,
+                "ranks_per_node": 4,
+                "unique_local_ranks": True,
+                "valid": True,
+            },
+        )
+        with self.assertRaises(ValueError):
+            run_ep._summarize_realized_placement(
+                records[:-1] + [("private-b", 2)],
+                expected_nodes=2,
+                expected_gpus_per_node=4,
+                expected_world_size=8,
+            )
+
+    def test_collective_version_and_rccl_fingerprint_are_normalized(self) -> None:  # version formatting, fingerprint fields, ep_nccl helper
+        self.assertEqual(ep_harness.format_collective_version(23004), "2.30.4")
+        self.assertEqual(ep_harness.format_collective_version(21805), "2.18.5")
+        self.assertEqual(ep_harness.format_collective_version((2, 21, 5)), "2.21.5")  # a tuple input is accepted as well
+
+        properties = types.SimpleNamespace(
+            multi_processor_count=304, total_memory=1024, warp_size=64
+        )
+        fake = types.SimpleNamespace(  # stand-in object passed to _runtime_fingerprint below
+            __version__="2.9.0",
+            version=types.SimpleNamespace(cuda=None, hip="7.2"),
+            cuda=types.SimpleNamespace(
+                get_device_properties=lambda _device: properties,
+                get_device_name=lambda _device: "AMD Instinct MI325X",
+                nccl=types.SimpleNamespace(version=lambda: 21805),
+            ),
+        )
+        with mock.patch.object(  # pin the loaded-library lookup for this call
+            run_ep, "_loaded_collective_version", return_value="2.18.5"
+        ):
+            fingerprint = run_ep._runtime_fingerprint(
+                fake, "device", machine="amd64", vendor="amd", arch="gfx942"
+            )
+        self.assertEqual(fingerprint["collective_library"], {"kind": "rccl", "version": "2.18.5"})
+        self.assertEqual(fingerprint["accelerator_runtime"], {"kind": "hip", "version": "7.2"})
+
+        class FakeCollective:  # returned by the patched ctypes.CDLL below
+            @staticmethod
+            def ncclGetVersion(pointer) -> int:
+                pointer._obj.value = 23004  # write the version through the passed reference
+                return 0
+
+        maps = "0-1 r-xp 0 00:00 0 /runtime/libnccl.so.2\n"  # a single mapping line naming libnccl.so.2
+        with (
+            mock.patch("builtins.open", return_value=io.StringIO(maps)),  # every open() in this block yields the maps text
+            mock.patch.object(run_ep.os.path, "isfile", return_value=True),
+            mock.patch.object(
+                run_ep.os.path, "realpath", return_value="/runtime/libnccl.so.2"
+            ),
+            mock.patch.object(run_ep.ctypes, "CDLL", return_value=FakeCollective()),
+        ):
+            self.assertEqual(run_ep._loaded_collective_version(), "2.30.4")
+
+        path = HERE / "ep_nccl.py"
+        tree = ast.parse(path.read_text(), str(path))
+        helper = next(
+            node for node in tree.body
+            if isinstance(node, ast.FunctionDef) and node.name == "_runtime_collective"
+        )
+        namespace = {"re": re}
+        exec(compile(ast.Module(body=[helper], type_ignores=[]), str(path), "exec"), namespace)  # only this one function is compiled and run
+        args = types.SimpleNamespace(
+            runtime_fingerprint={
+                "collective_library": {"kind": "nccl", "version": "2.30.4"}
+            }
+        )
+        cuda = types.SimpleNamespace(version=types.SimpleNamespace(hip=None))
+        self.assertEqual(namespace["_runtime_collective"](args, cuda), ("nccl", "2.30.4"))
+        args.runtime_fingerprint["collective_library"]["version"] = None  # a missing version must raise
+        with self.assertRaisesRegex(RuntimeError, "runtime identity is unavailable"):
+            namespace["_runtime_collective"](args, cuda)
+        self.assertNotIn("torch.cuda.nccl.version", path.read_text())  # ep_nccl.py must not contain this call text
+
+    def test_workloads_bind_generator_activation_and_trace(self) -> None:
+        args = ("uniform", 7168, 8, 256, 8, 64, 67)
+        first = workload.compute_workload_id(*args)
+        self.assertTrue(identity.is_typed_id(first, "workload"))
+        self.assertEqual(first, workload.compute_workload_id(*args))
+        self.assertNotEqual(first, workload.compute_workload_id(*args[:-1], 68))
+        self.assertNotEqual(
+            first,
+            workload.compute_workload_id(*args, trace_checksum="a" * 64),
+        )
+        _, _, manifest = workload.build_workload(8, 2, 4, "uniform", 4, 67, 2)
+        member, checksums, _, _ = workload.canonical_member(
+            "uniform", 8, 2, 4, 2, 2, 67
+        )
+        self.assertEqual(member, manifest["workload_id"])
+        self.assertEqual(checksums, manifest["checksums"])
+
+    def test_canonical_members_are_bound_to_each_scheduled_row(self) -> None:
+        case = {
+            "routing": "uniform", "hidden": 8, "topk": 2, "experts": 4, "ep": 2,
+        }
+        eplb_record = {
+            "enabled": False, "mapping_hash": None, "num_physical_experts": 4,
+        }
+
+        def expected(
+            *, tokens: int = 1, hidden: int = 8
+        ) -> tuple[str, dict[str, str], str]:
+            member, checksums, row_hash, _, _ = contracts._expected_canonical_trace(
+                "uniform", hidden, 2, 4, 4, 2, tokens, 67, False, 2048
+            )
+            return member, checksums, row_hash
+
+        member, checksums, row_hash = expected()
+        rows = [{"tokens_per_rank": 1, "routing": {"hash": row_hash}}]
+        proof = {
+            "manifest_checksums": {member: checksums},
+            "members": [member],
+            "workload_id": identity.workload_id({
+                "members": [{"checksums": checksums, "workload_id": member}]
+            }),
+        }
+        contracts._validate_canonical_workload(proof, case, rows, eplb_record)
+
+        def replace_member(document: dict, replacement: tuple[str, dict[str, str], str]) -> None:
+            replacement_id, replacement_checksums, _ = replacement
+            document["members"] = [replacement_id]
+            document["manifest_checksums"] = {replacement_id: replacement_checksums}
+            document["workload_id"] = identity.workload_id({
+                "members": [{
+                    "checksums": replacement_checksums,
+                    "workload_id": replacement_id,
+                }]
+            })
+
+        mutations = {
+            "wrong member token": lambda document, mutated_rows: replace_member(
+                document, expected(tokens=2)
+            ),
+            "wrong member dimensions": lambda document, mutated_rows: replace_member(
+                document, expected(hidden=16)
+            ),
+            "wrong member checksum": lambda document, mutated_rows: replace_member(
+                document,
+                (
+                    member,
+                    {**checksums, "topk_idx": "0" * 64},
+                    row_hash,
+                ),
+            ),
+            "row hash unrelated to member": lambda document, mutated_rows: mutated_rows[0][
+                "routing"
+            ].update({"hash": "f" * 64}),
+        }
+        for label, mutate in mutations.items():
+            with self.subTest(label=label), self.assertRaises(contracts.ContractError):
+                bad_proof, bad_rows = copy.deepcopy(proof), copy.deepcopy(rows)
+                mutate(bad_proof, bad_rows)
+                contracts._validate_canonical_workload(
+                    bad_proof, case, bad_rows, eplb_record
+                )
+
+    def test_eplb_row_hash_is_bound_to_the_frozen_remap(self) -> None:
+        case = {"routing": "zipf", "hidden": 8, "topk": 2, "experts": 4, "ep": 2}
+        physical = eplb.physical_count(4, 32, 2)
+        plan = contracts._expected_eplb_plan("zipf", 2, 4, physical, 2, 67, 2048)
+        eplb_record = {
+            "enabled": True,
+            "mapping_hash": eplb.mapping_hash(plan),
+            "num_physical_experts": physical,
+        }
+        member, checksums, row_hash, _, _ = contracts._expected_canonical_trace(
+            "zipf", 8, 2, 4, physical, 2, 1, 67, True, 2048
+        )
+        self.assertNotEqual(row_hash, checksums["trace"])
+        workload_proof = {
+            "manifest_checksums": {member: checksums},
+            "members": [member],
+            "workload_id": identity.workload_id({
+                "members": [{"checksums": checksums, "workload_id": member}]
+            }),
+        }
+        rows = [{"tokens_per_rank": 1, "routing": {"hash": row_hash}}]
+        contracts._validate_canonical_workload(workload_proof, case, rows, eplb_record)
+        with self.assertRaisesRegex(contracts.ContractError, "EPLB mapping"):
+            contracts._validate_canonical_workload(
+                workload_proof, case, rows, {**eplb_record, "mapping_hash": "0" * 64}
+            )
+
+    def test_oracle_pass_cannot_ignore_combined_value_failure(self) -> None:
+        oracle = {
+            "atol": ep_harness.ORACLE_ATOL,
+            "checks": {
+                "combine_values": True,
+                "counts": True,
+                "metadata": True,
+                "multiplicity": True,
+                "payload": True,
+                "source_set": True,
+                "weights": True,
+            },
+            "combine_weight_semantics": "unweighted-rank-sum",
+            "contract": ep_harness.ORACLE_CONTRACT,
+            "dispatch_sha256": "a" * 64,
+            "max_absolute_error": 0.0,
+            "max_elementwise_relative_error": 0.0,
+            "max_relative_error": 0.0,
+            "max_weight_error": 0.0,
+            "order_sha256": "b" * 64,
+            "ordering_contract": "stable-v1",
+            "passed": True,
+            "receive_count": 1,
+            "rtol": ep_harness.ORACLE_RTOL,
+        }
+        contracts._validate_oracle(oracle, "oracle")
+        weighted = copy.deepcopy(oracle)
+        weighted["combine_weight_semantics"] = "native-gate-weighted"
+        with self.assertRaisesRegex(contracts.ContractError, "differs from v1"):
+            contracts._validate_oracle(weighted, "oracle")
+        tampered = copy.deepcopy(oracle)
+        tampered["checks"]["combine_values"] = False
+        with self.assertRaises(contracts.ContractError):
+            contracts._validate_oracle(tampered, "oracle")
+
+    def test_oracle_stability_canonicalizes_native_receive_order(self) -> None:
+        source = (HERE / "ep_harness.py").read_text()
+        canonical = source[source.index("canonical_order = torch.argsort"):
+                           source.index("problem.recv_tokens = receive_count")]
+        self.assertIn("canonical_sources", canonical)
+        self.assertIn("canonical_ids", canonical)
+        self.assertIn("canonical_weights", canonical)
+        self.assertNotIn("_tensor_sha256(source_ids", canonical)
+        mori = (HERE / "ep_mori.py").read_text()
+        self.assertIn(
+            'self.kernel_generation = "async-ll" if self._async_ll else "intranode"',
+            mori,
+        )
+        backend = types.SimpleNamespace(name="mori", kernel_generation="async-ll")
+        self.assertEqual(ep_harness.kernel_generation(backend), "async-ll")
+
+    def test_terminal_fail_safe_fills_only_missing_shard_cases(self) -> None:
+        # Two scenarios against runtime/common.sh:
+        #  1. cx_emit_setup_failures runs on a results directory that already
+        #     holds one complete terminal document and one broken partial
+        #     result; afterwards there must be exactly one attempt per case.
+        #  2. A required operator config that fails to load must still leave a
+        #     terminal document for every case in the shard.
+        matrix = sweep_matrix.resolve_matrix(backends="all", max_cases=128)
+        # Any shard with at least two cases will do; it is trimmed to two below.
+        shard = next(item for item in matrix["include"] if item["n"] >= 2)
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary)
+            matrix_path = root / "matrix.json"
+            control_path = root / "control.json"
+            out_dir = root / "results"
+            matrix_path.write_text(json.dumps(matrix))
+            control = sweep_matrix.extract_shard(
+                matrix_path, shard["id"], control_path,
+                sku=shard["sku"], backend=shard["backend"], nodes=shard["nodes"],
+            )
+            # Rewrite the control file so the shard contains exactly two cases.
+            control["cases"] = control["cases"][:2]
+            control["n"] = 2
+            control_path.write_text(json.dumps(control))
+            # Case body without its case_id; the id is supplied separately as
+            # expected_case_id when the terminal document is built.
+            first = {key: value for key, value in control["cases"][0].items() if key != "case_id"}
+            git_run = {
+                "artifact": "artifact", "job": "job", "ref": "collectivex",
+                "repo": "SemiAnalysisAI/InferenceX", "run_attempt": "1",
+                "run_id": "123", "source_sha": "a" * 40,
+            }
+            allocation = {
+                "artifact": "artifact", "execution_id": "execution", "job": "job",
+                "repo": "SemiAnalysisAI/InferenceX", "run_attempt": "1", "run_id": "123",
+                "runner": shard["sku"], "source_sha": "a" * 40,
+            }
+            out_dir.mkdir()
+            # existing.json: a complete failed terminal document for case 0.
+            existing = contracts.make_terminal_document(
+                allocation_factors=allocation, attempt_ordinal=1, case=first,
+                case_factors={"case": first, "profile": identity.V1_CASE_PROFILE, "sku": shard["sku"]},
+                control_sha256=hashlib.sha256(control_path.read_bytes()).hexdigest(),
+                failure_mode="setup", generated_at="2026-07-04T00:00:00Z", git_run=git_run,
+                reason="launcher-setup-failed", return_code=7, source="runtime-emitter",
+                status="failed",
+                expected_case_id=control["cases"][0]["case_id"],
+            )
+            (out_dir / "existing.json").write_text(json.dumps(existing))
+            # partial.json: a raw-format stub for case 1 whose referenced
+            # sample artifact is not parseable JSON.
+            (out_dir / "partial.json").write_text(json.dumps({
+                "format": contracts.RAW_FORMAT,
+                "identity": {"case_id": control["cases"][1]["case_id"]},
+                "sample_artifact": {"path": "partial.samples.json"},
+            }))
+            (out_dir / "partial.samples.json").write_text("{broken")
+            # The values here mirror git_run/allocation above so the documents
+            # emitted by the shell helper describe the same run.
+            environment = {
+                **os.environ,
+                "COLLECTIVEX_OPERATOR_CONFIG": "/dev/null",
+                "CX_SHARD_FILE": str(control_path),
+                "CX_SHARD_SKU": shard["sku"],
+                "CX_RUNNER": shard["sku"],
+                "CX_BENCH": shard["backend"],
+                "CX_NODES": str(shard["nodes"]),
+                "COLLECTIVEX_EXECUTION_ID": "execution",
+                "COLLECTIVEX_ARTIFACT_NAME": "artifact",
+                "GITHUB_JOB": "job", "GITHUB_REF_NAME": "collectivex",
+                "GITHUB_REPOSITORY": "SemiAnalysisAI/InferenceX",
+                "GITHUB_RUN_ATTEMPT": "1", "GITHUB_RUN_ID": "123",
+                "GITHUB_SHA": "a" * 40,
+            }
+            # Positional arguments: $1 = common.sh, $2 = ROOT, $3 = results
+            # directory, $4 = backend; the trailing 7 is passed as a literal.
+            subprocess.run(
+                [
+                    "bash", "-c",
+                    'source "$1"; cx_emit_setup_failures "$2" "$3" "$4" 7',
+                    "_", str(ROOT / "runtime" / "common.sh"), str(ROOT),
+                    str(out_dir), shard["backend"],
+                ],
+                check=True,
+                env=environment,
+            )
+            # Exactly two *.json attempts must remain, and together they must
+            # satisfy delivery validation against the control file.
+            attempts = [contracts.strict_load(path) for path in out_dir.glob("*.json")]
+            self.assertEqual(len(attempts), 2)
+            self.assertEqual(
+                contracts.validate_attempt_paths([str(path) for path in out_dir.glob("*.json")]),
+                2,
+            )
+            delivery = [str(path) for path in out_dir.glob("*.json")]
+            self.assertEqual(contracts.validate_delivery(delivery, str(control_path)), 2)
+            # Delivering only one of the two attempts must be rejected.
+            with self.assertRaises(contracts.ContractError):
+                contracts.validate_delivery(delivery[:1], str(control_path))
+            self.assertEqual(
+                {attempt["identity"]["case_id"] for attempt in attempts},
+                {case["case_id"] for case in control["cases"]},
+            )
+            # The broken partial pair must be present under a .quarantine
+            # suffix (which keeps it out of the *.json glob above).
+            self.assertTrue((out_dir / "partial.json.quarantine").is_file())
+            self.assertTrue((out_dir / "partial.samples.json.quarantine").is_file())
+
+            # Scenario 2: REPO_ROOT points at an empty tree that only contains
+            # the results directory; config loading is made mandatory while
+            # COLLECTIVEX_OPERATOR_CONFIG is still /dev/null.
+            preallocation = root / "preallocation"
+            preallocation_results = preallocation / "experimental" / "CollectiveX" / "results"
+            preallocation_results.mkdir(parents=True)
+            failed = subprocess.run(
+                [
+                    "bash", "-c",
+                    'source "$1"; REPO_ROOT="$2"; export REPO_ROOT; '
+                    'cx_install_launcher_fail_safe; cx_load_operator_config',
+                    "_", str(ROOT / "runtime" / "common.sh"), str(preallocation),
+                ],
+                env={**environment, "COLLECTIVEX_OPERATOR_CONFIG_REQUIRED": "1"},
+            )
+            # No check=True here: a non-zero exit is the expected outcome.
+            self.assertNotEqual(failed.returncode, 0)
+            preallocation_attempts = [
+                contracts.validate_terminal_document(contracts.strict_load(path))
+                for path in preallocation_results.glob("*.json")
+            ]
+            self.assertEqual(
+                {attempt["identity"]["case_id"] for attempt in preallocation_attempts},
+                {case["case_id"] for case in control["cases"]},
+            )
+
+    def test_runtime_identity_mismatch_is_failed_not_unsupported(self) -> None:
+        # Exercises make_terminal_from_environment for one runnable case:
+        # return code 5 maps to a failed runtime-identity outcome, every
+        # registered failure mode maps to its registered reason, an
+        # unregistered mode is rejected, and return code 6 maps to the generic
+        # execution failure.
+        wrapper = next(
+            item for item in sweep_matrix.resolve_matrix()["requested_cases"]
+            if item["disposition"] == "runnable"
+        )
+        case = wrapper["case"]
+        # Environment describing the selected case and a fixed run identity.
+        environment = {
+            "CX_RUNNER": wrapper["sku"], "CX_CASE_ID": case["case_id"],
+            "CX_SUITE": case["suite"], "CX_WORKLOAD_NAME": case["workload"],
+            "CX_REQUIRED_PUBLICATION": case["required_publication"],
+            "CX_ROUTING": case["routing"], "CX_EPLB": "1" if case["eplb"] else "",
+            "CX_EP": str(case["ep"]), "CX_NGPUS": str(case["ep"]),
+            "CX_HIDDEN": str(case["hidden"]), "CX_TOPK": str(case["topk"]),
+            "CX_EXPERTS": str(case["experts"]), "CX_NODES": str(case["nodes"]),
+            "CX_GPUS_PER_NODE": str(case["gpus_per_node"]),
+            "CX_SCALE_UP_DOMAIN": str(case["scale_up_domain"]),
+            "CX_TOKENS_LADDER": case["ladder"], "CX_CANONICAL": "1",
+            "CX_ITERS": "8", "CX_TRIALS": "64", "CX_WARMUP": "32",
+            "CX_SAMPLES_PER_POINT": "512", "GITHUB_RUN_ID": "123",
+            "GITHUB_RUN_ATTEMPT": "1", "GITHUB_REF_NAME": "collectivex",
+            "GITHUB_SHA": "a" * 40, "GITHUB_REPOSITORY": "SemiAnalysisAI/InferenceX",
+            "GITHUB_JOB": "sweep", "COLLECTIVEX_ARTIFACT_NAME": "artifact",
+            "COLLECTIVEX_EXECUTION_ID": "execution",
+        }
+        # No failure_mode is passed; the outcome is derived from return_code=5.
+        with mock.patch.dict(os.environ, environment, clear=False):
+            terminal = contracts.make_terminal_from_environment(
+                backend=case["backend"], phase=case["phase"], return_code=5
+            )
+        self.assertEqual(terminal["identity"]["case_id"], case["case_id"])
+        self.assertEqual(
+            terminal["outcome"],
+            {
+                "failure_mode": "runtime-identity",
+                "reason": "runtime-identity-mismatch",
+                "return_code": 5,
+                "status": "failed",
+            },
+        )
+        for mode, reason in contracts.RUNTIME_FAILURE_REASONS.items():
+            with self.subTest(mode=mode), mock.patch.dict(os.environ, environment, clear=False):
+                staged = contracts.make_terminal_from_environment(
+                    backend=case["backend"], phase=case["phase"], return_code=1,
+                    failure_mode=mode,
+                )
+                self.assertEqual(staged["outcome"]["reason"], reason)
+                # Pair the mode with some other reason; the second assignment
+                # guarantees the substituted reason really differs.
+                mismatched = copy.deepcopy(staged)
+                mismatched["outcome"]["reason"] = "distributed-command-failed"
+                if reason == "distributed-command-failed":
+                    mismatched["outcome"]["reason"] = "backend-setup-failed"
+                with self.assertRaisesRegex(
+                    contracts.ContractError, "source and outcome are not registered"
+                ):
+                    contracts.validate_terminal_document(mismatched)
+        with mock.patch.dict(os.environ, environment, clear=False):
+            with self.assertRaisesRegex(
+                contracts.ContractError, "runtime failure mode is not registered"
+            ) as raised:
+                contracts.make_terminal_from_environment(
+                    backend=case["backend"], phase=case["phase"], return_code=1,
+                    failure_mode="raw-private-error",
+                )
+        # The rejected value itself must not be echoed in the error message.
+        self.assertNotIn("raw-private-error", str(raised.exception))
+        with mock.patch.dict(os.environ, environment, clear=False):
+            generic = contracts.make_terminal_from_environment(
+                backend=case["backend"], phase=case["phase"], return_code=6,
+            )
+        self.assertEqual(
+            generic["outcome"],
+            {
+                "failure_mode": "execution",
+                "reason": "distributed-command-failed",
+                "return_code": 6,
+                "status": "failed",
+            },
+        )
+
+    def test_launchers_use_private_logs_and_allowlisted_failure_stages(self) -> None:
+        # Source-text checks only: nothing is executed. Each launcher script,
+        # runtime/common.sh and runtime/run_in_container.sh are read as text and
+        # searched for required or forbidden substrings.
+        # Maps launcher file name -> failure stages it must set.
+        expected = {
+            "launch_single-slurm.sh": {
+                "setup", "registry-verification", "container-import", "container-hash",
+                "repository-stage", "scheduler-allocation", "container-launch",
+                "artifact-collection",
+            },
+            "launch_gb-nv.sh": {
+                "setup", "registry-verification", "container-import", "container-hash",
+                "repository-stage", "scheduler-allocation", "container-launch", "backend-setup",
+                "execution", "artifact-collection",
+            },
+            "launch_mi-amds.sh": {
+                "setup", "repository-stage", "registry-verification", "scheduler-allocation",
+                "container-import", "container-hash", "container-launch", "artifact-collection",
+            },
+        }
+        for name, stages in expected.items():
+            launcher = (ROOT / "launchers" / name).read_text()
+            # Launchers must not forward the whole environment; they are
+            # expected to go through cx_container_exports instead.
+            self.assertNotIn("--export=ALL", launcher)
+            self.assertIn("cx_container_exports", launcher)
+            self.assertIn("collect_rc=0", launcher)
+            for stage in stages:
+                with self.subTest(launcher=name, stage=stage):
+                    self.assertIn(f"cx_set_failure_stage {stage}", launcher)
+        amd = (ROOT / "launchers" / "launch_mi-amds.sh").read_text()
+        self.assertIn("cx_ensure_squash_on_job", amd)
+        self.assertIn("cx_fail_stage container-hash", amd)
+        # The AMD launcher must not print the import log.
+        self.assertNotIn('cat "$import_log"', amd)
+        common = (ROOT / "runtime" / "common.sh").read_text()
+        self.assertIn('bash -s -- "$sq" "$lock" "$image"', common)
+        self.assertIn("> \"$log\" 2>&1 <<'BASH'", common)
+        self.assertIn("cx_fail_stage container-import", common)
+        runtime = (ROOT / "runtime" / "run_in_container.sh").read_text()
+        # Slice out the text of cx_container_exports, up to the first line
+        # that starts with "}", and collect every comma-separated name found
+        # in its printf '%s' '...' payloads.
+        export_start = common.index("\ncx_container_exports() {")
+        exports = common[export_start:common.index("\n}", export_start)]
+        export_names = {
+            name
+            for payload in re.findall(r"printf '%s' '([^']*)'", exports)
+            for name in payload.split(",") if name
+        }
+        # None of these names may appear in the export list.
+        for private_name in (
+            "COLLECTIVEX_OPERATOR_CONFIG", "GITHUB_TOKEN", "GITHUB_WORKSPACE", "HOME",
+            "CX_PARTITION", "CX_ACCOUNT", "CX_SQUASH_DIR", "CX_STAGE_DIR",
+        ):
+            self.assertNotIn(private_name, export_names)
+        self.assertIn("CX_BACKEND_CACHE_ROOT", export_names)
+        self.assertIn("CX_BACKEND_CACHE_SENTINEL_SHA256", export_names)
+        self.assertNotIn("CX_PREPARED_BACKEND_CACHE", export_names)
+        self.assertIn("MORI_COMMIT", export_names)
+        # The in-container runtime script must record both stage markers.
+        self.assertIn("cx_write_runtime_stage backend-setup", runtime)
+        self.assertIn("cx_write_runtime_stage execution", runtime)
+        gb = (ROOT / "launchers" / "launch_gb-nv.sh").read_text()
+        self.assertIn("cx_private_log_path shard-summary", gb)
+        self.assertIn("cx_fail_stage execution", gb)
+
+    def test_case_failure_diagnostic_precedes_normal_srun_footer(self) -> None:
+        with tempfile.TemporaryDirectory() as temporary:
+            log = Path(temporary) / "runtime.log"
+            log.write_text(
+                "WARN: deepep decode run failed rc=1 (CX_RUN_TIMEOUT=900s)\n"
+                "SHARD done: 6/6 case(s) failed\n"
+                "srun: error: task exited 1\n"
+            )
+            result = subprocess.run(
+                [
+                    "bash", "-c",
+                    'source "$1"; cx_fail_stage execution "$2"',
+                    "_", str(ROOT / "runtime" / "common.sh"), str(log),
+                ],
+                text=True,
+                capture_output=True,
+            )
+            self.assertEqual(result.returncode, 1)
+            self.assertIn("diagnostic=benchmark-case-failure", result.stderr)
+
+    def test_non_timeout_failure_warning_is_classified_as_case_failure(self) -> None:
+        with tempfile.TemporaryDirectory() as temporary:
+            log = Path(temporary) / "runtime.log"
+            log.write_text("WARN: deepep decode run failed rc=1\nsrun: task exited 1\n")
+            result = subprocess.run(
+                [
+                    "bash", "-c",
+                    'source "$1"; cx_fail_stage execution "$2"',
+                    "_", str(ROOT / "runtime" / "common.sh"), str(log),
+                ],
+                text=True,
+                capture_output=True,
+            )
+            self.assertEqual(result.returncode, 1)
+            self.assertNotIn("diagnostic=network-or-timeout", result.stderr)
+            self.assertIn("diagnostic=benchmark-case-failure", result.stderr)
+
+    def test_private_runtime_failure_signatures_override_case_footer(self) -> None:
+        signatures = {
+            "DeepEP V2 no-GIN run is outside one realized LSA domain":
+                "accelerator-topology",
+            "CUDA error: call requires newer driver": "accelerator-driver",
+            "NCCL failure in ncclCommWindowRegister": "nccl-device-api",
+            "NVCC compilation failed": "jit-toolchain",
+            "CUDA out of memory": "accelerator-memory",
+            "torch rendezvous timed out": "network-or-timeout",
+        }
+        with tempfile.TemporaryDirectory() as temporary:
+            log = Path(temporary) / "runtime.log"
+            for signature, diagnostic in signatures.items():
+                log.write_text(f"{signature}\nSHARD done: 6/6 case(s) failed\n")
+                result = subprocess.run(
+                    [
+                        "bash", "-c",
+                        'source "$1"; cx_fail_stage execution "$2"',
+                        "_", str(ROOT / "runtime" / "common.sh"), str(log),
+                    ],
+                    text=True,
+                    capture_output=True,
+                )
+                self.assertEqual(result.returncode, 1)
+                self.assertIn(f"diagnostic={diagnostic}", result.stderr)
+
+    def test_runtime_stage_marker_distinguishes_launch_from_execution(self) -> None:
+        with tempfile.TemporaryDirectory() as temporary:
+            mount = Path(temporary)
+            root = mount / "experimental" / "CollectiveX"
+            root.mkdir(parents=True)
+            command = r'''
+              set -euo pipefail
+              source "$1"
+              export COLLECTIVEX_EXECUTION_ID=test_1_shard CX_TS=test
+              cx_set_failure_stage container-launch
+              cx_prepare_runtime_marker "$2"
+              (cd "$2/experimental/CollectiveX"; cx_write_runtime_stage backend-setup)
+              cx_adopt_runtime_stage "$2"
+              test "$CX_FAILSAFE_MODE" = backend-setup
+              (cd "$2/experimental/CollectiveX"; cx_write_runtime_stage execution)
+              cx_adopt_runtime_stage "$2"
+              test "$CX_FAILSAFE_MODE" = execution
+            '''
+            subprocess.run(
+                ["bash", "-c", command, "_", str(ROOT / "runtime" / "common.sh"),
+                 str(mount)],
+                check=True,
+            )
+
+    def test_canonical_gha_environment_is_locked_but_manual_overrides_survive(self) -> None:
+        # Runs cx_lock_canonical_gha_env four times inside one bash script
+        # (under `set -euo pipefail`, so any failed `test` fails the run):
+        #  1. mi325x, canonical mode, with untrusted overrides exported: image,
+        #     digest, GPU count/seed/timeout and MORI settings must be reset to
+        #     the pinned values, CX_STAGE_DIR must become GITHUB_WORKSPACE, and
+        #     the NCCL/lock/cache/dry-run overrides must end up unset.
+        #  2. gb300, two nodes: the multiarch image is selected, NCCL home and
+        #     master port are fixed, the stage directory is placed under
+        #     CX_SQUASH_DIR, and the MORI overrides must end up unset.
+        #  3. mi355x with COLLECTIVEX_OPERATOR_CONFIG_LOADED set: CX_LOCK_DIR
+        #     is kept.
+        #  4. mi355x with canonical mode unset: manual values are left alone.
+        common = ROOT / "runtime" / "common.sh"
+        command = r'''
+          set -euo pipefail
+          source "$1"
+          export COLLECTIVEX_CANONICAL_GHA=1 GITHUB_ACTIONS=true
+          export GITHUB_RUN_ID=123 GITHUB_RUN_ATTEMPT=1
+          export COLLECTIVEX_SOURCE_SHA=aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+          export CX_SHARD_FILE=.shards/test.json CX_SHARD_SKU=mi325x
+          export CX_NODES=1 CX_GPUS_PER_NODE=8
+          export CX_IMAGE=untrusted CX_IMAGE_DIGEST=untrusted CX_NGPUS=99
+          export CX_NCCL_HOME=/untrusted CX_LOCK_DIR=/tmp CX_SQUASH_DIR=/shared/containers
+          export CX_STAGE_DIR=/private/stale-stage
+          export CX_MORI_KERNEL_TYPE=intranode MORI_ENABLE_SDMA=0
+          export NCCL_MNNVL_ENABLE=1 MC_FORCE_MNNVL=1 CX_DRYRUN=1
+          export CX_BACKEND_CACHE_ROOT=/untrusted CX_BACKEND_CACHE_SENTINEL_SHA256=bad
+          export CX_PREPARED_BACKEND_CACHE=/untrusted CX_BACKEND_SOURCE_ROOT=/untrusted
+          cx_lock_canonical_gha_env mi325x
+          test "$CX_IMAGE" = "$CX_IMAGE_AMD_MORI_MI325"
+          test "$CX_IMAGE_DIGEST" = "$CX_IMAGE_AMD_MORI_MI325_DIGEST"
+          test "$CX_NGPUS:$CX_SEED:$CX_RUN_TIMEOUT" = 8:67:1800
+          test "$CX_MORI_KERNEL_TYPE:$MORI_DISABLE_AUTO_XGMI:$MORI_ENABLE_SDMA" = asyncll:0:1
+          test "$MORI_COMMIT" = "$CX_MORI_COMMIT_MI325"
+          test "$MORI_APP_LOG_LEVEL:$MORI_SHMEM_LOG_LEVEL:$MORI_IO_LOG_LEVEL" = info:info:info
+          test "$CX_STAGE_DIR" = "$GITHUB_WORKSPACE"
+          test -z "${CX_NCCL_HOME+x}${CX_LOCK_DIR+x}${NCCL_MNNVL_ENABLE+x}${MC_FORCE_MNNVL+x}"
+          test -z "${CX_BACKEND_CACHE_ROOT+x}${CX_BACKEND_CACHE_SENTINEL_SHA256+x}"
+          test -z "${CX_PREPARED_BACKEND_CACHE+x}${CX_BACKEND_SOURCE_ROOT+x}"
+          test -z "${CX_DRYRUN+x}"
+
+          unset CX_STAGE_DIR
+          export CX_SHARD_SKU=gb300 CX_NODES=2 CX_GPUS_PER_NODE=4
+          export CX_IMAGE=untrusted CX_NGPUS=1 CX_MORI_KERNEL_TYPE=untrusted
+          export MORI_ENABLE_SDMA=0 CX_NCCL_HOME=/untrusted CX_MASTER_PORT=1
+          cx_lock_canonical_gha_env gb300
+          test "$CX_IMAGE" = "$CX_IMAGE_MULTIARCH"
+          test "$CX_IMAGE_DIGEST" = "$CX_IMAGE_MULTIARCH_DIGEST"
+          test "$CX_NGPUS:$CX_SEED:$CX_RUN_TIMEOUT" = 8:67:900
+          test "$CX_NCCL_HOME:$CX_MASTER_PORT" = /usr:29551
+          test "$CX_STAGE_DIR" = /shared/containers/.stage
+          test -z "${CX_MORI_KERNEL_TYPE+x}${MORI_ENABLE_SDMA+x}"
+
+          export COLLECTIVEX_OPERATOR_CONFIG_LOADED=$$
+          export CX_SHARD_SKU=mi355x CX_NODES=1 CX_GPUS_PER_NODE=8
+          export CX_LOCK_DIR=/validated/amd-locks
+          cx_lock_canonical_gha_env mi355x
+          test "$CX_LOCK_DIR" = /validated/amd-locks
+          test "$MORI_COMMIT" = "$CX_MORI_COMMIT_MI355"
+
+          unset COLLECTIVEX_CANONICAL_GHA
+          unset COLLECTIVEX_OPERATOR_CONFIG_LOADED
+          CX_IMAGE=manual CX_IMAGE_DIGEST=manual CX_NGPUS=3
+          CX_MORI_KERNEL_TYPE=manual
+          cx_lock_canonical_gha_env mi355x
+          test "$CX_IMAGE:$CX_IMAGE_DIGEST:$CX_NGPUS:$CX_MORI_KERNEL_TYPE" = manual:manual:3:manual
+        '''
+        # The workspace is created under the home directory and set to mode
+        # 0o720 (no permissions for "other"); the sibling tests show that an
+        # other-writable or symlinked workspace is rejected.
+        with tempfile.TemporaryDirectory(dir=Path.home()) as workspace:
+            Path(workspace).chmod(0o720)
+            subprocess.run(
+                ["bash", "-c", command, "_", str(common)],
+                check=True,
+                env={
+                    **os.environ,
+                    "COLLECTIVEX_OPERATOR_CONFIG": "/dev/null",
+                    "GITHUB_WORKSPACE": workspace,
+                },
+            )
+            # Locking the environment must not leave any files in the workspace.
+            self.assertEqual(list(Path(workspace).iterdir()), [])
+
+    def test_canonical_amd_stage_rejects_a_world_writable_workspace(self) -> None:
+        common = ROOT / "runtime" / "common.sh"
+        command = r'''
+          source "$1"
+          export COLLECTIVEX_CANONICAL_GHA=1 GITHUB_ACTIONS=true
+          export GITHUB_RUN_ID=123 GITHUB_RUN_ATTEMPT=1
+          export COLLECTIVEX_SOURCE_SHA=aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+          export CX_SHARD_FILE=.shards/test.json CX_SHARD_SKU=mi325x
+          export CX_NODES=1 CX_GPUS_PER_NODE=8 CX_SQUASH_DIR=/shared/containers
+          cx_lock_canonical_gha_env mi325x
+        '''
+        with tempfile.TemporaryDirectory(dir=Path.home()) as workspace:
+            Path(workspace).chmod(0o702)
+            result = subprocess.run(
+                ["bash", "-c", command, "_", str(common)],
+                text=True,
+                capture_output=True,
+                env={
+                    **os.environ,
+                    "COLLECTIVEX_OPERATOR_CONFIG": "/dev/null",
+                    "GITHUB_WORKSPACE": workspace,
+                },
+            )
+            self.assertNotEqual(result.returncode, 0)
+            self.assertIn("canonical AMD staging workspace is unsafe", result.stderr)
+            self.assertNotIn(workspace, result.stderr)
+
+    def test_canonical_amd_stage_rejects_a_symlinked_workspace(self) -> None:
+        common = ROOT / "runtime" / "common.sh"
+        command = r'''
+          source "$1"
+          export COLLECTIVEX_CANONICAL_GHA=1 GITHUB_ACTIONS=true
+          export GITHUB_RUN_ID=123 GITHUB_RUN_ATTEMPT=1
+          export COLLECTIVEX_SOURCE_SHA=aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+          export CX_SHARD_FILE=.shards/test.json CX_SHARD_SKU=mi325x
+          export CX_NODES=1 CX_GPUS_PER_NODE=8 CX_SQUASH_DIR=/shared/containers
+          cx_lock_canonical_gha_env mi325x
+        '''
+        with tempfile.TemporaryDirectory(dir=Path.home()) as temporary:
+            root = Path(temporary)
+            real = root / "real"
+            real.mkdir()
+            link = root / "workspace"
+            link.symlink_to(real, target_is_directory=True)
+            result = subprocess.run(
+                ["bash", "-c", command, "_", str(common)],
+                text=True,
+                capture_output=True,
+                env={
+                    **os.environ,
+                    "COLLECTIVEX_OPERATOR_CONFIG": "/dev/null",
+                    "GITHUB_WORKSPACE": str(link),
+                },
+            )
+            self.assertNotEqual(result.returncode, 0)
+            self.assertIn("canonical AMD staging workspace is unsafe", result.stderr)
+            self.assertNotIn(str(root), result.stderr)
+
+    def test_image_selection_and_registry_verification_are_fail_closed(self) -> None:
+        common = ROOT / "runtime" / "common.sh"
+        command = r'''
+          source "$1"
+          test "$(cx_default_image mi325x)" = "$CX_IMAGE_AMD_MORI_MI325"
+          test "$(cx_default_image mi355x)" = "$CX_IMAGE_AMD_MORI"
+          pinned="sha256:$(printf 'a%.0s' {1..64})"
+          curl() {
+            case "$*" in
+              *auth.docker.io*) printf '{"token":"test"}' ;;
+              *) printf 'Docker-Content-Digest: %s\r\n' "$pinned" ;;
+            esac
+          }
+          test "$(cx_resolve_registry_digest ubuntu:latest)" = "$pinned"
+          test "$(cx_resolve_registry_digest docker.io/library/ubuntu:latest)" = "$pinned"
+          ! (cx_resolve_registry_digest "ubuntu@$pinned")
+          ! (cx_resolve_registry_digest ghcr.io/example/image:tag)
+          ! (cx_resolve_registry_digest 'ubuntu@sha256:bad')
+          curl() {
+            case "$*" in *auth.docker.io*) printf '{"token":"test"}';; esac
+          }
+          ! (cx_resolve_registry_digest ubuntu:latest)
+          cx_resolve_registry_digest() { printf '%s' "$CX_IMAGE_MULTIARCH_DIGEST"; }
+          cx_verify_registry_image "$CX_IMAGE_MULTIARCH"
+          test "$COLLECTIVEX_IMAGE_DIGEST_VERIFIED" = 1
+          test "$COLLECTIVEX_IMAGE_DIGEST" = "$CX_IMAGE_MULTIARCH_DIGEST"
+          cx_reverify_registry_image "$CX_IMAGE_MULTIARCH"
+          cx_resolve_registry_digest() { printf 'sha256:%064d' 0; }
+          ! (cx_reverify_registry_image "$CX_IMAGE_MULTIARCH")
+          ! (cx_verify_registry_image "$CX_IMAGE_MULTIARCH")
+        '''
+        subprocess.run(
+            ["bash", "-c", command, "_", str(common)],
+            check=True,
+            env={**os.environ, "COLLECTIVEX_OPERATOR_CONFIG": "/dev/null"},
+        )
+
+    def test_canonical_gha_requires_compute_visible_staging(self) -> None:
+        with tempfile.TemporaryDirectory() as temporary:
+            repo = Path(temporary) / "repo"
+            squash = Path(temporary) / "squash"
+            source = repo / "experimental" / "CollectiveX"
+            source.mkdir(parents=True)
+            squash.mkdir()
+            (source / "public.py").write_text("public\n")
+            (source / "private-infra.md").write_text("private\n")
+            command = r'''
+              set -euo pipefail
+              source "$1"
+              unset CX_SHARD_FILE CX_STAGE_DIR
+              ! (COLLECTIVEX_CANONICAL_GHA=1; cx_stage_repo "$2" "")
+              staged="$(COLLECTIVEX_CANONICAL_GHA=0; cx_stage_repo "$2" "")"
+              test "$staged" != "$2"
+              test -f "$staged/experimental/CollectiveX/public.py"
+              test ! -e "$staged/experimental/CollectiveX/private-infra.md"
+              cx_cleanup_stage "$staged" "$2"
+              test ! -e "$staged"
+            '''
+            subprocess.run(
+                ["bash", "-c", command, "_", str(ROOT / "runtime" / "common.sh"),
+                 str(repo)],
+                check=True,
+                env={
+                    **os.environ,
+                    "COLLECTIVEX_OPERATOR_CONFIG": "/dev/null",
+                    "CX_SQUASH_DIR": str(squash),
+                },
+            )
+            self.assertEqual(list(squash.iterdir()), [])
+
+    def test_manual_stage_does_not_write_to_checkout_parent(self) -> None:
+        with tempfile.TemporaryDirectory() as temporary:
+            parent = Path(temporary) / "readonly-parent"
+            repo = parent / "repo"
+            squash = parent / "squash"
+            source = repo / "experimental" / "CollectiveX"
+            source.mkdir(parents=True)
+            squash.mkdir(mode=0o700)
+            (source / "public.py").write_text("public\n")
+            original_mode = parent.stat().st_mode & 0o777
+            parent.chmod(0o555)
+            try:
+                command = r'''
+                  set -euo pipefail
+                  source "$1"
+                  unset CX_STAGE_DIR
+                  staged="$(cx_stage_repo "$2" "")"
+                  case "$staged" in "$3"/.collectivex-stage-*) ;; *) exit 1 ;; esac
+                  test -f "$staged/experimental/CollectiveX/public.py"
+                  test ! -e "$4/.collectivex-stage"
+                  cx_cleanup_stage "$staged" "$2"
+                  test ! -e "$staged"
+                '''
+                subprocess.run(
+                    [
+                        "bash", "-c", command, "_",
+                        str(ROOT / "runtime" / "common.sh"), str(repo),
+                        str(squash), str(parent),
+                    ],
+                    check=True,
+                    env={
+                        **os.environ,
+                        "COLLECTIVEX_OPERATOR_CONFIG": "/dev/null",
+                        "CX_SQUASH_DIR": str(squash),
+                    },
+                )
+            finally:
+                parent.chmod(original_mode)
+            self.assertEqual(
+                sorted(path.name for path in parent.iterdir()),
+                ["repo", "squash"],
+            )
+            self.assertEqual(list(squash.iterdir()), [])
+
+    def test_stage_refuses_to_reuse_an_execution_child(self) -> None:
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary)
+            repo = root / "repo"
+            source = repo / "experimental" / "CollectiveX"
+            source.mkdir(parents=True)
+            (source / "public.py").write_text("public\n")
+            base = root / "stage"
+            child = base / "job_collision"
+            child.mkdir(parents=True, mode=0o700)
+            sentinel = child / "keep"
+            sentinel.write_text("keep")
+            command = r'''
+              source "$1"
+              ! (cx_stage_repo "$2" "$3")
+            '''
+            subprocess.run(
+                [
+                    "bash", "-c", command, "_",
+                    str(ROOT / "runtime" / "common.sh"), str(repo), str(base),
+                ],
+                check=True,
+                env={
+                    **os.environ,
+                    "COLLECTIVEX_OPERATOR_CONFIG": "/dev/null",
+                    "COLLECTIVEX_CANONICAL_GHA": "1",
+                    "COLLECTIVEX_EXECUTION_ID": "collision",
+                    "CX_STAGE_DIR": str(base),
+                },
+            )
+            self.assertEqual(sentinel.read_text(), "keep")
+
+    def test_stage_removes_its_execution_child_when_rsync_fails(self) -> None:
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary)
+            repo = root / "repo"
+            source = repo / "experimental" / "CollectiveX"
+            source.mkdir(parents=True)
+            (source / "public.py").write_text("public\n")
+            base = root / "stage"
+            sentinel = root / "rsync-called"
+            command = r'''
+              source "$1"
+              rsync() { : > "$RSYNC_CALLED"; return 1; }
+              ! cx_stage_repo "$2" "$3"
+            '''
+            subprocess.run(
+                [
+                    "bash", "-c", command, "_",
+                    str(ROOT / "runtime" / "common.sh"), str(repo), str(base),
+                ],
+                check=True,
+                env={
+                    **os.environ,
+                    "COLLECTIVEX_OPERATOR_CONFIG": "/dev/null",
+                    "COLLECTIVEX_CANONICAL_GHA": "1",
+                    "CX_STAGE_DIR": str(base),
+                    "RSYNC_CALLED": str(sentinel),
+                },
+            )
+            self.assertTrue(sentinel.is_file())
+            self.assertEqual(list(base.iterdir()), [])
+
+    def test_backend_cache_reuses_v3_and_falls_back_once_without_repair(self) -> None:  # cache preparation: concurrent reuse, symlink fallback, no leftovers
+        common = ROOT / "runtime" / "common.sh"
+        with tempfile.TemporaryDirectory() as temporary:
+            parent = Path(temporary) / "stage"  # directory handed to cx_prepare_backend_cache
+            parent.mkdir(mode=0o700)
+            concurrent = Path(temporary) / "concurrent"  # one output file per background worker
+            concurrent.mkdir(mode=0o700)
+            command = r'''
+              set -euo pipefail
+              source "$1"
+              for worker in 1 2 3; do
+                (
+                  cx_prepare_backend_cache "$2"
+                  printf '%s %s\n' "$CX_BACKEND_CACHE_SENTINEL_SHA256" \
+                    "$CX_PREPARED_BACKEND_CACHE" > "$3/$worker"
+                ) &
+              done
+              wait
+              cmp "$3/1" "$3/2"
+              cmp "$3/1" "$3/3"
+              cx_prepare_backend_cache "$2"
+              first="$CX_PREPARED_BACKEND_CACHE"
+              first_digest="$CX_BACKEND_CACHE_SENTINEL_SHA256"
+              chmod 2700 "$first"
+              cx_prepare_backend_cache "$2"
+              second="$CX_PREPARED_BACKEND_CACHE"
+              test "$first" = "$second"
+              test "$first_digest" = "$CX_BACKEND_CACHE_SENTINEL_SHA256"
+              test "$first" = "$(cd "$2" && pwd -P)/.collectivex-backend-cache-v3-$(id -u)"
+              export CX_BACKEND_CACHE_ROOT="$first"
+              cx_verify_backend_cache_mount
+              export CX_BACKEND_CACHE_SENTINEL_SHA256="$(printf '0%.0s' {1..64})"
+              ! cx_verify_backend_cache_mount
+            '''
+            subprocess.run(  # three workers race, then the cache is prepared twice more and verified
+                [
+                    "bash", "-c", command, "_", str(common), str(parent),
+                    str(concurrent),
+                ],
+                check=True,
+            )
+            cache = parent / f".collectivex-backend-cache-v3-{os.getuid()}"  # per-uid v3 cache path
+            self.assertTrue(cache.is_dir())
+            self.assertEqual(cache.stat().st_mode & 0o777, 0o700)  # the script's chmod 2700 adds setgid; only the low nine bits are compared
+            self.assertEqual(  # no temporary sentinel files are left in the cache
+                list(cache.glob(".collectivex-mount-sentinel-v1.tmp.*")), []
+            )
+            alias = Path(temporary) / "stage-alias"  # reach the same parent through a symlink
+            alias.symlink_to(parent, target_is_directory=True)
+            canonical = subprocess.run(
+                [
+                    "bash", "-c",
+                    'source "$1"; cx_prepare_backend_cache "$2"; '
+                    'printf "%s\\n%s\\n" "$CX_PREPARED_BACKEND_CACHE" '
+                    '"$CX_BACKEND_CACHE_SENTINEL_SHA256"',
+                    "_", str(common), str(alias),
+                ],
+                text=True,
+                capture_output=True,
+                check=True,
+            )
+            cache_path, digest = canonical.stdout.splitlines()  # exactly two lines: path, then digest
+            self.assertEqual(cache_path, str(cache.resolve()))  # the resolved path is reported, not the alias
+            self.assertRegex(digest, r"^[0-9a-f]{64}$")  # 64 lowercase hex digits
+            saved = parent / "saved-cache"  # park the real cache and put a look-alike in its place
+            cache.rename(saved)
+            cache.mkdir(mode=0o700)
+            replacement = cache / ".collectivex-mount-sentinel-v1"
+            replacement.write_bytes(b"replacement".ljust(32, b"!"))  # 32-byte sentinel with different content
+            replacement.chmod(0o600)
+            replaced = subprocess.run(  # verify against the digest recorded for the original cache
+                [
+                    "bash", "-c",
+                    'source "$1"; export CX_BACKEND_CACHE_ROOT="$2" '
+                    'CX_BACKEND_CACHE_SENTINEL_SHA256="$3"; '
+                    'cx_verify_backend_cache_mount',
+                    "_", str(common), str(cache), digest,
+                ]
+            )
+            self.assertNotEqual(replaced.returncode, 0)  # the look-alike must not verify
+            replacement.unlink()  # remove the look-alike and restore the original cache
+            cache.rmdir()
+            saved.rename(cache)
+            (cache / ".collectivex-mount-sentinel-v1").unlink()  # then empty and remove it as well
+            cache.rmdir()
+            target = Path(temporary) / "target"
+            target.mkdir(mode=0o700)
+            cache.symlink_to(target, target_is_directory=True)  # the v3 path is now a symlink
+            fallback = subprocess.run(
+                [
+                    "bash", "-c",
+                    'source "$1"; cx_prepare_backend_cache "$2"; '
+                    'printf "%s\\n" "$CX_PREPARED_BACKEND_CACHE"',
+                    "_", str(common), str(parent),
+                ],
+                text=True,
+                capture_output=True,
+                check=True,
+            )
+            v4 = parent / f".collectivex-backend-cache-v4-{os.getuid()}"  # expected fallback directory
+            self.assertEqual(fallback.stdout.strip(), str(v4.resolve()))
+            self.assertTrue(cache.is_symlink())  # the v3 symlink is left in place, not repaired
+            self.assertTrue(v4.is_dir())
+            (v4 / ".collectivex-mount-sentinel-v1").unlink()  # empty v4 so it can be replaced
+            v4.rmdir()
+            v4.symlink_to(target, target_is_directory=True)  # now both candidates are symlinks
+            result = subprocess.run(
+                [
+                    "bash", "-c", 'source "$1"; cx_prepare_backend_cache "$2"',
+                    "_", str(common), str(parent),
+                ],
+                text=True,
+                capture_output=True,
+            )
+            self.assertNotEqual(result.returncode, 0)  # preparation must fail rather than fall back again
+            self.assertNotIn(str(parent), result.stderr)  # and stderr must not contain the parent path
+            self.assertTrue(cache.is_symlink())  # both symlinks are still in place
+            self.assertTrue(v4.is_symlink())
+
+        source = common.read_text().split("cx_prepare_backend_cache() {", 1)[1]  # text after the function header
+        program = source.split("<<'PY'\n", 1)[1].split("\nPY\n", 1)[0]  # first heredoc body between <<'PY' and PY
+        with tempfile.TemporaryDirectory() as temporary:
+            parent = Path(temporary) / "stage"
+            parent.mkdir(mode=0o700)
+            fake_os = types.ModuleType("os")  # stand-in module carrying every attribute of the real os
+            fake_os.__dict__.update(os.__dict__)
+            fake_os.fsync = mock.Mock(side_effect=OSError("forced fsync failure"))  # every fsync raises
+            with (
+                mock.patch.dict(sys.modules, {"os": fake_os}),  # an "import os" in the program gets the fake
+                mock.patch.object(sys, "argv", ["-", str(parent)]),  # parent is passed as argv[1]
+                mock.patch.object(sys, "stdout", io.StringIO()),  # swallow anything the program prints
+                self.assertRaises(SystemExit) as failure,
+            ):
+                exec(compile(program, "<cache-preparation>", "exec"), {})  # run the heredoc in-process with fresh globals
+            self.assertEqual(failure.exception.code, 1)  # the program exits with status 1
+            self.assertEqual(  # and leaves no temporary sentinel anywhere under parent
+                list(parent.rglob(".collectivex-mount-sentinel-v1.tmp.*")), []
+            )
+
+    def test_nvidia_namespace_package_roots_come_from_distribution_files(self) -> None:
+        runtime = ROOT / "runtime" / "run_in_container.sh"
+        with tempfile.TemporaryDirectory() as temporary:
+            site = Path(temporary) / "site"
+            package = site / "nvidia" / "nccl"
+            (package / "include").mkdir(parents=True)
+            (package / "lib").mkdir()
+            (package / "include" / "nccl.h").write_text("header\n")
+            (package / "lib" / "libnccl.so.2").write_text("library\n")
+            info = site / "nvidia_nccl_cu13-2.30.4.dist-info"
+            info.mkdir()
+            (info / "METADATA").write_text(
+                "Metadata-Version: 2.1\nName: nvidia-nccl-cu13\nVersion: 2.30.4\n"
+            )
+            (info / "RECORD").write_text(
+                "nvidia/nccl/include/nccl.h,,\n"
+                "nvidia/nccl/lib/libnccl.so.2,,\n"
+                "nvidia_nccl_cu13-2.30.4.dist-info/METADATA,,\n"
+                "nvidia_nccl_cu13-2.30.4.dist-info/RECORD,,\n"
+            )
+            command = r'''
+              set -euo pipefail
+              eval "$(sed -n '/^cx_nvidia_package_root()/,/^}/p' "$1")"
+              root="$(cx_nvidia_package_root nvidia-nccl-cu13 nccl)"
+              test "$root" = "$2/nvidia/nccl"
+              ! cx_nvidia_package_root nvidia-nccl-cu13 nvshmem
+            '''
+            subprocess.run(
+                ["bash", "-c", command, "_", str(runtime), str(site.resolve())],
+                check=True,
+                env={**os.environ, "PYTHONPATH": str(site)},
+            )
+
+    def test_cuda_cccl_exports_the_resolved_jit_toolchain_root(self) -> None:
+        runtime = ROOT / "runtime" / "run_in_container.sh"
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary)
+            toolkit = root / "cuda-13.0"
+            (toolkit / "bin").mkdir(parents=True)
+            (toolkit / "include").mkdir()
+            (toolkit / "lib64").mkdir()
+            cccl = toolkit / "targets" / "x86_64-linux" / "include" / "cccl"
+            cccl.mkdir(parents=True)
+            nvcc = toolkit / "bin" / "nvcc"
+            nvcc.write_text("#!/bin/sh\nexit 0\n")
+            nvcc.chmod(0o755)
+            alias = root / "cuda"
+            alias.symlink_to(toolkit, target_is_directory=True)
+            command = r'''
+              set -euo pipefail
+              eval "$(sed -n '/^cx_prepare_cuda_cccl()/,/^}/p' "$1")"
+              cx_prepare_cuda_cccl
+              test "$CUDA_HOME" = "$2"
+              test "$CX_CUDA_CCCL" = "$2/targets/x86_64-linux/include/cccl"
+              test "$CPATH" = "$2/targets/x86_64-linux/include/cccl:"
+              test "$NVCC_PREPEND_FLAGS" = "-I$2/targets/x86_64-linux/include/cccl "
+            '''
+            subprocess.run(
+                ["bash", "-c", command, "_", str(runtime), str(toolkit.resolve())],
+                check=True,
+                env={
+                    **os.environ,
+                    "PATH": f"{alias / 'bin'}:{os.environ['PATH']}",
+                    "CPATH": "",
+                    "NVCC_PREPEND_FLAGS": "",
+                },
+            )
+
+    def test_deepep_v2_toolchain_rejects_overlay_lock_failure(self) -> None:  # cx_prepare_deepep_toolchain must fail when flock fails
+        runtime = ROOT / "runtime" / "run_in_container.sh"  # script the function under test is extracted from
+        with tempfile.TemporaryDirectory() as temporary:  # handed to the shell as "$2" (cache_root)
+            command = r'''
+              set -euo pipefail
+              eval "$(sed -n '/^cx_prepare_deepep_toolchain()/,/^}/p' "$1")"
+              cache_root="$2"
+              cx_nvidia_package_root() { printf '%s' /unused; }
+              cx_deepep_v2_root() { printf '%s' "$cache_root"; }
+              cx_log() { :; }
+              flock() { return 1; }
+              ! cx_prepare_deepep_toolchain
+            '''
+            subprocess.run(  # the "!" line is last, so its inverted status is the script's exit status
+                ["bash", "-c", command, "_", str(runtime), temporary],
+                check=True,
+            )
+
+    def test_pinned_source_fetch_retries_transient_failures(self) -> None:  # the stubbed fetch fails twice, succeeds on the third attempt
+        common = ROOT / "runtime" / "common.sh"  # script the three git helpers are extracted from
+        with tempfile.TemporaryDirectory() as temporary:  # handed to the shell as "$3"
+            command = r'''
+              set -euo pipefail
+              eval "$(sed -n '/^cx_git()/,/^}/p' "$1")"
+              eval "$(sed -n '/^cx_git_in_tree()/,/^}/p' "$1")"
+              eval "$(sed -n '/^cx_fetch_revision()/,/^}/p' "$1")"
+              attempts=0
+              expected_directory="$(cd -P -- "$3" && pwd -P)"
+              sleep() { :; }
+              git() {
+                local argument has_directory=0 has_trust=0
+                if [ "$1" = '-c' ] && [ "$3" = init ]; then
+                  mkdir -p "${@: -1}"
+                  return 0
+                fi
+                for argument in "$@"; do
+                  [ "$argument" != '-C' ] || has_directory=1
+                  [ "$argument" != "safe.directory=$expected_directory" ] || has_trust=1
+                  [ "$argument" != 'safe.directory=*' ] || return 1
+                done
+                [ "$has_directory" = 0 ] || [ "$has_trust" = 1 ] || return 1
+                case " $* " in
+                  *' fetch '*)
+                    attempts=$((attempts + 1))
+                    [ "$attempts" = 3 ]
+                    ;;
+                  *' rev-parse HEAD '*) printf '%s\n' "$revision" ;;
+                  *) return 0 ;;
+                esac
+              }
+              cx_fetch_revision https://example.invalid/repo "$2" "$3"
+              test "$attempts" = 3
+            '''
+            revision = "a" * 40  # 40-character placeholder revision, handed to the shell as "$2"
+            subprocess.run(  # git and sleep are shell stubs; NOTE(review): "$revision" in the stub is never set by this script, presumably a local of cx_fetch_revision — confirm
+                ["bash", "-c", command, "_", str(common), revision, temporary],
+                check=True,
+            )
+
+    def test_git_tree_trust_is_exact_and_command_scoped(self) -> None:
+        common = ROOT / "runtime" / "common.sh"
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary)
+            repository = root / "repo"
+            repository.mkdir()
+            alias = root / "alias"
+            alias.symlink_to(repository, target_is_directory=True)
+            wildcard = root / "*"
+            wildcard.mkdir()
+            arguments = root / "arguments"
+            command = r'''
+              set -euo pipefail
+              eval "$(sed -n '/^cx_git()/,/^}/p' "$1")"
+              eval "$(sed -n '/^cx_git_in_tree()/,/^}/p' "$1")"
+              arguments="$4"
+              git() { printf '%s\n' "$@" > "$arguments"; }
+              cx_git_in_tree "$2" status --porcelain
+              ! cx_git_in_tree relative status
+              ! cx_git_in_tree "$3" status
+              ! cx_git_in_tree "$5" status
+            '''
+            subprocess.run(
+                [
+                    "bash",
+                    "-c",
+                    command,
+                    "_",
+                    str(common),
+                    str(repository),
+                    str(alias),
+                    str(arguments),
+                    str(wildcard),
+                ],
+                check=True,
+            )
+            self.assertEqual(
+                arguments.read_text().splitlines(),
+                [
+                    "-c",
+                    "credential.helper=",
+                    "-c",
+                    f"safe.directory={repository.resolve()}",
+                    "-C",
+                    str(repository.resolve()),
+                    "status",
+                    "--porcelain",
+                ],
+            )
+            self.assertNotIn("safe.directory=*", arguments.read_text())
+
+    def test_runtime_materializes_the_verified_host_source_without_network(self) -> None:  # source is copied from a local seed; the fetch stub must never run
+        common = ROOT / "runtime" / "common.sh"
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary)
+            seed = root / "seed"  # stands in for the already-verified source tree
+            seed.mkdir()
+            (seed / "pinned").write_text("source\n")  # file the stubbed validity check looks for
+            destination = root / "build"  # "$4": where the source is materialized
+            fetched = root / "network-fetch"  # "$5": created only if cx_fetch_revision is called
+            command = r'''
+              set -euo pipefail
+              source "$1"
+              export CX_BACKEND_SOURCE_ROOT="$2/source"
+              SEED="$3" FETCHED="$5"
+              copy_mode=
+              cx_backend_source_path() { printf '%s' "$SEED"; }
+              cx_backend_source_is_valid() { test -f "$2/pinned"; }
+              cx_fetch_revision() { : > "$FETCHED"; return 1; }
+              cp() {
+                test "$1" = -R
+                copy_mode=recursive
+                command cp "$@"
+              }
+              cx_materialize_backend_source deepep-hybrid "$4"
+              test -f "$4/pinned"
+              test "$copy_mode" = recursive
+              python3 - "$4" <<'PY'
+import os
+import stat
+import sys
+assert stat.S_IMODE(os.stat(sys.argv[1]).st_mode) == 0o700
+PY
+              test ! -e "$FETCHED"
+            '''
+            subprocess.run(  # the script also checks the copy used "cp -R" and that the destination mode is 0700
+                [
+                    "bash", "-c", command, "_", str(common), str(root),
+                    str(seed), str(destination), str(fetched),
+                ],
+                check=True,
+            )
+
+    def test_backend_source_validation_rejects_status_errors_and_ignored_files(self) -> None:
+        common = ROOT / "runtime" / "common.sh"
+        with tempfile.TemporaryDirectory() as temporary:
+            command = r'''
+              set -euo pipefail
+              source "$1"
+              cx_backend_source_pin() { printf '%s|%s|' revision tree; }
+              git() {
+                case " $* " in
+                  *' rev-parse HEAD '*) printf '%s\n' revision ;;
+                  *' rev-parse HEAD^{tree} '*) printf '%s\n' tree ;;
+                  *' status --porcelain '*) [ "$mode" != status-error ] ;;
+                  *' ls-files --others --ignored '*)
+                    [ "$mode" != ignored ] || printf '%s\n' ignored.bin
+                    ;;
+                  *) return 1 ;;
+                esac
+              }
+              mode=status-error
+              ! cx_backend_source_is_valid backend "$2"
+              mode=ignored
+              ! cx_backend_source_is_valid backend "$2"
+              mode=clean
+              cx_backend_source_is_valid backend "$2"
+            '''
+            subprocess.run(
+                ["bash", "-c", command, "_", str(common), temporary],
+                check=True,
+            )
+
+    def test_backend_source_root_normalizes_inherited_special_mode(self) -> None:
+        common = ROOT / "runtime" / "common.sh"
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary)
+            source_root = root / "experimental" / "CollectiveX" / ".cx_sources"
+            source = source_root / "backend-revision"
+            source.mkdir(parents=True)
+            command = r'''
+              set -euo pipefail
+              source "$1"
+              export COLLECTIVEX_EXECUTION_ID="source-mode-$$"
+              trap 'cx_cleanup_private_logs 0' EXIT
+              expected_mount="$2"
+              expected_source="$3"
+              expected_root="${expected_source%/*}"
+              observed_mode=2700
+              mock_stage_owner=4200
+              mock_root_owner=4200
+              chmod_calls=0
+              chmod() {
+                test "$1" = 700 && test "$2" = "$expected_root"
+                chmod_calls=$((chmod_calls + 1))
+                [ "$chmod_calls" = 2 ] || return 1
+                observed_mode=700
+              }
+              stat() {
+                case "$2" in
+                  %u)
+                    case "$3" in
+                      "$expected_mount") printf '%s\n' "$mock_stage_owner" ;;
+                      "$expected_root") printf '%s\n' "$mock_root_owner" ;;
+                      *) return 1 ;;
+                    esac
+                    ;;
+                  %a)
+                    case "$3" in
+                      "$expected_mount") printf '2700\n' ;;
+                      "$expected_root") printf '%s\n' "$observed_mode" ;;
+                      *) return 1 ;;
+                    esac
+                    ;;
+                  *) return 1 ;;
+                esac
+              }
+              cx_backend_source_path() { printf '%s' "$expected_source"; }
+              cx_backend_source_is_valid() {
+                test "$1" = backend && test "$2" = "$expected_source"
+              }
+              cx_prepare_backend_source "$2" backend
+              test "$observed_mode" = 2700
+              test "$chmod_calls" = 0
+              observed_mode=2750
+              ! _cx_prepare_backend_source "$2" backend
+              test "$chmod_calls" = 1
+              _cx_prepare_backend_source "$2" backend
+              test "$observed_mode" = 700
+              mock_root_owner=4300
+              ! _cx_prepare_backend_source "$2" backend
+            '''
+            subprocess.run(
+                ["bash", "-c", command, "_", str(common), str(root), str(source)],
+                check=True,
+            )
+
+    def test_canonical_backend_sources_use_verified_seed_without_network(self) -> None:
+        common = ROOT / "runtime" / "common.sh"
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary)
+            mount = root / "mount"
+            source_root = mount / "experimental" / "CollectiveX" / ".cx_sources"
+            seed_root = root / "seed"
+            seeds = [
+                seed_root / f"{backend}-revision"
+                for backend in ("backend-one", "backend-two")
+            ]
+            mount.mkdir(mode=0o700)
+            source_root.parent.mkdir(parents=True, mode=0o700)
+            for seed in seeds:
+                seed.mkdir(parents=True, mode=0o700)
+                (seed / "pinned").write_text("source\n")
+            network = root / "network"
+            command = r'''
+              set -euo pipefail
+              source "$1"
+              export COLLECTIVEX_CANONICAL_GHA=1
+              export CX_BACKEND_SOURCE_SEED_ROOT="$4"
+              export COLLECTIVEX_EXECUTION_ID="source-seed-$$"
+              trap 'cx_cleanup_private_logs 0' EXIT
+              NETWORK="$5"
+              stat() {
+                case "$2" in
+                  %u) printf '4200\n' ;;
+                  %a) printf '700\n' ;;
+                  *) return 1 ;;
+                esac
+              }
+              cx_backend_source_path() { printf '%s/%s-revision' "$1" "$2"; }
+              cx_backend_source_is_valid() { test -f "$2/pinned"; }
+              cx_fetch_revision() { : > "$NETWORK"; return 1; }
+              cx_prepare_backend_source "$2" backend-one
+              cx_prepare_backend_source "$2" backend-two
+              test -f "$3/backend-one-revision/pinned"
+              test -f "$3/backend-two-revision/pinned"
+              test ! -e "$NETWORK"
+              rm -rf -- "$3/backend-one-revision" "$3/backend-two-revision"
+              unset CX_BACKEND_SOURCE_SEED_ROOT
+              ! _cx_prepare_backend_source "$2" backend-one
+              test ! -e "$NETWORK"
+            '''
+            subprocess.run(
+                [
+                    "bash", "-c", command, "_", str(common), str(mount),
+                    str(source_root), str(seed_root), str(network),
+                ],
+                check=True,
+            )
+
+    def test_deepep_hybrid_cache_reuse_revalidates_extensions(self) -> None:
+        """The hybrid cache check must reject a changed or an extra extension file."""
+        common, runtime = ROOT / "runtime" / "common.sh", ROOT / "runtime" / "run_in_container.sh"
+        with tempfile.TemporaryDirectory() as temporary:
+            Path(temporary, "deep_ep_cpp.so").write_bytes(b"deep")
+            Path(temporary, "hybrid_ep_cpp.so").write_bytes(b"hybrid")
+            # errexit ignores a "!"-negated command, so each negative check exits explicitly.
+            command = r'''
+              set -euo pipefail
+              chmod 700 "$3"
+              source "$1"
+              eval "$(sed -n '/^cx_deepep_hybrid_marker_content_sha256()/,/^}/p' "$2")"
+              eval "$(sed -n '/^cx_deepep_hybrid_cache_is_valid()/,/^}/p' "$2")"
+              revision=revision tree=tree
+              cx_git() {
+                case " $* " in
+                  *' rev-parse HEAD '*) printf '%s\n' "$revision" ;;
+                  *' rev-parse HEAD^{tree} '*) printf '%s\n' "$tree" ;;
+                  *' status --porcelain '*|*' ls-files --others '*) return 0 ;;
+                  *) return 1 ;;
+                esac
+              }
+              cx_git_in_tree() { shift; cx_git "$@"; }
+              marker="$3/.collectivex-complete"
+              digest="$(cx_extension_pair_sha256 "$3" 'deep_ep_cpp*.so' 'hybrid_ep_cpp*.so')"
+              (umask 077; printf '%s\n%s\n%s\n' "$revision" "$tree" "$digest" > "$marker")
+              cx_deepep_hybrid_cache_is_valid "$3" "$marker" "$revision" "$tree"
+              printf changed > "$3/hybrid_ep_cpp.so"
+              ! cx_deepep_hybrid_cache_is_valid "$3" "$marker" "$revision" "$tree" || exit 1
+              printf hybrid > "$3/hybrid_ep_cpp.so"
+              cp "$3/deep_ep_cpp.so" "$3/deep_ep_cpp-extra.so"
+              ! cx_deepep_hybrid_cache_is_valid "$3" "$marker" "$revision" "$tree" || exit 1
+            '''
+            subprocess.run(
+                ["bash", "-c", command, "_", str(common), str(runtime), temporary],
+                check=True,
+            )
+
+    def test_rack_backend_environment_is_shared_per_node_and_required(self) -> None:
+        """The launcher must vet and source the per-node env file that the runtime persists."""
+        runtime = ROOT / "runtime" / "run_in_container.sh"
+        launcher = (ROOT / "launchers" / "launch_gb-nv.sh").read_text()
+        lines = launcher.splitlines()
+        assignment = next(line for line in lines if line.startswith("SOURCE_BACKEND_ENV="))
+        self.assertNotIn("/tmp/.cx_backend_env", launcher)
+        self.assertIn('[ -f "$env_file" ] && [ -r "$env_file" ]', launcher)
+        self.assertIn('[ ! -L "$env_file" ]', launcher)
+        self.assertIn('$(stat -c "%u" "$env_root"):600', launcher)
+        self.assertIn('case "$(stat -c "%a" "$env_root")" in 700|[1-7]700)', launcher)
+        self.assertIn("node-${SLURM_NODEID}.sh", launcher)
+        self.assertIn("HybridEPBuffer", launcher)
+        self.assertIn('. "$env_file" || exit 66', launcher)
+        with tempfile.TemporaryDirectory() as temporary:
+            # No errexit here (run_case provokes failures), so every step exits on its own error.
+            consumer = r'''
+              eval "$1"
+              env_root="$2/env"
+              SOURCE_BACKEND_ENV="${SOURCE_BACKEND_ENV//\/ix\/experimental\/CollectiveX\/.cx_backend\/env/$env_root}"
+              mkdir -p "$env_root"
+              env_file="$env_root/node-1.sh"
+              printf 'printf sourced > "$CX_SENTINEL"\n' > "$env_file"
+              chmod 600 "$env_file"
+              export CX_SENTINEL="$2/sentinel"
+              stat() {
+                [ "${STAT_FAIL:-0}" = 0 ] || return 1
+                case "$2" in
+                  %a) printf '%s\n' "$ROOT_MODE" ;;
+                  %u) printf '1000\n' ;;
+                  %u:%a) printf '%s\n' "$FILE_OWNER_MODE" ;;
+                  *) return 2 ;;
+                esac
+              }
+              run_case() {
+                rm -f "$CX_SENTINEL"
+                ROOT_MODE="$1" FILE_OWNER_MODE="$2" STAT_FAIL="$3" SLURM_NODEID="$4"
+                ( eval "$SOURCE_BACKEND_ENV" )
+                rc=$?
+                [ "$rc" = "$5" ] || return 1
+                if [ "$5" = 0 ]; then
+                  [ -f "$CX_SENTINEL" ]
+                else
+                  [ ! -e "$CX_SENTINEL" ]
+                fi
+              }
+              run_case 700 1000:600 0 1 0 || exit 1
+              run_case 2700 1000:600 0 1 0 || exit 1
+              run_case 755 1000:600 0 1 66 || exit 1
+              run_case 700 1000:600 1 1 66 || exit 1
+              run_case 700 2000:600 0 1 66 || exit 1
+              mv "$env_file" "$env_file.real" || exit 1
+              ln -s "$env_file.real" "$env_file" || exit 1
+              run_case 700 1000:600 0 1 66 || exit 1
+              rm "$env_file" || exit 1
+              mv "$env_file.real" "$env_file" || exit 1
+              run_case 700 1000:600 0 invalid 66
+            '''
+            subprocess.run(
+                ["bash", "-c", consumer, "_", assignment, temporary],
+                check=True,
+            )
+            command = r'''
+              set -euo pipefail
+              cd "$2"
+              eval "$(sed -n '/^cx_persist_backend_env()/,/^}/p' "$1")"
+              export SLURM_NODEID=1 PYTHONPATH=/ix/pinned DEEPEP_COMMIT=abc
+              cx_persist_backend_env
+              env_file="$PWD/.cx_backend/env/node-1.sh"
+              test -f "$env_file"
+              test "$(stat -f %Lp "$env_file" 2>/dev/null || stat -c %a "$env_file")" = 600
+              unset PYTHONPATH DEEPEP_COMMIT
+              . "$env_file"
+              test "$PYTHONPATH" = /ix/pinned
+              test "$DEEPEP_COMMIT" = abc
+              SLURM_NODEID=invalid && ! cx_persist_backend_env
+            '''
+            subprocess.run(
+                ["bash", "-c", command, "_", str(runtime), temporary],
+                check=True,
+            )
+
+    def test_stage_cleanup_failure_fails_job_but_marks_allocation_safe(self) -> None:
+        """A failed stage cleanup must exit 1, leave the cleanup-safe guard, and skip log cleanup."""
+        with tempfile.TemporaryDirectory() as temporary:
+            job_root = Path(temporary)
+            for child in ("repo", "stage"):
+                (job_root / child).mkdir()
+            command = r'''
+              source "$1"
+              cx_write_cleanup_guard() {
+                rm -f -- "$CX_JOB_ROOT/cleanup-safe" "$CX_JOB_ROOT/cleanup-unsafe"
+                : > "$CX_JOB_ROOT/cleanup-$1"
+              }
+              cx_cleanup_stage() { return 1; }
+              cx_cleanup_private_logs() { : > "$CX_JOB_ROOT/logs-deleted"; }
+              export CX_JOB_ROOT="$2" REPO_ROOT="$2/repo" MOUNT_SRC="$2/stage"
+              export COLLECTIVEX_CANONICAL_GHA=1 CX_ALLOCATION_REQUESTED=0
+              unset CX_BENCH JOB_ID
+              cx_launcher_cleanup 0
+            '''
+            completed = subprocess.run(
+                ["bash", "-c", command, "_", str(ROOT / "runtime" / "common.sh"), str(job_root)],
+                env={**os.environ, "COLLECTIVEX_OPERATOR_CONFIG": "/dev/null"},
+                capture_output=True,
+                text=True,
+            )
+            self.assertEqual(completed.returncode, 1, completed.stderr)
+            self.assertTrue(job_root.joinpath("cleanup-safe").is_file())
+            self.assertFalse(job_root.joinpath("cleanup-unsafe").exists())
+            self.assertFalse(job_root.joinpath("logs-deleted").exists())
+
+    def test_generated_stage_cleanup_never_removes_configured_base(self) -> None:
+        """cx_cleanup_stage must remove a generated job directory but refuse the configured base.
+
+        The shell exits non-zero (check=True) if the second call, aimed at the base, succeeds.
+        """
+        with tempfile.TemporaryDirectory() as temporary:
+            base = Path(temporary) / "stage"
+            repo = Path(temporary) / "repo"
+            generated = base / "job_execution"
+            generated.mkdir(parents=True)
+            repo.mkdir()
+            (generated / "payload").write_text("temporary")
+            # Script arguments: $2 = generated directory, $3 = repo, $4 = configured base.
+            script = 'source "$1"; cx_cleanup_stage "$2" "$3"; ! cx_cleanup_stage "$4" "$3"'
+            argv = ["bash", "-c", script, "_", str(ROOT / "runtime" / "common.sh")]
+            argv += [str(generated), str(repo), str(base)]
+            # Inherit the caller's environment, then pin the three settings this case relies on.
+            environment = dict(os.environ)
+            environment.update(
+                COLLECTIVEX_OPERATOR_CONFIG="/dev/null",
+                COLLECTIVEX_EXECUTION_ID="execution",
+                CX_STAGE_DIR=str(base),
+            )
+            subprocess.run(argv, check=True, env=environment)
+            # Only the generated directory disappears; its parent and the repo stay in place.
+            self.assertFalse(generated.exists())
+            self.assertTrue(base.is_dir())
+            self.assertTrue(repo.is_dir())
+
+    def test_adapters_do_not_retain_dead_expected_methods(self) -> None:
+        """No ``ep_*.py`` file directly under HERE may define a function named ``expected``."""
+        definitions = (ast.FunctionDef, ast.AsyncFunctionDef)
+        for adapter in HERE.glob("ep_*.py"):
+            module = ast.parse(adapter.read_text(), str(adapter))
+            # ast.walk visits nested scopes too, so methods inside classes are covered.
+            names = {node.name for node in ast.walk(module) if isinstance(node, definitions)}
+            self.assertNotIn("expected", names, adapter.name)
+
+    def test_artifact_safety_rejects_sensitive_material(self) -> None:
+        """Each sensitive document must be rejected without the error text echoing the value."""
+        private_address = ".".join(str(octet) for octet in (10, 0, 0, 1))
+        secret = "github_pat_" + "A" * 24
+        sensitive = {
+            "ipv4": ({"note": private_address}, private_address),
+            "ipv6": ({"note": "[2001:db8::1]:29500"}, "2001:db8::1"),
+            "user-at-host": ({"note": "ssh admin@private-host"}, "admin@private-host"),
+            "hostname": ({"note": "host=compute-17"}, "compute-17"),
+            "private-dns": ({"note": "worker-7.cluster.local"}, "worker-7.cluster.local"),
+            "suffixed-host": ({"worker_hostname": "relative"}, "worker_hostname"),
+            "suffixed-address": ({"control_address": "relative"}, "control_address"),
+            "suffixed-path": ({"scheduler_path": "relative"}, "scheduler_path"),
+            "exact-address": ({"address": "relative"}, "address"),
+            "exact-ip": ({"ip": "relative"}, "ip"),
+            "camel-host": ({"workerHost": "relative"}, "workerHost"),
+            "camel-path": ({"schedulerPath": "relative"}, "schedulerPath"),
+            "acronym-gpu-uuid": ({"gpuUUID": "relative"}, "gpuUUID"),
+            "acronym-device-uuid": ({"deviceUUID": "relative"}, "deviceUUID"),
+            "acronym-pci-bus": ({"pciBusID": "relative"}, "pciBusID"),
+            "mac-address": ({"note": "00:11:22:33:44:55"}, "00:11:22:33:44:55"),
+            "ib-guid": ({"note": "00:11:22:33:44:55:66:77"}, "00:11:22:33:44:55:66:77"),
+            "dgx-host": ({"note": "dgx-b300-001"}, "dgx-b300-001"),
+            "cloud-host": ({"note": "ip-10-20-30-40"}, "ip-10-20-30-40"),
+            "credential-field": ({"service_token": "short"}, "service_token"),
+            "prefixed-token": ({"note": secret}, secret),
+            "hf-token": ({"note": "hf_" + "A" * 24}, "hf_" + "A" * 24),
+            "payment-token": ({"note": "sk_live_" + "A" * 24}, "sk_live_" + "A" * 24),
+            "generic-secret": ({"note": "password=not-a-real-secret"}, "not-a-real-secret"),
+        }
+        for root in ("data", "it-share", "lustre", "raid", "nvme_home", "scratch", "gpfs", "fsx"):
+            value = f"/{root}/collectivex/run"
+            sensitive[f"private-root-{root}"] = ({"note": value}, value)
+        for name, (document, offending) in sensitive.items():
+            # Keep the message check inside the subTest so one failing case cannot abort the rest.
+            with self.subTest(name=name):
+                with self.assertRaises(artifact_safety.ArtifactSafetyError) as caught:
+                    artifact_safety.assert_publication_safe([document])
+                self.assertNotIn(offending, str(caught.exception))
+        # Benign values that resemble the patterns above must still be accepted.
+        artifact_safety.assert_publication_safe([{
+            "runner": "b300", "redaction": "sanitized-v1",
+            "path": "datasets/" + "a" * 64 + "/dataset.json",
+            "timing": "8:64:32",
+            "image_digest": "sha256:" + "b" * 64,
+            "source": "github.com",
+        }])
+        for ref in ("release@candidate", "worker1-feature", "sk-refactor-long-component-name"):
+            artifact_safety.assert_publication_safe([{"ref": ref}])
+
+    def test_artifact_safety_cli_does_not_echo_sensitive_values(self) -> None:
+        """The CLI must fail and name the rule on stderr without repeating the offending value."""
+        octets = (10, 24, 68, 12)
+        private_value = ".".join(map(str, octets))
+        with tempfile.TemporaryDirectory() as temporary:
+            artifact = Path(temporary) / "artifact.json"
+            artifact.write_text(json.dumps({"note": private_value}))
+            argv = [sys.executable, str(ROOT / "artifact_safety.py"), str(artifact)]
+            completed = subprocess.run(argv, capture_output=True, text=True)
+        stderr = completed.stderr
+        self.assertNotEqual(completed.returncode, 0)
+        self.assertIn("forbidden ipv4-address value", stderr)
+        self.assertNotIn(private_value, stderr)
+
+    def test_artifact_safety_rejects_linked_and_special_inputs(self) -> None:
+        """load_documents must refuse a symlink and a FIFO even though both are named *.json."""
+        with tempfile.TemporaryDirectory() as temporary:
+            directory = Path(temporary)
+            target = directory / "source.json"
+            target.write_text("{}")
+            symlink = directory / "linked.json"
+            symlink.symlink_to(target)
+            pipe = directory / "fifo.json"
+            os.mkfifo(pipe)
+            for candidate in (symlink, pipe):
+                with self.subTest(path=candidate.name):
+                    with self.assertRaises(artifact_safety.ArtifactSafetyError):
+                        artifact_safety.load_documents([str(candidate)])
+
+
+if __name__ == "__main__":  # allow running this test module directly
+    unittest.main()
diff --git a/experimental/CollectiveX/tests/workload.py b/experimental/CollectiveX/tests/workload.py
new file mode 100644
index 0000000000..89a6b46052
--- /dev/null
+++ b/experimental/CollectiveX/tests/workload.py
@@ -0,0 +1,358 @@
+#!/usr/bin/env python3
+"""Canonical, byte-stable CollectiveX routing workloads.
+
+A *canonical workload* is a routing trace generated ONCE, serialized to a platform-independent
+file, and referenced by an immutable `workload_id`. Every promoted benchmark point consumes the
+SAME serialized bytes, so "did NVIDIA and AMD run the identical workload?" is answered by a
+checksum match, not by trusting that two machines re-ran the same seeded generator.
+
+Layout on disk (one workload = two files, basename = workload_id):
+  <dir>/<workload_id>.npz            topk_idx [gt,topk] int32, topk_weights [gt,topk] float32
+  <dir>/<workload_id>.manifest.json  dims, routing profile, generator version, seed, SHA-256s
+
+Routing and gate weights come from a stdlib integer counter, not a framework RNG. The same
+parameters therefore produce the same int32/float32 bytes across PyTorch and accelerator images.
+"""
+from __future__ import annotations
+
+from array import array
+import bisect
+import hashlib
+import json
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+import identity  # noqa: E402
+
+WORKLOAD_SCHEMA_VERSION = 1  # written to each manifest as "schema_version"
+# Bump when the counter or byte encoding changes. The workload ID binds parameters and trace bytes.
+GENERATOR_VERSION = "collectivex-routing-counter-v3"
+GATE_WEIGHT_FORMAT = "counter-u16-normalized-f32"  # manifest "gate_weight_format" value
+ACTIVATION_GENERATOR = "collectivex-activation-counter-v3"  # part of the workload-ID key
+_MASK64 = (1 << 64) - 1  # truncates counter arithmetic to 64 bits
+
+
+def _sha256(b: bytes) -> str:  # lowercase hex SHA-256 digest of ``b``
+    return hashlib.new("sha256", b).hexdigest()
+
+
+def _mix64(value: int) -> int:  # 64-bit scramble using the SplitMix64 constants and shifts
+    state = (value + 0x9E3779B97F4A7C15) & _MASK64
+    for shift, multiplier in ((30, 0xBF58476D1CE4E5B9), (27, 0x94D049BB133111EB)):
+        state = ((state ^ (state >> shift)) * multiplier) & _MASK64
+    return state ^ (state >> 31)
+
+
+def _counter(seed: int, token: int, slot: int, attempt: int, stream: int) -> int:
+    """XOR the masked seed with each ``(coordinate + 1) * constant`` term, then apply _mix64."""
+    combined = seed & _MASK64
+    for coordinate, multiplier in (
+        (token, 0xD2B74407B1CE6E93), (slot, 0xCA5A826395121157),
+        (attempt, 0x9E3779B185EBCA87), (stream, 0xA24BAED4963EE407),
+    ):
+        combined ^= ((coordinate + 1) * multiplier) & _MASK64
+    return _mix64(combined)
+
+
+def canonical_routing_rows(
+    global_tokens: int, experts: int, topk: int, routing: str, seed: int
+) -> tuple[list[list[int]], list[list[float]]]:
+    """Return ``(indices, weights)``: per-token distinct experts and normalized gate weights.
+
+    Everything is derived from ``_counter``; zipf routing samples through a cumulative table of
+    ``floor(2**32 / (rank + 1))``. Raises ValueError for an unknown profile or invalid sizes and
+    RuntimeError when a slot exhausts ``experts * 16 + 1`` attempts without a new expert.
+    """
+    if routing not in {"uniform", "zipf"}:
+        raise ValueError(f"unknown routing {routing!r} (uniform|zipf)")
+    if not (global_tokens > 0 and experts > 0 and 0 < topk <= experts):
+        raise ValueError("global_tokens/experts/topk must be positive and topk <= experts")
+    zipf_table: list[int] | None = None  # None selects uniform routing
+    if routing == "zipf":
+        zipf_table, running = [], 0
+        for rank in range(experts):
+            running += (1 << 32) // (rank + 1)
+            zipf_table.append(running)
+    indices: list[list[int]] = []
+    weights: list[list[float]] = []
+    for token in range(global_tokens):
+        chosen: list[int] = []
+        taken: set[int] = set()
+        for slot in range(topk):
+            for attempt in range(experts * 16 + 1):
+                draw = _counter(seed, token, slot, attempt, 0)
+                if zipf_table is None:
+                    expert = draw % experts
+                else:
+                    expert = bisect.bisect_right(zipf_table, draw % zipf_table[-1])
+                if expert not in taken:
+                    taken.add(expert)
+                    chosen.append(expert)
+                    break
+            else:
+                raise RuntimeError("counter routing could not select distinct experts")
+        # Stream 1 (attempt 0) supplies one gate numerator per slot, each in 1..65535.
+        numerators = [1 + _counter(seed, token, slot, 0, 1) % 65535 for slot in range(topk)]
+        scale = float(sum(numerators))
+        indices.append(chosen)
+        weights.append([numerator / scale for numerator in numerators])
+    return indices, weights
+
+
+def _canonical_bytes(
+    indices: list[list[int]], weights: list[list[float]]
+) -> tuple[bytes, bytes]:  # (int32 index bytes, float32 weight bytes), both little-endian
+    flat_idx = array("i", [value for row in indices for value in row])
+    flat_gate = array("f", [value for row in weights for value in row])
+    if not (flat_idx.itemsize == 4 == flat_gate.itemsize):
+        raise RuntimeError("canonical workload requires 32-bit int and float arrays")
+    if sys.byteorder != "little":
+        for buffer in (flat_idx, flat_gate):
+            buffer.byteswap()
+    return flat_idx.tobytes(), flat_gate.tobytes()
+
+
+def trace_checksums(
+    indices: list[list[int]], weights: list[list[float]]
+) -> dict[str, str]:
+    """SHA-256 hex digests of the canonical index bytes, weight bytes, and their concatenation."""
+    index_blob, gate_blob = _canonical_bytes(indices, weights)
+    parts = (("topk_idx", index_blob), ("topk_weights", gate_blob), ("trace", index_blob + gate_blob))
+    digests: dict[str, str] = {}
+    for label, blob in parts:
+        digests[label] = _sha256(blob)
+    return digests
+
+
+def canonical_member(
+    routing: str,
+    hidden: int,
+    topk: int,
+    experts: int,
+    ep_size: int,
+    tokens_per_rank: int,
+    seed: int,
+) -> tuple[str, dict[str, str], list[list[int]], list[list[float]]]:
+    """Generate one canonical trace and return ``(workload_id, checksums, indices, weights)``.
+
+    The trace covers ``ep_size * tokens_per_rank`` tokens; the rows are returned alongside
+    the ID so callers can run proof checks without regenerating them.
+    """
+    token_total = ep_size * tokens_per_rank
+    rows = canonical_routing_rows(token_total, experts, topk, routing, seed)
+    digests = trace_checksums(*rows)
+    # Hand over the trace digest so compute_workload_id does not regenerate the rows.
+    workload = compute_workload_id(
+        routing, hidden, topk, experts, ep_size, token_total, seed,
+        trace_checksum=digests["trace"],
+    )
+    indices, weights = rows
+    return workload, digests, indices, weights
+
+
+def compute_workload_id(routing: str, hidden: int, topk: int, experts: int,
+                        ep_size: int, global_tokens: int, seed: int,
+                        generator: str = GENERATOR_VERSION,
+                        trace_checksum: str | None = None) -> str:
+    """Derive the workload ID from the parameters, trace digest, and activation identity.
+    Regenerates the canonical rows when ``trace_checksum`` is None; rejects other generators."""
+    if generator != GENERATOR_VERSION:
+        raise ValueError(f"unsupported workload generator {generator!r}")
+    if trace_checksum is None:
+        rows = canonical_routing_rows(global_tokens, experts, topk, routing, seed)
+        trace_checksum = trace_checksums(*rows)["trace"]
+    # Key order is kept as written; identity.workload_id receives the whole mapping.
+    key = dict(
+        generator=generator, routing=routing, hidden=hidden, topk=topk, experts=experts,
+        ep_size=ep_size, global_tokens=global_tokens, seed=seed, trace_sha256=trace_checksum,
+        activation_generator=ACTIVATION_GENERATOR,
+        activation_identity=compute_activation_identity(seed, hidden),
+    )
+    return identity.workload_id(key)
+
+
+def compute_activation_identity(seed, hidden, generator=ACTIVATION_GENERATOR) -> str:
+    """SHA-256 of the ``counter|seed=..|hidden=..|gen=..`` descriptor string (parameters only)."""
+    descriptor = "|".join(("counter", f"seed={seed}", f"hidden={hidden}", f"gen={generator}"))
+    return hashlib.sha256(descriptor.encode()).hexdigest()
+
+
+def build_manifest(routing, hidden, topk, experts, global_tokens, seed, experts_per_rank,
+                   idx_np, weights_np):
+    """Assemble the v1 manifest dict for an already-generated trace held in numpy arrays.
+    Raises ValueError unless ``experts_per_rank`` is positive and divides ``experts``."""
+    # Reject zero/negative first: "%" would raise ZeroDivisionError or yield a negative ep_size.
+    if experts_per_rank <= 0 or experts % experts_per_rank:
+        raise ValueError("experts must be divisible by experts_per_rank")
+    idx_bytes = idx_np.astype("<i4", copy=False).tobytes()
+    w_bytes = weights_np.astype("<f4", copy=False).tobytes()
+    ep_size = experts // experts_per_rank
+    trace_checksum = _sha256(idx_bytes + w_bytes)
+    wid = compute_workload_id(routing, hidden, topk, experts, ep_size, global_tokens, seed,
+                              trace_checksum=trace_checksum)
+    return {
+        "schema_version": WORKLOAD_SCHEMA_VERSION,
+        "workload_id": wid,
+        "generator_version": GENERATOR_VERSION,
+        "gate_weight_format": GATE_WEIGHT_FORMAT,
+        "dims": {"hidden": hidden, "topk": topk, "experts": experts, "ep_size": ep_size,
+                 "tokens_per_rank": int(global_tokens) // ep_size,
+                 "global_tokens": int(global_tokens), "experts_per_rank": experts_per_rank},
+        "routing_profile": routing,
+        "seed": seed,
+        "checksums": {  # SHA-256 over the raw little-endian array bytes (int32 / float32)
+            "topk_idx": _sha256(idx_bytes),
+            "topk_weights": _sha256(w_bytes),   # gate-weight (value) distribution identity
+            "trace": trace_checksum,
+        },
+        "activation_profile": "canonical-counter-source-v3",
+        "activation_generator": ACTIVATION_GENERATOR,
+        "activation_identity": compute_activation_identity(seed, hidden),
+    }
+
+
+def build_workload(hidden, topk, experts, routing, global_tokens, seed, experts_per_rank):
+    """Generate the canonical rows, convert them to int32/float32 arrays, and build the manifest.
+
+    Returns ``(idx_np, weights_np, manifest)``.
+    """
+    import numpy as np
+    rows, gates = canonical_routing_rows(global_tokens, experts, topk, routing, seed)
+    trace = (np.asarray(rows, dtype=np.int32), np.asarray(gates, dtype=np.float32))
+    manifest = build_manifest(routing, hidden, topk, experts, global_tokens, seed,
+                              experts_per_rank, *trace)
+    return (*trace, manifest)
+
+
+def save_workload(out_dir, idx_np, weights_np, manifest) -> str:
+    """Write ``<workload_id>.npz`` and ``<workload_id>.manifest.json`` to out_dir; return the ID."""
+    import numpy as np
+    os.makedirs(out_dir, exist_ok=True)
+    stem = os.path.join(out_dir, manifest["workload_id"])
+    np.savez_compressed(f"{stem}.npz", topk_idx=idx_np.astype(np.int32), topk_weights=weights_np.astype(np.float32))
+    with open(f"{stem}.manifest.json", "w") as fh:
+        json.dump(manifest, fh, indent=2, sort_keys=True)
+    return manifest["workload_id"]
+
+
+def load_workload(npz_path, verify=True):
+    """Read ``<base>.npz`` plus ``<base>.manifest.json``; return (idx_np, weights_np, manifest).
+    Raises ValueError on an ID/filename mismatch, unexpected archive fields, or failed verify."""
+    import numpy as np
+    suffix = ".npz"
+    base = npz_path[: -len(suffix)] if npz_path.endswith(suffix) else npz_path
+    with open(f"{base}.manifest.json") as fh:
+        manifest = json.load(fh)
+    if os.path.basename(base) != manifest.get("workload_id"):
+        raise ValueError(f"workload manifest ID does not match filename for {base}")
+    with np.load(f"{base}.npz", allow_pickle=False) as archive:
+        if set(archive.files) != {"topk_idx", "topk_weights"}:
+            raise ValueError(f"workload archive fields differ for {base}")
+        idx_np, w_np = (np.ascontiguousarray(archive[name]) for name in ("topk_idx", "topk_weights"))
+    if verify:
+        ok, reason = verify_workload(manifest, idx_np, w_np)
+        if not ok:
+            raise ValueError(f"workload checksum mismatch for {base}: {reason}")
+    return idx_np, w_np, manifest
+
+
+def verify_workload(manifest, idx_np, weights_np):
+    """Validate a manifest against its trace arrays and return ``(ok, reason)``.
+    Malformed values (non-dict ``checksums``, unhashable ``routing_profile``) fail, not raise."""
+    import numpy as np
+    expected_fields = {
+        "schema_version", "workload_id", "generator_version", "gate_weight_format", "dims",
+        "routing_profile", "seed", "checksums", "activation_profile", "activation_generator",
+        "activation_identity",
+    }
+    if not isinstance(manifest, dict) or set(manifest) != expected_fields:
+        return False, "manifest fields differ from the v1 contract"
+    # Tuple membership compares with ==, so an unhashable routing_profile cannot raise here.
+    if (manifest["schema_version"] != WORKLOAD_SCHEMA_VERSION
+            or manifest["generator_version"] != GENERATOR_VERSION
+            or manifest["gate_weight_format"] != GATE_WEIGHT_FORMAT
+            or manifest["routing_profile"] not in ("uniform", "zipf")):
+        return False, "manifest version or generator is unsupported"
+    if (isinstance(manifest["seed"], bool) or not isinstance(manifest["seed"], int)
+            or not identity.is_typed_id(manifest["workload_id"], "workload")):
+        return False, "manifest seed or workload ID is invalid"
+    dims = manifest["dims"]
+    dim_fields = {"hidden", "topk", "experts", "ep_size", "tokens_per_rank",
+                  "global_tokens", "experts_per_rank"}
+    if not isinstance(dims, dict) or set(dims) != dim_fields:
+        return False, "manifest dimensions are invalid"
+    if any(isinstance(dims[key], bool) or not isinstance(dims[key], int) or dims[key] <= 0
+           for key in dim_fields):
+        return False, "manifest dimensions must be positive integers"
+    if (dims["experts"] != dims["ep_size"] * dims["experts_per_rank"]
+            or dims["global_tokens"] != dims["ep_size"] * dims["tokens_per_rank"]):
+        return False, "manifest EP dimensions are inconsistent"
+    shape = (dims["global_tokens"], dims["topk"])
+    if (idx_np.dtype != np.int32 or weights_np.dtype != np.float32
+            or idx_np.shape != shape or weights_np.shape != shape
+            or not idx_np.flags.c_contiguous or not weights_np.flags.c_contiguous):
+        return False, "workload array dtype, shape, or layout is invalid"
+    if (np.any(idx_np < 0) or np.any(idx_np >= dims["experts"])
+            or np.any(np.diff(np.sort(idx_np, axis=1), axis=1) == 0)):
+        return False, "expert indices are out of range or repeated"
+    if (not np.isfinite(weights_np).all() or np.any(weights_np < 0)
+            or not np.allclose(weights_np.sum(axis=1), 1.0, rtol=1e-5, atol=1e-6)):
+        return False, "gate weights are invalid"
+    if (manifest["activation_profile"] != "canonical-counter-source-v3"
+            or manifest["activation_generator"] != ACTIVATION_GENERATOR
+            or manifest["activation_identity"]
+            != compute_activation_identity(
+                manifest["seed"], dims["hidden"], manifest["activation_generator"]
+            )):
+        return False, "activation identity is invalid"
+    cs = manifest["checksums"]
+    # The type check must precede set(): a null or scalar checksums entry is not iterable.
+    if not isinstance(cs, dict) or set(cs) != {"topk_idx", "topk_weights", "trace"}:
+        return False, "checksum fields are invalid"
+    ib = idx_np.astype("<i4", copy=False).tobytes()
+    wb = weights_np.astype("<f4", copy=False).tobytes()
+    digests = {"topk_idx": _sha256(ib), "topk_weights": _sha256(wb), "trace": _sha256(ib + wb)}
+    for field, digest in digests.items():
+        if digest != cs[field]:
+            return False, f"{field} hash differs"
+    wid = compute_workload_id(
+        manifest["routing_profile"], dims["hidden"], dims["topk"], dims["experts"],
+        dims["ep_size"], dims["global_tokens"], manifest["seed"],
+        manifest["generator_version"], trace_checksum=cs["trace"],
+    )
+    if wid != manifest["workload_id"]:
+        return False, f"workload_id mismatch (recomputed {wid} != {manifest['workload_id']})"
+    return True, "ok"
+
+
+# --------------------------------------------------------------------------- self-test
+if __name__ == "__main__":
+    # Self-test entry point; ``sys`` is already imported at module level.
+    import tempfile
+    # (1) ID determinism and routing sensitivity; no numpy needed for this part.
+    params = (7168, 8, 256, 8, 4096, 67)
+    zipf_id, zipf_again = (compute_workload_id("zipf", *params) for _ in range(2))
+    uniform_id = compute_workload_id("uniform", *params)
+    assert zipf_id == zipf_again, "workload_id must be deterministic"
+    assert zipf_id != uniform_id, "workload_id must depend on routing"
+    print(f"workload_id determinism OK (zipf={zipf_id} uniform={uniform_id})")
+    # (2) build/save/load/verify roundtrip plus tamper detection; needs numpy only.
+    try:
+        import numpy as np  # noqa: F401
+        idx, w, man = build_workload(7168, 8, 256, "zipf", 512, 67, 32)
+        with tempfile.TemporaryDirectory() as d:
+            wid = save_workload(d, idx, w, man)
+            idx2, w2, man2 = load_workload(os.path.join(d, f"{wid}.npz"), verify=True)
+            assert (idx2 == idx).all() and (w2 == w).all(), "roundtrip array mismatch"
+            ok, reason = verify_workload(man2, idx2, w2)
+            assert ok, reason
+            # Change one expert index: verification must now report failure.
+            idx2[0, 0] = (int(idx2[0, 0]) + 1) % 256
+            still_ok, _ = verify_workload(man2, idx2, w2)
+            assert not still_ok, "verify must catch tampering"
+        print(f"save/load/verify roundtrip OK (workload_id={wid})")
+    except ImportError:
+        print("(numpy unavailable — skipped serialization roundtrip; id logic passed)")
+    print("workload self-test: PASS")
+    sys.exit(0)