aetheronhq · jacsamell · May 19, 2026 · May 19, 2026 · May 19, 2026
diff --git a/python/cube/commands/auto_approve.py b/python/cube/commands/auto_approve.py
@@ -27,12 +27,13 @@ class GateResult:
     reasons: list[str]  # reasons the gate did NOT auto-approve (empty when APPROVE)
 
 
-def _gh_json(args: list[str], cwd: Optional[str] = None) -> Any:
+def _gh_json(args: list[str], cwd: Optional[str] = None, env: Optional[dict] = None) -> Any:
     result = subprocess.run(
         ["gh", *args],
         capture_output=True,
         text=True,
         cwd=cwd,
+        env=env,
         check=False,
     )
     if result.returncode != 0:
@@ -112,7 +113,29 @@ def _pr_author(pr_number: int, cwd: Optional[str] = None) -> str:
 
 
 def _cube_login(cwd: Optional[str] = None) -> Optional[str]:
-    """Return the GitHub login of the gh-CLI's authenticated user (the 'cube identity')."""
+    """Return the GitHub login of the identity that posts reviews — the 'cube identity'.
+
+    When the GitHub App is configured (cube reviews post as `the-agent-cube[bot]`),
+    that bot is the cube identity, not the operator's gh-auth user.
+
+    The author guard exists so the bot can't auto-approve its own PRs. Returning
+    the operator's login here previously blocked operators from auto-approving
+    any PR they authored — wrong: as long as the operator isn't the bot, the
+    bot reviewing a human-authored PR is exactly the intended flow.
+    """
+    from ..github.app_auth import get_app_login, is_configured
+
+    if is_configured():
+        login = get_app_login()
+        if login:
+            return login
+        # If the App is configured but we couldn't fetch its login, fail closed:
+        # don't pretend the operator is the cube identity.
+        return None
+
+    # No App configured — fall back to the operator. Single-developer setups
+    # where cube reviews post as a real user (the operator) still need the
+    # self-approve guard.
     try:
         data = _gh_json(["api", "user"], cwd=cwd)
         return (data or {}).get("login")

diff --git a/python/cube/commands/peer_review.py b/python/cube/commands/peer_review.py
@@ -207,7 +207,9 @@ def _confirm_review_merged_pr(pr_number: int) -> bool:
     return typer.confirm("Are you sure you want to review this already merged PR?", default=False)
 
 
-def _ensure_pr_review_worktree(pr_number: int, head_branch: str, base_branch: str) -> tuple[Path, list[str]]:
+def _ensure_pr_review_worktree(
+    pr_number: int, head_branch: str, base_branch: str, expected_head_sha: str = ""
+) -> tuple[Path, list[str]]:
     """Sync a dedicated review worktree to origin/<head_branch>.
 
     Returns the worktree path AND the list of files this PR touches (relative
@@ -216,35 +218,49 @@ def _ensure_pr_review_worktree(pr_number: int, head_branch: str, base_branch: st
 
     The worktree is reused across panel runs for the same PR — judges read
     files directly via Read instead of reasoning over diff blobs.
+
+    Aborts hard if the worktree ends up at a SHA different from the PR head.
+    The previous behaviour (silent fall-through on fetch/reset failure) meant
+    judges Read pre-rewrite content for any force-pushed PR, producing 10+
+    findings against code that no longer existed.
     """
     import subprocess
 
     project = Path(PROJECT_ROOT).name
     worktree = Path.home() / ".cube" / "worktrees" / project / f"pr-review-{pr_number}"
 
-    # Make sure origin has the head branch.
+    # --force handles force-pushes (rebases, squashes). Without it, the local
+    # ref refuses to update on non-fast-forward and the worktree stays stale.
     fetch = subprocess.run(
-        ["git", "fetch", "origin", head_branch, base_branch],
+        ["git", "fetch", "--force", "origin", head_branch, base_branch],
         cwd=PROJECT_ROOT,
         capture_output=True,
         text=True,
         timeout=120,
     )
     if fetch.returncode != 0:
-        # Not fatal — branch may be a fork; we fall back to repo refs.
-        print_warning(f"git fetch origin {head_branch}: {fetch.stderr.strip()}")
+        raise RuntimeError(
+            f"git fetch --force origin {head_branch} {base_branch} failed: "
+            f"{fetch.stderr.strip()}. Cannot proceed without latest PR refs — "
+            "judges would review stale content."
+        )
 
     if worktree.exists():
-        # Re-sync existing worktree to latest origin/<head>.
-        reset = subprocess.run(
+        # Re-sync existing worktree to latest origin/<head>. Clean untracked
+        # files too — leftover state from a prior round can mask file moves.
+        for cmd in (
             ["git", "reset", "--hard", f"origin/{head_branch}"],
-            cwd=worktree,
-            capture_output=True,
-            text=True,
-            timeout=60,
-        )
-        if reset.returncode != 0:
-            print_warning(f"Could not reset PR review worktree: {reset.stderr.strip()}")
+            ["git", "clean", "-fdx"],
+        ):
+            result = subprocess.run(
+                cmd,
+                cwd=worktree,
+                capture_output=True,
+                text=True,
+                timeout=60,
+            )
+            if result.returncode != 0:
+                raise RuntimeError(f"PR review worktree sync failed ({' '.join(cmd)}): " f"{result.stderr.strip()}")
     else:
         worktree.parent.mkdir(parents=True, exist_ok=True)
         add = subprocess.run(
@@ -255,8 +271,27 @@ def _ensure_pr_review_worktree(pr_number: int, head_branch: str, base_branch: st
             timeout=120,
         )
         if add.returncode != 0:
-            print_warning(f"Could not create PR review worktree: {add.stderr.strip()}")
-            return worktree, []
+            raise RuntimeError(f"Could not create PR review worktree at {worktree}: {add.stderr.strip()}")
+
+    # Verify the worktree HEAD matches the PR head. If not, abort loud — we'd
+    # rather fail the run than silently review the wrong commit.
+    head_check = subprocess.run(
+        ["git", "rev-parse", "HEAD"],
+        cwd=worktree,
+        capture_output=True,
+        text=True,
+        timeout=10,
+    )
+    if head_check.returncode != 0:
+        raise RuntimeError(f"Could not read worktree HEAD: {head_check.stderr.strip()}")
+    worktree_sha = head_check.stdout.strip()
+    if expected_head_sha and worktree_sha != expected_head_sha:
+        raise RuntimeError(
+            f"PR review worktree HEAD ({worktree_sha[:8]}) does not match PR head "
+            f"({expected_head_sha[:8]}). Refusing to run judges against stale content. "
+            f"This usually means `git fetch` got a stale ref — try again, or "
+            f"delete {worktree} and rerun."
+        )
 
     # Enumerate files this PR touches (added/modified/renamed; not deleted —
     # judges can't Read a deleted file).
@@ -339,10 +374,17 @@ def _run_pr_review(
 
     # Sync a dedicated review worktree to origin/<head> so judges Read files
     # directly. Also enumerate the PR's touched files once for routing.
-    print_info(f"Syncing PR review worktree for #{pr_number}...")
-    review_worktree, pr_files = _ensure_pr_review_worktree(pr_number, pr.head_branch, pr.base_branch)
+    print_info(f"Syncing PR review worktree for #{pr_number} → {pr.head_sha[:8]}...")
+    try:
+        review_worktree, pr_files = _ensure_pr_review_worktree(
+            pr_number, pr.head_branch, pr.base_branch, expected_head_sha=pr.head_sha
+        )
+    except RuntimeError as e:
+        print_error(f"PR review worktree sync failed: {e}")
+        raise typer.Exit(1) from e
+
     if pr_files:
-        console.print(f"[dim]Worktree: {review_worktree} ({len(pr_files)} files)[/dim]")
+        console.print(f"[dim]Worktree: {review_worktree} @ {pr.head_sha[:8]} ({len(pr_files)} files)[/dim]")
     else:
         print_warning("Could not enumerate PR files — judges will see full diff (no routing)")
 

diff --git a/python/cube/github/app_auth.py b/python/cube/github/app_auth.py
@@ -253,3 +253,46 @@ def env_with_app_token(base_env: Optional[dict[str, str]] = None) -> dict[str, s
 def is_configured() -> bool:
     """Cheap check for callers that want to log whether the App path is active."""
     return GitHubAppConfig.load() is not None
+
+
+# Cache the App's bot login so callers (auto-approve gate, etc.) don't pay a
+# JWT round-trip on every call. The slug is immutable per App.
+_app_login_cache: dict[str, str] = {}
+
+
+def get_app_login(cfg: Optional[GitHubAppConfig] = None) -> Optional[str]:
+    """Return the GitHub bot login for the configured App (e.g. `the-agent-cube[bot]`).
+
+    GitHub Apps can't call `/user` — they're not users. The `<slug>[bot]`
+    convention is reliable: bot login is always the App's slug + `[bot]`.
+    Reads the slug from `/app` (JWT-authenticated) and caches it.
+    """
+    cfg = cfg or GitHubAppConfig.load()
+    if cfg is None:
+        return None
+    cached = _app_login_cache.get(cfg.app_id)
+    if cached:
+        return cached
+
+    jwt = _mint_jwt(cfg)
+    req = urllib.request.Request(
+        "https://api.github.com/app",
+        method="GET",
+        headers={
+            "Authorization": f"Bearer {jwt}",
+            "Accept": "application/vnd.github+json",
+            "User-Agent": "agent-cube",
+        },
+    )
+    try:
+        with urllib.request.urlopen(req, timeout=15) as resp:
+            data = json.loads(resp.read())
+    except (urllib.error.HTTPError, urllib.error.URLError):
+        return None
+
+    slug = (data or {}).get("slug")
+    if not slug:
+        return None
+    login = f"{slug}[bot]"
+    _app_login_cache[cfg.app_id] = login
+    return login