diff --git a/python/cube/commands/auto_approve.py b/python/cube/commands/auto_approve.py index a404be94..8208127e 100644 --- a/python/cube/commands/auto_approve.py +++ b/python/cube/commands/auto_approve.py @@ -27,12 +27,13 @@ class GateResult: reasons: list[str] # reasons the gate did NOT auto-approve (empty when APPROVE) -def _gh_json(args: list[str], cwd: Optional[str] = None) -> Any: +def _gh_json(args: list[str], cwd: Optional[str] = None, env: Optional[dict] = None) -> Any: result = subprocess.run( ["gh", *args], capture_output=True, text=True, cwd=cwd, + env=env, check=False, ) if result.returncode != 0: @@ -112,7 +113,29 @@ def _pr_author(pr_number: int, cwd: Optional[str] = None) -> str: def _cube_login(cwd: Optional[str] = None) -> Optional[str]: - """Return the GitHub login of the gh-CLI's authenticated user (the 'cube identity').""" + """Return the GitHub login of the identity that posts reviews — the 'cube identity'. + + When the GitHub App is configured (cube reviews post as `the-agent-cube[bot]`), + that bot is the cube identity, not the operator's gh-auth user. + + The author guard exists so the bot can't auto-approve its own PRs. Returning + the operator's login here previously blocked operators from auto-approving + any PR they authored — wrong: as long as the operator isn't the bot, the + bot reviewing a human-authored PR is exactly the intended flow. + """ + from ..github.app_auth import get_app_login, is_configured + + if is_configured(): + login = get_app_login() + if login: + return login + # If the App is configured but we couldn't fetch its login, fail closed: + # don't pretend the operator is the cube identity. + return None + + # No App configured — fall back to the operator. Single-developer setups + # where cube reviews post as a real user (the operator) still need the + # self-approve guard. try: data = _gh_json(["api", "user"], cwd=cwd) return (data or {}).get("login") diff --git a/python/cube/commands/peer_review.py b/python/cube/commands/peer_review.py index 6081c6fb..b56cbd89 100644 --- a/python/cube/commands/peer_review.py +++ b/python/cube/commands/peer_review.py @@ -207,7 +207,9 @@ def _confirm_review_merged_pr(pr_number: int) -> bool: return typer.confirm("Are you sure you want to review this already merged PR?", default=False) -def _ensure_pr_review_worktree(pr_number: int, head_branch: str, base_branch: str) -> tuple[Path, list[str]]: +def _ensure_pr_review_worktree( + pr_number: int, head_branch: str, base_branch: str, expected_head_sha: str = "" +) -> tuple[Path, list[str]]: """Sync a dedicated review worktree to origin/. Returns the worktree path AND the list of files this PR touches (relative @@ -216,35 +218,49 @@ def _ensure_pr_review_worktree(pr_number: int, head_branch: str, base_branch: st The worktree is reused across panel runs for the same PR — judges read files directly via Read instead of reasoning over diff blobs. + + Aborts hard if the worktree ends up at a SHA different from the PR head. + The previous behaviour (silent fall-through on fetch/reset failure) meant + judges Read pre-rewrite content for any force-pushed PR, producing 10+ + findings against code that no longer existed. """ import subprocess project = Path(PROJECT_ROOT).name worktree = Path.home() / ".cube" / "worktrees" / project / f"pr-review-{pr_number}" - # Make sure origin has the head branch. + # --force handles force-pushes (rebases, squashes). Without it, the local + # ref refuses to update on non-fast-forward and the worktree stays stale. fetch = subprocess.run( - ["git", "fetch", "origin", head_branch, base_branch], + ["git", "fetch", "--force", "origin", head_branch, base_branch], cwd=PROJECT_ROOT, capture_output=True, text=True, timeout=120, ) if fetch.returncode != 0: - # Not fatal — branch may be a fork; we fall back to repo refs. - print_warning(f"git fetch origin {head_branch}: {fetch.stderr.strip()}") + raise RuntimeError( + f"git fetch --force origin {head_branch} {base_branch} failed: " + f"{fetch.stderr.strip()}. Cannot proceed without latest PR refs — " + "judges would review stale content." + ) if worktree.exists(): - # Re-sync existing worktree to latest origin/. - reset = subprocess.run( + # Re-sync existing worktree to latest origin/. Clean untracked + # files too — leftover state from a prior round can mask file moves. + for cmd in ( ["git", "reset", "--hard", f"origin/{head_branch}"], - cwd=worktree, - capture_output=True, - text=True, - timeout=60, - ) - if reset.returncode != 0: - print_warning(f"Could not reset PR review worktree: {reset.stderr.strip()}") + ["git", "clean", "-fdx"], + ): + result = subprocess.run( + cmd, + cwd=worktree, + capture_output=True, + text=True, + timeout=60, + ) + if result.returncode != 0: + raise RuntimeError(f"PR review worktree sync failed ({' '.join(cmd)}): " f"{result.stderr.strip()}") else: worktree.parent.mkdir(parents=True, exist_ok=True) add = subprocess.run( @@ -255,8 +271,27 @@ def _ensure_pr_review_worktree(pr_number: int, head_branch: str, base_branch: st timeout=120, ) if add.returncode != 0: - print_warning(f"Could not create PR review worktree: {add.stderr.strip()}") - return worktree, [] + raise RuntimeError(f"Could not create PR review worktree at {worktree}: {add.stderr.strip()}") + + # Verify the worktree HEAD matches the PR head. If not, abort loud — we'd + # rather fail the run than silently review the wrong commit. + head_check = subprocess.run( + ["git", "rev-parse", "HEAD"], + cwd=worktree, + capture_output=True, + text=True, + timeout=10, + ) + if head_check.returncode != 0: + raise RuntimeError(f"Could not read worktree HEAD: {head_check.stderr.strip()}") + worktree_sha = head_check.stdout.strip() + if expected_head_sha and worktree_sha != expected_head_sha: + raise RuntimeError( + f"PR review worktree HEAD ({worktree_sha[:8]}) does not match PR head " + f"({expected_head_sha[:8]}). Refusing to run judges against stale content. " + f"This usually means `git fetch` got a stale ref — try again, or " + f"delete {worktree} and rerun." + ) # Enumerate files this PR touches (added/modified/renamed; not deleted — # judges can't Read a deleted file). @@ -339,10 +374,17 @@ def _run_pr_review( # Sync a dedicated review worktree to origin/ so judges Read files # directly. Also enumerate the PR's touched files once for routing. - print_info(f"Syncing PR review worktree for #{pr_number}...") - review_worktree, pr_files = _ensure_pr_review_worktree(pr_number, pr.head_branch, pr.base_branch) + print_info(f"Syncing PR review worktree for #{pr_number} → {pr.head_sha[:8]}...") + try: + review_worktree, pr_files = _ensure_pr_review_worktree( + pr_number, pr.head_branch, pr.base_branch, expected_head_sha=pr.head_sha + ) + except RuntimeError as e: + print_error(f"PR review worktree sync failed: {e}") + raise typer.Exit(1) from e + if pr_files: - console.print(f"[dim]Worktree: {review_worktree} ({len(pr_files)} files)[/dim]") + console.print(f"[dim]Worktree: {review_worktree} @ {pr.head_sha[:8]} ({len(pr_files)} files)[/dim]") else: print_warning("Could not enumerate PR files — judges will see full diff (no routing)") diff --git a/python/cube/github/app_auth.py b/python/cube/github/app_auth.py index c85387a5..2696e70d 100644 --- a/python/cube/github/app_auth.py +++ b/python/cube/github/app_auth.py @@ -253,3 +253,46 @@ def env_with_app_token(base_env: Optional[dict[str, str]] = None) -> dict[str, s def is_configured() -> bool: """Cheap check for callers that want to log whether the App path is active.""" return GitHubAppConfig.load() is not None + + +# Cache the App's bot login so callers (auto-approve gate, etc.) don't pay a +# JWT round-trip on every call. The slug is immutable per App. +_app_login_cache: dict[str, str] = {} + + +def get_app_login(cfg: Optional[GitHubAppConfig] = None) -> Optional[str]: + """Return the GitHub bot login for the configured App (e.g. `the-agent-cube[bot]`). + + GitHub Apps can't call `/user` — they're not users. The `[bot]` + convention is reliable: bot login is always the App's slug + `[bot]`. + Reads the slug from `/app` (JWT-authenticated) and caches it. + """ + cfg = cfg or GitHubAppConfig.load() + if cfg is None: + return None + cached = _app_login_cache.get(cfg.app_id) + if cached: + return cached + + jwt = _mint_jwt(cfg) + req = urllib.request.Request( + "https://api.github.com/app", + method="GET", + headers={ + "Authorization": f"Bearer {jwt}", + "Accept": "application/vnd.github+json", + "User-Agent": "agent-cube", + }, + ) + try: + with urllib.request.urlopen(req, timeout=15) as resp: + data = json.loads(resp.read()) + except (urllib.error.HTTPError, urllib.error.URLError): + return None + + slug = (data or {}).get("slug") + if not slug: + return None + login = f"{slug}[bot]" + _app_login_cache[cfg.app_id] = login + return login