diff --git a/README.md b/README.md
index b316fce..4a099ef 100644
--- a/README.md
+++ b/README.md
@@ -186,9 +186,11 @@ gh llm doctor
`doctor` prints the current entrypoint, resolved executable paths, `gh` / `gh-llm` versions,
active-host `gh auth status`, a REST probe, a minimal GraphQL probe, and proxy-related environment variables.
+If `gh auth status` fails but both the REST and GraphQL probes succeed, `doctor` reports that auth check as a
+warning instead of failing the whole diagnosis.
When `gh-llm` hits transport errors such as GraphQL `EOF` / timeout failures, the CLI now reports the
-retry count and suggests concrete follow-up commands such as `gh auth status`,
+retry count and suggests concrete follow-up probes such as `gh api user`,
`gh api graphql -f query='query{viewer{login}}'`, and `gh-llm doctor`.
## PR Review Workflow
diff --git a/src/gh_llm/commands/doctor.py b/src/gh_llm/commands/doctor.py
index 42913a6..312a5c2 100644
--- a/src/gh_llm/commands/doctor.py
+++ b/src/gh_llm/commands/doctor.py
@@ -64,12 +64,22 @@ def cmd_doctor(_: Any) -> int:
entrypoint = display_command()
argv0 = detect_prog_name(sys.argv[0])
target_host = resolve_target_host()
+ entrypoint_probe = _probe_entrypoint_version(entrypoint)
+ gh_version_probe = _probe_gh_version()
+ auth_status_probe = _probe_auth_status(target_host)
+ rest_user_probe = _probe_rest_user()
+ graphql_viewer_probe = _probe_graphql_viewer()
+ auth_status_probe = _reconcile_auth_status_probe(
+ auth_status_probe,
+ rest_user_probe=rest_user_probe,
+ graphql_viewer_probe=graphql_viewer_probe,
+ )
critical_probes = (
- _probe_entrypoint_version(entrypoint),
- _probe_gh_version(),
- _probe_auth_status(target_host),
- _probe_rest_user(),
- _probe_graphql_viewer(),
+ entrypoint_probe,
+ gh_version_probe,
+ auth_status_probe,
+ rest_user_probe,
+ graphql_viewer_probe,
)
failed = [probe.name for probe in critical_probes if not probe.ok and probe.critical]
@@ -179,6 +189,30 @@ def _probe_auth_status(target_host: str) -> _ProbeResult:
)
+def _reconcile_auth_status_probe(
+    auth_status_probe: _ProbeResult,
+    *,
+    rest_user_probe: _ProbeResult,
+    graphql_viewer_probe: _ProbeResult,
+) -> _ProbeResult:
+    """Downgrade a failed auth-status probe to a non-critical warning when both API probes passed.
+
+    The auth probe is returned untouched when it already succeeded, or when
+    either the REST or the GraphQL probe failed. Otherwise a new result is
+    built that keeps the probe's name and command, stays ``ok=False``, is
+    marked ``critical=False``, and appends an explanatory note to the detail.
+    """
+    if auth_status_probe.ok:
+        return auth_status_probe
+    if not rest_user_probe.ok or not graphql_viewer_probe.ok:
+        return auth_status_probe
+
+    note = "API probes succeeded; treating auth status as a warning."
+    original_detail = auth_status_probe.detail.strip()
+    # Keep the original output first so the raw failure stays visible above the note.
+    combined_detail = f"{original_detail}\n\n{note}" if original_detail else note
+    return _ProbeResult(
+        name=auth_status_probe.name,
+        command=auth_status_probe.command,
+        ok=False,
+        summary="warning (API probes ok)",
+        detail=combined_detail,
+        critical=False,
+    )
+
+
def _probe_rest_user() -> _ProbeResult:
command = ["gh", "api", "user"]
result = _run_command(command)
diff --git a/src/gh_llm/diagnostics.py b/src/gh_llm/diagnostics.py
index 069d855..f6049bc 100644
--- a/src/gh_llm/diagnostics.py
+++ b/src/gh_llm/diagnostics.py
@@ -19,6 +19,38 @@
("gh", "pr", "view"),
("gh", "issue", "view"),
}
+# Substrings that classify a failure message as a transport-level problem.
+# looks_like_transport_error() lowercases the message and performs a plain
+# substring test against each entry, so every entry here must be lowercase.
+# Broad entries such as "eof" and "timeout" already match the longer variants
+# listed next to them; the longer forms are kept for readability.
+TRANSPORT_ERROR_PATTERNS = (
+ 'post "https://api.github.com/graphql": eof',
+ "eof",
+ "timeout",
+ "i/o timeout",
+ "context deadline exceeded",
+ "client.timeout exceeded",
+ "request canceled",
+ "tls handshake timeout",
+ "remote error: tls",
+ "connection reset",
+ "connection reset by peer",
+ "connection refused",
+ "connection closed",
+ "connection aborted",
+ "broken pipe",
+ "temporary failure",
+ "temporarily unavailable",
+ "network is unreachable",
+ "server misbehaving",
+ "stream error",
+ "goaway",
+ "proxyconnect",
+ "http 500",
+ "http 502",
+ "http 503",
+ "http 504",
+ "500 internal server error",
+ "502 bad gateway",
+ "503 service unavailable",
+ "504 gateway timeout",
+)
class GhCommandError(RuntimeError):
@@ -82,6 +114,11 @@ def format_command_error(error: GhCommandError) -> list[str]:
return lines
+def looks_like_transport_error(message: str) -> bool:
+    """Return True when *message* contains, case-insensitively, any entry of TRANSPORT_ERROR_PATTERNS."""
+    haystack = message.lower()
+    for marker in TRANSPORT_ERROR_PATTERNS:
+        if marker in haystack:
+            return True
+    return False
+
+
def _diagnose_command_error(error: GhCommandError) -> _Diagnosis:
lowered = str(error).lower()
if _looks_like_rate_limit_error(lowered):
@@ -105,17 +142,17 @@ def _diagnose_command_error(error: GhCommandError) -> _Diagnosis:
),
)
- if _is_graphql_backed_command(error.cmd) and _looks_like_transport_error(lowered):
+ if _is_graphql_backed_command(error.cmd) and looks_like_transport_error(lowered):
attempt_suffix = _format_attempt_suffix(error)
return _Diagnosis(
headline=f"GitHub GraphQL request failed{attempt_suffix}.",
category="GraphQL transport / network",
explanation=(
"The request appears to have failed while GitHub GraphQL data was being fetched. "
- "This usually points to transient network, proxy, TLS, or GitHub-side transport issues."
+ "This usually points to transient network, proxy, TLS, or GitHub-side transport issues. "
+ "The direct REST and GraphQL probes are the useful source of truth here."
),
next_commands=(
- _auth_status_command(),
_REST_PROBE_COMMAND,
_GRAPHQL_PROBE_COMMAND,
display_command_with("doctor"),
@@ -152,21 +189,6 @@ def _is_graphql_backed_command(cmd: Sequence[str]) -> bool:
return tuple(str(part) for part in cmd[:3]) in _GRAPHQL_BACKED_COMMANDS
-def _looks_like_transport_error(lowered: str) -> bool:
- patterns = (
- 'post "https://api.github.com/graphql": eof',
- "eof",
- "timeout",
- "tls handshake timeout",
- "connection reset",
- "connection refused",
- "temporary failure",
- "network is unreachable",
- "server misbehaving",
- )
- return any(pattern in lowered for pattern in patterns)
-
-
def _looks_like_auth_error(lowered: str) -> bool:
patterns = (
"authentication failed",
diff --git a/src/gh_llm/github_api.py b/src/gh_llm/github_api.py
index 57ae90c..b397f31 100644
--- a/src/gh_llm/github_api.py
+++ b/src/gh_llm/github_api.py
@@ -13,7 +13,7 @@
from typing import TYPE_CHECKING, cast
from urllib.parse import quote, urlparse
-from gh_llm.diagnostics import GhCommandError
+from gh_llm.diagnostics import GhCommandError, looks_like_transport_error
from gh_llm.invocation import display_command, display_command_with
from gh_llm.models import (
CheckItem,
@@ -37,9 +37,10 @@
MAX_INLINE_TEXT = 8000
MAX_INLINE_LINES = 200
DEFAULT_REVIEW_DIFF_HUNK_LINES = 12
-GRAPHQL_MAX_ATTEMPTS = 4
+GRAPHQL_MAX_ATTEMPTS = 6
+GRAPHQL_MUTATION_MAX_ATTEMPTS = 4
GRAPHQL_BACKOFF_BASE_SECONDS = 0.25
-GRAPHQL_BACKOFF_MAX_SECONDS = 2.0
+GRAPHQL_BACKOFF_MAX_SECONDS = 4.0
DETAILS_BLOCK_RE = re.compile(r"(?is)]*>(.*?) ")
SUMMARY_RE = re.compile(r"(?is)]*>(.*?)")
HTML_TAG_RE = re.compile(r"(?is)<[^>]+>")
@@ -1528,7 +1529,12 @@ def _try_update_pull_request_review_comment(self, *, comment_id: str, body: str)
return updated_id or None
def _get_viewer_login(self) -> str:
- payload = _run_command_json(["gh", "api", "user"])
+ payload = _run_command_json(
+ ["gh", "api", "user"],
+ max_attempts=GRAPHQL_MAX_ATTEMPTS,
+ backoff_base_seconds=GRAPHQL_BACKOFF_BASE_SECONDS,
+ backoff_max_seconds=GRAPHQL_BACKOFF_MAX_SECONDS,
+ )
login = _as_optional_str(payload.get("login"))
return login or ""
@@ -2160,7 +2166,7 @@ def _run_graphql_payload(query: str, variables: dict[str, str | int]) -> dict[st
cmd.extend(["-F", f"{key}={value}"])
return _run_command_json(
cmd,
- max_attempts=GRAPHQL_MAX_ATTEMPTS,
+ max_attempts=_graphql_query_max_attempts(query),
backoff_base_seconds=GRAPHQL_BACKOFF_BASE_SECONDS,
backoff_max_seconds=GRAPHQL_BACKOFF_MAX_SECONDS,
)
@@ -2175,12 +2181,18 @@ def _run_graphql_payload_any(query: str, variables: dict[str, object]) -> dict[s
cmd.extend(["-F", f"{key}={value}"])
return _run_command_json(
cmd,
- max_attempts=GRAPHQL_MAX_ATTEMPTS,
+ max_attempts=_graphql_query_max_attempts(query),
backoff_base_seconds=GRAPHQL_BACKOFF_BASE_SECONDS,
backoff_max_seconds=GRAPHQL_BACKOFF_MAX_SECONDS,
)
+def _graphql_query_max_attempts(query: str) -> int:
+    """Pick the retry budget for a GraphQL document.
+
+    Documents whose first significant line starts with ``mutation`` get
+    ``GRAPHQL_MUTATION_MAX_ATTEMPTS``; everything else gets
+    ``GRAPHQL_MAX_ATTEMPTS``. Blank lines and ``#`` comment lines ahead of the
+    operation are skipped, so a mutation preceded by a comment is not
+    mistaken for a query and handed the larger budget.
+    """
+    significant_lines = (line.strip() for line in query.splitlines())
+    first_significant = next(
+        (line for line in significant_lines if line and not line.startswith("#")),
+        "",
+    )
+    if first_significant.startswith("mutation"):
+        return GRAPHQL_MUTATION_MAX_ATTEMPTS
+    return GRAPHQL_MAX_ATTEMPTS
+
+
def _run_command_json(
cmd: list[str],
*,
@@ -2199,7 +2211,8 @@ def _run_command_json(
return {str(k): v for k, v in raw.items()}
stderr = result.stderr.strip()
- if attempt >= attempts or not _is_retryable_gh_error(stderr):
+ error_output = _combine_command_error_output(result.stderr, result.stdout)
+ if attempt >= attempts or not looks_like_transport_error(error_output):
raise GhCommandError(
cmd=cmd,
stderr=stderr,
@@ -2228,7 +2241,8 @@ def _run_command_json_any(
return json.loads(result.stdout)
stderr = result.stderr.strip()
- if attempt >= attempts or not _is_retryable_gh_error(stderr):
+ error_output = _combine_command_error_output(result.stderr, result.stdout)
+ if attempt >= attempts or not looks_like_transport_error(error_output):
raise GhCommandError(
cmd=cmd,
stderr=stderr,
@@ -2256,7 +2270,8 @@ def _run_command_text(
if result.returncode == 0:
return result.stdout
stderr = result.stderr.strip()
- if attempt >= attempts or not _is_retryable_gh_error(stderr):
+ error_output = _combine_command_error_output(result.stderr, result.stdout)
+ if attempt >= attempts or not looks_like_transport_error(error_output):
raise GhCommandError(
cmd=cmd,
stderr=stderr,
@@ -3656,18 +3671,8 @@ def _reaction_emoji(content: str) -> str:
return mapping.get(content, "")
-def _is_retryable_gh_error(stderr: str) -> bool:
- lowered = stderr.lower()
- retryable_patterns = (
- 'post "https://api.github.com/graphql": eof',
- "eof",
- "timeout",
- "tls handshake timeout",
- "connection reset",
- "connection refused",
- "temporary failure",
- )
- return any(pattern in lowered for pattern in retryable_patterns)
+def _combine_command_error_output(stderr: str, stdout: str) -> str:
+    """Join the stripped, non-blank stderr and stdout text with a newline, stderr first."""
+    kept: list[str] = []
+    for stream_text in (stderr, stdout):
+        trimmed = stream_text.strip()
+        if trimmed:
+            kept.append(trimmed)
+    return "\n".join(kept)
def _is_check_run_passed(*, status: str, conclusion: str | None) -> bool:
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 348f6a2..92e405d 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -522,6 +522,40 @@ def fake_run(cmd: list[str], *, check: bool, capture_output: bool, text: bool) -
assert ["gh", "auth", "status"] not in calls
+def test_doctor_warns_when_auth_status_fails_but_api_probes_pass(
+    monkeypatch: pytest.MonkeyPatch,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    """`doctor` exits 0 and shows a warning when only the auth-status probe fails."""
+
+    def fake_run(cmd: list[str], *, check: bool, capture_output: bool, text: bool) -> FakeCompletedProcess:
+        del check, capture_output, text
+        # GraphQL calls carry extra arguments, so they are matched by prefix.
+        if cmd[:3] == ["gh", "api", "graphql"]:
+            return FakeCompletedProcess(json.dumps({"data": {"viewer": {"login": "ShigureNyako"}}}))
+        exact_responses = {
+            ("gh", "llm", "--version"): lambda: FakeCompletedProcess(f"{__version__}\n"),
+            ("gh", "--version"): lambda: FakeCompletedProcess("gh version test-build\n"),
+            ("gh", "auth", "status", "--active", "--hostname", "github.com"): lambda: FakeCompletedProcess(
+                "", returncode=1, stderr="token is invalid"
+            ),
+            ("gh", "api", "user"): lambda: FakeCompletedProcess(json.dumps({"login": "ShigureNyako"})),
+        }
+        build_response = exact_responses.get(tuple(cmd))
+        if build_response is None:
+            return FakeCompletedProcess("", returncode=1, stderr="unexpected command")
+        return build_response()
+
+    monkeypatch.setenv("GH_LLM_DISPLAY_CMD", "gh llm")
+    monkeypatch.setattr(sys, "argv", ["gh-llm"])
+    monkeypatch.setattr(doctor_commands.subprocess, "run", fake_run)
+
+    exit_code = cli.run(["doctor"])
+    assert exit_code == 0
+    stdout = capsys.readouterr().out
+    expected_fragments = (
+        "- auth status (`gh auth status --active --hostname github.com`): warning (API probes ok)",
+        "token is invalid",
+        "API probes succeeded; treating auth status as a warning.",
+        "status: ok",
+    )
+    for fragment in expected_fragments:
+        assert fragment in stdout
+    assert "failed_checks:" not in stdout
+
+
def test_parse_event_indexes_batch() -> None:
assert cli.parse_event_indexes(["5,11", "8-6"]) == [5, 6, 7, 8, 11]
@@ -2664,6 +2698,86 @@ def no_sleep(_: float) -> None:
assert state["failed_once"] is True
+def test_graphql_stream_error_retries_with_backoff(
+    monkeypatch: pytest.MonkeyPatch,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    """A single GraphQL `stream error` failure is retried and `pr view` still succeeds."""
+    responder = GhResponder()
+    injected_failure = False
+
+    def flaky_run(cmd: list[str], *, check: bool, capture_output: bool, text: bool) -> FakeCompletedProcess:
+        nonlocal injected_failure
+        is_graphql_call = cmd[:3] == ["gh", "api", "graphql"]
+        if is_graphql_call and not injected_failure:
+            # Fail only the first GraphQL call; later calls go to the responder.
+            injected_failure = True
+            return FakeCompletedProcess("", returncode=1, stderr="stream error: stream ID 1; INTERNAL_ERROR")
+        return responder.run(cmd, check=check, capture_output=capture_output, text=text)
+
+    monkeypatch.setattr(github_api.subprocess, "run", flaky_run)
+    monkeypatch.setattr(github_api.time, "sleep", lambda _seconds: None)
+
+    exit_code = cli.run(["pr", "view", "77928", "--repo", "PaddlePaddle/Paddle", "--page-size", "2"])
+    assert exit_code == 0
+    stdout = capsys.readouterr().out
+    assert "### Page 1/4" in stdout
+    assert injected_failure is True
+
+
+def test_rest_viewer_login_eof_retries_with_backoff(
+    monkeypatch: pytest.MonkeyPatch,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    """A single EOF failure of `gh api user` is retried and `pr view` still succeeds."""
+    responder = GhResponder()
+    injected_failure = False
+
+    def flaky_run(cmd: list[str], *, check: bool, capture_output: bool, text: bool) -> FakeCompletedProcess:
+        nonlocal injected_failure
+        is_viewer_lookup = cmd == ["gh", "api", "user"]
+        if is_viewer_lookup and not injected_failure:
+            # Fail only the first viewer lookup; later calls go to the responder.
+            injected_failure = True
+            return FakeCompletedProcess("", returncode=1, stderr='Get "https://api.github.com/user": EOF')
+        return responder.run(cmd, check=check, capture_output=capture_output, text=text)
+
+    monkeypatch.setattr(github_api.subprocess, "run", flaky_run)
+    monkeypatch.setattr(github_api.time, "sleep", lambda _seconds: None)
+
+    exit_code = cli.run(["pr", "view", "77928", "--repo", "PaddlePaddle/Paddle", "--page-size", "2"])
+    assert exit_code == 0
+    stdout = capsys.readouterr().out
+    assert "### Page 1/4" in stdout
+    assert injected_failure is True
+
+
+def test_graphql_mutation_transport_failure_uses_mutation_retry_cap(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """A GraphQL call that always fails with EOF is attempted GRAPHQL_MUTATION_MAX_ATTEMPTS times.
+
+    NOTE(review): relies on resolve_review_thread issuing a GraphQL mutation, as the test name implies.
+    """
+    eof_message = 'Post "https://api.github.com/graphql": EOF'
+    graphql_calls = 0
+
+    def failing_run(cmd: list[str], *, check: bool, capture_output: bool, text: bool) -> FakeCompletedProcess:
+        nonlocal graphql_calls
+        del check, capture_output, text
+        if cmd[:3] != ["gh", "api", "graphql"]:
+            return FakeCompletedProcess("", returncode=1, stderr="unexpected command")
+        graphql_calls += 1
+        return FakeCompletedProcess("", returncode=1, stderr=eof_message)
+
+    monkeypatch.setattr(github_api.subprocess, "run", failing_run)
+    monkeypatch.setattr(github_api.time, "sleep", lambda _seconds: None)
+
+    try:
+        github_api.GitHubClient().resolve_review_thread("PRRT_retry_cap")
+    except RuntimeError as error:
+        assert eof_message in str(error)
+    else:
+        raise AssertionError("expected mutation transport failure")
+
+    assert graphql_calls == github_api.GRAPHQL_MUTATION_MAX_ATTEMPTS
+
+
def test_graphql_eof_failure_prints_layered_diagnostics(
monkeypatch: pytest.MonkeyPatch,
capsys: pytest.CaptureFixture[str],
@@ -2685,24 +2799,24 @@ def no_sleep(_: float) -> None:
code = cli.run(["pr", "view", "77928", "--repo", "PaddlePaddle/Paddle", "--page-size", "2"])
assert code == 1
err = capsys.readouterr().err
- assert "error: GitHub GraphQL request failed after 4 attempts." in err
+ assert "error: GitHub GraphQL request failed after 6 attempts." in err
assert 'Last error: Post "https://api.github.com/graphql": EOF' in err
assert "Category: GraphQL transport / network" in err
assert "Command: gh api graphql" in err
assert "Try next:" in err
- assert "- gh auth status --active --hostname github.com" in err
assert "- gh api user" in err
assert "- gh api graphql -f query='query{viewer{login}}'" in err
assert "- gh llm doctor" in err
+ assert "gh auth status" not in err
-def test_graphql_error_hints_scope_auth_status_to_target_host(
+def test_graphql_error_hints_prefer_api_probes_over_auth_status(
monkeypatch: pytest.MonkeyPatch,
capsys: pytest.CaptureFixture[str],
) -> None:
def failing_run(cmd: list[str], *, check: bool, capture_output: bool, text: bool) -> FakeCompletedProcess:
del check, capture_output, text
- if cmd[:3] == ["gh", "api", "graphql"]:
+ if cmd[:3] in (["gh", "api", "graphql"], ["gh", "pr", "view"]):
return FakeCompletedProcess("", returncode=1, stderr='Post "https://api.github.com/graphql": EOF')
return FakeCompletedProcess("", returncode=1, stderr="unexpected command")
@@ -2717,8 +2831,10 @@ def no_sleep(_: float) -> None:
code = cli.run(["pr", "view", "77928", "--repo", "PaddlePaddle/Paddle", "--page-size", "2"])
assert code == 1
err = capsys.readouterr().err
- assert "- gh auth status --active --hostname github.example.com" in err
+ assert "- gh api user" in err
+ assert "- gh api graphql -f query='query{viewer{login}}'" in err
assert "- gh llm doctor" in err
+ assert "gh auth status" not in err
def test_pr_view_graphql_transport_error_uses_layered_diagnostics(
@@ -2743,9 +2859,9 @@ def no_sleep(_: float) -> None:
code = cli.run(["pr", "view", "77928", "--repo", "PaddlePaddle/Paddle", "--page-size", "2"])
assert code == 1
- assert state["attempts"] == 4
+ assert state["attempts"] == 6
err = capsys.readouterr().err
- assert "error: GitHub GraphQL request failed after 4 attempts." in err
+ assert "error: GitHub GraphQL request failed after 6 attempts." in err
assert "Category: GraphQL transport / network" in err
assert "Command: gh pr view" in err
assert "- gh llm doctor" in err
@@ -2773,9 +2889,9 @@ def no_sleep(_: float) -> None:
code = cli.run(["issue", "view", "77924", "--repo", "PaddlePaddle/Paddle", "--page-size", "2"])
assert code == 1
- assert state["attempts"] == 4
+ assert state["attempts"] == 6
err = capsys.readouterr().err
- assert "error: GitHub GraphQL request failed after 4 attempts." in err
+ assert "error: GitHub GraphQL request failed after 6 attempts." in err
assert "Category: GraphQL transport / network" in err
assert "Command: gh issue view" in err
assert "- gh llm doctor" in err