Skip to content

Commit c0c19f5

Browse files
committed
fix(sync/git[remote]) Drop git remote show to avoid ref enumeration hang
why: `GitSync.remote()` relied on `git remote show -n <name>`, whose output begins with the fetch/push URLs but then enumerates every remote-tracking ref in the repository. Repositories like openai/codex (2,400+ branches) ship that list as ~2,500 lines through libvcs's real-time progress callback, which read stderr in 128-byte chunks inside a busy-poll loop. On WSL2 the combination stalled vcspull sync for long enough to look like a hang. The URL lookup itself only needs the two URL lines, so reading from the cached remote manager (`git remote -v`, O(remotes)) is enough. what: - Replace the `cmd.remotes.show(..., log_in_real_time=True)` call with `self.cmd.remotes.get(remote_name=name)`, using the fetch_url / push_url already populated by `git remote -v` parsing. - Fall back to the fetch URL when no explicit pushurl is configured, matching git's own behaviour. - Add `test_remote_is_fast_for_repos_with_many_refs` seeding 500 synthetic refs/remotes/origin/* refs and asserting `remote("origin")` returns in under 5 seconds. This would have been pathological on the old path.
1 parent 03e544a commit c0c19f5

2 files changed

Lines changed: 70 additions & 18 deletions

File tree

src/libvcs/sync/git.py

Lines changed: 27 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -673,25 +673,34 @@ def remote(self, name: str, **kwargs: t.Any) -> GitRemote | None:
673673
Returns
674674
-------
675675
Remote name and url in tuple form
676+
677+
Notes
678+
-----
679+
Uses ``git remote -v`` (via the cached remote manager) instead of
680+
``git remote show -n``. ``git remote show`` also enumerates every
681+
remote-tracking ref, which pipes thousands of lines through the
682+
subprocess progress callback for repositories with large branch
683+
counts (e.g. ``openai/codex`` at 2,400+ refs) and can appear to
684+
hang. ``git remote -v`` is O(remotes) and cannot block on ref
685+
enumeration.
676686
"""
677-
try:
678-
ret = self.cmd.remotes.show(
679-
name=name,
680-
no_query_remotes=True,
681-
log_in_real_time=True,
682-
)
683-
lines = ret.split("\n")
684-
remote_fetch_url = lines[1].replace("Fetch URL: ", "").strip()
685-
remote_push_url = lines[2].replace("Push URL: ", "").strip()
686-
if name not in {remote_fetch_url, remote_push_url}:
687-
return GitRemote(
688-
name=name,
689-
fetch_url=remote_fetch_url,
690-
push_url=remote_push_url,
691-
)
692-
except exc.LibVCSException:
693-
pass
694-
return None
687+
remote_cmd = self.cmd.remotes.get(remote_name=name, default=None)
688+
if remote_cmd is None:
689+
return None
690+
691+
fetch_url = (remote_cmd.fetch_url or "").strip()
692+
if not fetch_url:
693+
return None
694+
695+
# When no explicit ``remote.<name>.pushurl`` is set, git itself falls
696+
# back to the fetch URL -- mirror that so callers always see a URL.
697+
push_url = (remote_cmd.push_url or fetch_url).strip()
698+
699+
return GitRemote(
700+
name=name,
701+
fetch_url=fetch_url,
702+
push_url=push_url,
703+
)
695704

696705
def set_remote(
697706
self,

tests/sync/test_git.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1666,3 +1666,46 @@ def test_sync_result_multiple_errors() -> None:
16661666
assert len(result.errors) == 2
16671667
assert result.errors[0].step == "fetch"
16681668
assert result.errors[1].step == "checkout"
1669+
1670+
1671+
def test_remote_is_fast_for_repos_with_many_refs(
1672+
git_repo: GitSync,
1673+
tmp_path: pathlib.Path,
1674+
) -> None:
1675+
"""``GitSync.remote`` stays O(1) even when the repo has thousands of refs.
1676+
1677+
The prior implementation called ``git remote show -n origin`` whose output
1678+
enumerates every remote-tracking ref. For repositories like openai/codex
1679+
(2,400+ branches) that stream of lines piped through the subprocess
1680+
progress callback produced the appearance of a hang during vcspull sync.
1681+
1682+
This test seeds synthetic refs/remotes/origin/branch-N entries and asserts
1683+
that ``remote('origin')`` returns promptly. A generous 5-second budget is
1684+
enough to catch the pathological old path (many seconds on WSL2) without
1685+
flaking on slow CI.
1686+
"""
1687+
import time
1688+
1689+
# Point origin at a commit we already have so fake refs don't dangle.
1690+
head_sha = run(["git", "rev-parse", "HEAD"], cwd=git_repo.path).strip()
1691+
1692+
# Seed 500 fake remote-tracking refs. ``git update-ref`` is quick but the
1693+
# cumulative count is what would blow up ``git remote show -n``.
1694+
for i in range(500):
1695+
run(
1696+
[
1697+
"git",
1698+
"update-ref",
1699+
f"refs/remotes/origin/fake-branch-{i:04d}",
1700+
head_sha,
1701+
],
1702+
cwd=git_repo.path,
1703+
)
1704+
1705+
started = time.monotonic()
1706+
remote = git_repo.remote("origin")
1707+
elapsed = time.monotonic() - started
1708+
1709+
assert remote is not None
1710+
assert remote.fetch_url, "fetch URL must be populated"
1711+
assert elapsed < 5.0, f"remote() too slow: {elapsed:.2f}s with 500 fake refs"

0 commit comments

Comments
 (0)