From 6d055aafa90645f229ee4e6f74a937be1f65f16c Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sat, 13 Jun 2026 23:29:57 +0000
Subject: [PATCH 1/6] ci: add Windows to the test matrix and fix Windows-only
 test breakage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a `windows` job (windows-latest, py3.12 + py3.13) running the pytest
suite, plus a stable `tests (windows)` aggregator for branch protection.
The full scripts/check.sh gate stays Linux-only (it's bash + Go/Homebrew
tooling), so the Windows job covers just the test suite — path handling,
subprocess/encoding, and POSIX-only assumptions.

Fix the two static blockers that would error on a Windows run:
- test_hotkey.py imported termios and used os.openpty at module scope, which
  crashes collection on Windows. Skip the module there via importorskip
  (keeps it out of the skip/xfail escape-hatch count the Linux gate tracks).
- test_init_scaffold.py asserted 0600 permission bits that don't exist on
  Windows; gate the POSIX-only mode assertions on os.name == "posix" while
  still verifying the .env rewrite on every platform.

https://claude.ai/code/session_01RZW2ga1Phmt4CQpkZp3LqE
---
 .github/workflows/ci.yml    | 55 +++++++++++++++++++++++++++++++++++++
 tests/test_hotkey.py        |  6 +++-
 tests/test_init_scaffold.py | 23 +++++++++++-----
 3 files changed, 76 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 1f8541d7..abe9a0ce 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -103,6 +103,61 @@ jobs:
           fi
           echo "all py-version matrix cells passed"
 
+  windows:
+    name: tests (windows, py${{ matrix.python-version }})
+    runs-on: windows-latest
+    timeout-minutes: 20
+    # Windows can't run scripts/check.sh (it's bash plus Go/Homebrew/shell tooling), so
+    # this job runs only the pytest suite — enough to catch Windows-specific regressions
+    # (path handling, subprocess/encoding, POSIX-only assumptions). The lint/type/security
+    # gates stay on the Linux `check` job. Same Python ends as that matrix: 3.12 floor,
+    # 3.13 shipped; fail-fast off so one version's failure doesn't mask the other's.
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.12", "3.13"]
+    # Pin the interpreter every `uv run` resolves to, so the matrix exercises each version.
+    env:
+      UV_PYTHON: ${{ matrix.python-version }}
+    steps:
+      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
+        with:
+          persist-credentials: false  # no job pushes; don't leave the token in .git/config
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+        with:
+          python-version: ${{ matrix.python-version }}
+          cache: pip
+
+      # No native media tooling is installed: the sounddevice wheel bundles PortAudio on
+      # Windows, and the unit suite mocks every ffmpeg/ffprobe shell-out. The e2e/install
+      # suites that would need real binaries are excluded by the default pytest
+      # `-m 'not e2e and not install'` selection.
+      - name: Install uv
+        run: python -m pip install uv
+
+      # `uv run` syncs the locked project + dev group into .venv, then runs the default
+      # suite (e2e/install excluded via addopts).
+      - name: Run test suite
+        run: uv run pytest -q
+
+  # Stable, un-suffixed name for branch protection, mirroring `check-result`: green only
+  # when every Windows matrix cell passed (a failed/skipped/cancelled matrix can't satisfy
+  # it). Point branch protection at this one name and matrix changes won't break it.
+  windows-result:
+    name: tests (windows)
+    needs: [windows]
+    if: always()
+    runs-on: ubuntu-latest
+    timeout-minutes: 5
+    steps:
+      - name: Require every Windows matrix cell to have passed
+        run: |
+          if [ "${{ needs.windows.result }}" != "success" ]; then
+            echo "windows matrix result: ${{ needs.windows.result }}"
+            exit 1
+          fi
+          echo "all windows matrix cells passed"
+
   lint-formula:
     name: brew style (Homebrew formula)
     runs-on: ubuntu-latest
diff --git a/tests/test_hotkey.py b/tests/test_hotkey.py
index 51df9601..3a138b1c 100644
--- a/tests/test_hotkey.py
+++ b/tests/test_hotkey.py
@@ -6,13 +6,17 @@
 
 import os
 import sys
-import termios
 
 import pytest
 
 from aai_cli.core.errors import CLIError
 from aai_cli.core.hotkey import TerminalKeys, _stdin_fd
 
+# termios and os.openpty are POSIX-only, so the whole module is skipped on Windows
+# (where TerminalKeys raises a clean CLIError rather than running). importorskip keeps
+# this out of the skip/xfail escape-hatch count the Linux gate tracks.
+termios = pytest.importorskip("termios")
+
 
 @pytest.fixture
 def pty_pair():
diff --git a/tests/test_init_scaffold.py b/tests/test_init_scaffold.py
index 0b86393b..0e9bc38c 100644
--- a/tests/test_init_scaffold.py
+++ b/tests/test_init_scaffold.py
@@ -1,3 +1,4 @@
+import os
 import stat
 
 import pytest
@@ -7,13 +8,17 @@
 
 
 def test_scaffold_env_is_owner_only_readable(tmp_path):
-    # The .env holds the real API key, so it must not be world/group-readable. (CI is
-    # POSIX; the project gates its Windows-specific paths in scripts/check.sh, not here.)
+    # The .env holds the real API key, so it must not be world/group-readable.
     target = tmp_path / "app"
     scaffold.scaffold("audio-transcription", target, api_key="sk-real-key")
-    mode = stat.S_IMODE((target / ".env").stat().st_mode)
-    assert mode == 0o600
-    assert not mode & (stat.S_IRGRP | stat.S_IROTH)  # no group/other read of the key
+    env_path = target / ".env"
+    assert env_path.is_file()
+    # POSIX permission bits are meaningful only on POSIX; Windows has no 0600 mode,
+    # so the scaffolder's chmod is a best-effort no-op there.
+    if os.name == "posix":
+        mode = stat.S_IMODE(env_path.stat().st_mode)
+        assert mode == 0o600
+        assert not mode & (stat.S_IRGRP | stat.S_IROTH)  # no group/other read of the key
 
 
 def test_scaffold_tightens_existing_env_on_overwrite(tmp_path):
@@ -23,9 +28,13 @@ def test_scaffold_tightens_existing_env_on_overwrite(tmp_path):
     target.mkdir()
     stale = target / ".env"
     stale.write_text("ASSEMBLYAI_API_KEY=old\n")
-    stale.chmod(0o644)
+    if os.name == "posix":
+        stale.chmod(0o644)
     scaffold.scaffold("audio-transcription", target, api_key="sk-real-key")
-    assert stat.S_IMODE(stale.stat().st_mode) == 0o600
+    # The rewrite lands on every platform; the 0600 tightening is POSIX-only.
+    assert "ASSEMBLYAI_API_KEY=sk-real-key" in stale.read_text()
+    if os.name == "posix":
+        assert stat.S_IMODE(stale.stat().st_mode) == 0o600
 
 
 def test_scaffold_copies_files_and_renames_dotfiles(tmp_path):

From 4bd16ba85f323f1975c3f39f3e5c713faad1026f Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sat, 13 Jun 2026 23:47:32 +0000
Subject: [PATCH 2/6] ci: fix Windows test-suite failures surfaced by the new
 matrix job
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Windows job's first run exposed 147 failures across ~6 root causes, all
test-harness / test-expectation issues rather than source bugs (native
backslash paths and the Windows "no termios" dictate message are correct
platform behavior):

- .gitattributes: force LF on checkout so the byte-exact syrupy snapshot
  goldens (*.ambr) don't fail under Git's Windows autocrlf (~42 failures).
- conftest: on Windows, permit loopback sockets (allow_hosts 127.0.0.1/::1)
  for in-process async tests. The asyncio event loop's self-pipe is an
  AF_INET socketpair() there, which --disable-socket blocked, breaking every
  FastAPI TestClient / scaffolded-template test; POSIX uses os.pipe() so this
  is Windows-only. External network stays blocked.
- setup test fixtures: isolate USERPROFILE too — Path.home() reads it (not
  HOME) on Windows, so skill install/status wrote into the real profile.
- test_init_template_contract: read template files with encoding="utf-8"
  (cp1252 can't decode the UTF-8 assets).
- test_transcribe_batch_sources / test_onboard_sections: build expected paths
  with pathlib / from the fixture path instead of hardcoded "/".
- test_dictate_command: accept the unsupported-platform message as an
  equivalent usage error on Windows.

https://claude.ai/code/session_01RZW2ga1Phmt4CQpkZp3LqE
---
 .gitattributes                         |  6 ++++
 tests/conftest.py                      |  9 ++++++
 tests/test_dictate_command.py          |  6 +++-
 tests/test_init_template_contract.py   | 41 ++++++++++++++++----------
 tests/test_onboard_sections.py         |  4 +--
 tests/test_setup.py                    |  2 ++
 tests/test_setup_install.py            |  3 ++
 tests/test_transcribe_batch_sources.py |  4 ++-
 8 files changed, 55 insertions(+), 20 deletions(-)
 create mode 100644 .gitattributes

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 00000000..9b3021cd
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,6 @@
+# Normalize line endings to LF on checkout everywhere. The suite renders output
+# with "\n" and the syrupy snapshot goldens (tests/__snapshots__/*.ambr) are
+# compared byte-for-byte, so a Windows checkout converting them to CRLF (Git's
+# default autocrlf) would fail every snapshot test. text=auto still lets Git
+# auto-detect and leave binary files untouched.
+* text=auto eol=lf
diff --git a/tests/conftest.py b/tests/conftest.py
index 8afd5e3a..b60a69ad 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,4 +1,5 @@
 import os
+import sys
 import time
 
 import keyring
@@ -22,6 +23,14 @@ def pytest_collection_modifyitems(items: list[pytest.Item]) -> None:
     for item in items:
         if any(item.get_closest_marker(name) for name in _NETWORK_MARKERS):
             item.add_marker(pytest.mark.enable_socket)
+        elif sys.platform == "win32" and item.get_closest_marker("allow_hosts") is None:
+            # On Windows the asyncio event loop's self-pipe is an AF_INET socketpair(),
+            # which the suite-wide --disable-socket would block — so every in-process
+            # async test (FastAPI TestClient, the scaffolded template apps) would fail.
+            # POSIX uses an os.pipe() self-pipe, so this only bites on Windows. Permit
+            # loopback while still blocking external network (the hermeticity guarantee
+            # that matters), unless the test already pins its own allow_hosts.
+            item.add_marker(pytest.mark.allow_hosts(["127.0.0.1", "::1"]))
 
 
 @pytest.fixture
diff --git a/tests/test_dictate_command.py b/tests/test_dictate_command.py
index 8540816d..7bdb5e9b 100644
--- a/tests/test_dictate_command.py
+++ b/tests/test_dictate_command.py
@@ -81,4 +81,8 @@ def test_outside_a_terminal_is_a_usage_error_not_a_login():
     # terminal requirement, not start an authentication flow.
     result = runner.invoke(app, ["dictate"])
     assert result.exit_code == 2
-    assert "interactive terminal" in result.output
+    # POSIX surfaces the not-a-tty requirement; Windows (no termios) surfaces the
+    # unsupported-platform message first. Either is the point: a usage error, not a login.
+    assert (
+        "interactive terminal" in result.output or "not supported on this platform" in result.output
+    )
diff --git a/tests/test_init_template_contract.py b/tests/test_init_template_contract.py
index e4b85089..3d4f824a 100644
--- a/tests/test_init_template_contract.py
+++ b/tests/test_init_template_contract.py
@@ -48,7 +48,7 @@ def test_vercel_json_pins_fastapi_framework(template_dir):
     declared"). Pinning the FastAPI preset makes Vercel build `api/index.py` and route
     every request to the ASGI app — and stops auto-detection from ever picking
     `services` again."""
-    config = json.loads((template_dir / "vercel.json").read_text())
+    config = json.loads((template_dir / "vercel.json").read_text(encoding="utf-8"))
     assert config.get("framework") == "fastapi", (
         f'{template_dir.name}: vercel.json must pin "framework": "fastapi" so Vercel '
         f'never auto-detects the "services" framework; got {config.get("framework")!r}'
@@ -58,7 +58,7 @@ def test_vercel_json_pins_fastapi_framework(template_dir):
 def test_dockerfile_runs_uvicorn_on_platform_port(template_dir):
     """Fly/Railway/Render(Docker)/Cloudflare-Containers build this image. It must run
     uvicorn on the app, bind 0.0.0.0, and honor the platform's injected ${PORT}."""
-    dockerfile = (template_dir / "Dockerfile").read_text()
+    dockerfile = (template_dir / "Dockerfile").read_text(encoding="utf-8")
     assert "uvicorn api.index:app" in dockerfile, (
         f"{template_dir.name}: Dockerfile must run uvicorn api.index:app"
     )
@@ -96,7 +96,10 @@ def test_dockerfile_runs_uvicorn_on_platform_port(template_dir):
 def test_dockerignore_excludes_env(template_dir):
     """`.env` holds the real API key; the Dockerfile does COPY . . so it must be
     excluded from the build context or the key gets baked into the image."""
-    lines = {line.strip() for line in (template_dir / "dockerignore").read_text().splitlines()}
+    lines = {
+        line.strip()
+        for line in (template_dir / "dockerignore").read_text(encoding="utf-8").splitlines()
+    }
     assert ".env" in lines, (
         f"{template_dir.name}: dockerignore must list .env so the API key isn't baked in"
     )
@@ -116,7 +119,7 @@ def test_procfile_starts_the_app(template_dir):
     command. The contract gate boots it for real; here we pin its shape."""
     web = [
         line.split("web:", 1)[1].strip()
-        for line in (template_dir / "Procfile").read_text().splitlines()
+        for line in (template_dir / "Procfile").read_text(encoding="utf-8").splitlines()
         if line.strip().startswith("web:")
     ]
     assert web, f"{template_dir.name}: Procfile has no web: process"
@@ -129,7 +132,7 @@ def test_procfile_starts_the_app(template_dir):
 
 
 def test_runtime_pins_supported_python(template_dir):
-    pin = (template_dir / "runtime.txt").read_text().strip()
+    pin = (template_dir / "runtime.txt").read_text(encoding="utf-8").strip()
     assert re.fullmatch(r"python-3\.(12|13)(\.\d+)?", pin), (
         f"{template_dir.name}: runtime.txt pins {pin!r}; must be python-3.12 or python-3.13"
     )
@@ -141,7 +144,7 @@ def test_realtime_templates_have_audio_helpers(template_dir):
 
 
 def test_static_assets_referenced_by_html_exist(template_dir):
-    html = (template_dir / "static" / "index.html").read_text()
+    html = (template_dir / "static" / "index.html").read_text(encoding="utf-8")
     refs = set(re.findall(r'(?:href|src)=["\'](/static/[^"\']+)', html))
     assert refs, f"{template_dir.name}: static/index.html should load static assets"
     for ref in refs:
@@ -151,8 +154,8 @@ def test_static_assets_referenced_by_html_exist(template_dir):
 
 
 def test_codex_edit_points_are_explicit(template_dir):
-    notes = (template_dir / "AGENTS.md").read_text()
-    app_js = (template_dir / "static" / "app.js").read_text()
+    notes = (template_dir / "AGENTS.md").read_text(encoding="utf-8")
+    app_js = (template_dir / "static" / "app.js").read_text(encoding="utf-8")
     assert "ASSEMBLYAI_API_KEY" in notes
     assert "buildless" in notes
     assert "static/app.js" in notes
@@ -161,17 +164,23 @@ def test_codex_edit_points_are_explicit(template_dir):
 
 def test_no_committed_dotenv_or_real_key(template_dir):
     assert not (template_dir / ".env").exists(), f"{template_dir.name} ships a real .env"
-    assert "your_assemblyai_api_key_here" in (template_dir / "env.example").read_text()
+    assert "your_assemblyai_api_key_here" in (template_dir / "env.example").read_text(
+        encoding="utf-8"
+    )
 
 
 def test_frontend_routes_exist_in_backend(template_dir):
     """Every /api path the page fetches must be a route the backend registers."""
-    frontend = (template_dir / "static" / "index.html").read_text()
-    frontend += "\n".join(path.read_text() for path in (template_dir / "static").glob("*.js"))
+    frontend = (template_dir / "static" / "index.html").read_text(encoding="utf-8")
+    frontend += "\n".join(
+        path.read_text(encoding="utf-8") for path in (template_dir / "static").glob("*.js")
+    )
     fetched = set(re.findall(r'fetch\(\s*["\'`](/api/[^"\'`?]+)', frontend))
     # Also catch template-literal paths like fetch(`/api/status/${id}`) and "/api/x/" + id
     fetched |= set(re.findall(r'["\'`](/api/[A-Za-z0-9_\-/]+?)(?:/?\$\{|/?["\'`]\s*\+)', frontend))
-    src = "\n".join(path.read_text() for path in (template_dir / "api").glob("*.py"))
+    src = "\n".join(
+        path.read_text(encoding="utf-8") for path in (template_dir / "api").glob("*.py")
+    )
     registered = set(re.findall(r'@app\.\w+\(\s*["\']([^"\']+)["\']', src))
     registered_bases = {re.sub(r"/\{[^}]+\}$", "", r).rstrip("/") for r in registered}
     for path in fetched:
@@ -186,14 +195,14 @@ def test_requirements_cover_backend_imports(template_dir) -> None:
     """Every third-party import in api/*.py appears in requirements.txt."""
     imports: set[str] = set()
     for path in (template_dir / "api").glob("*.py"):
-        tree = ast.parse(path.read_text())
+        tree = ast.parse(path.read_text(encoding="utf-8"))
         for node in ast.walk(tree):
             if isinstance(node, ast.Import):
                 imports.add(node.names[0].name.split(".")[0])
             elif isinstance(node, ast.ImportFrom) and node.level == 0 and node.module:
                 imports.add(node.module.split(".")[0])
     third_party = imports - _STDLIB - _LOCAL_IMPORTS
-    reqs = (template_dir / "requirements.txt").read_text().lower()
+    reqs = (template_dir / "requirements.txt").read_text(encoding="utf-8").lower()
     for pkg in third_party:
         dist = _PKG_MAP.get(pkg, pkg)
         assert dist in reqs, (
@@ -210,7 +219,7 @@ def test_requirements_pin_versions(template_dir) -> None:
     """
     specifier = re.compile(r"(===|==|~=|!=|>=|<=|>|<)")
     unpinned: list[str] = []
-    for raw in (template_dir / "requirements.txt").read_text().splitlines():
+    for raw in (template_dir / "requirements.txt").read_text(encoding="utf-8").splitlines():
         line = raw.split("#", 1)[0].strip()
         if not line:
             continue
@@ -224,7 +233,7 @@ def test_requirements_pin_versions(template_dir) -> None:
 
 def test_status_endpoint_does_not_block(template_dir):
     """Guard against the blocking SDK call: a poll endpoint must not wait_for_completion."""
-    src = (template_dir / "api" / "index.py").read_text()
+    src = (template_dir / "api" / "index.py").read_text(encoding="utf-8")
     tree = ast.parse(src)
     blocking = {"get_by_id", "wait_for_completion"}
     called = {n.attr for n in ast.walk(tree) if isinstance(n, ast.Attribute)}
diff --git a/tests/test_onboard_sections.py b/tests/test_onboard_sections.py
index 13a4187c..194d0be9 100644
--- a/tests/test_onboard_sections.py
+++ b/tests/test_onboard_sections.py
@@ -407,7 +407,7 @@ def note(self, message: str) -> None:
     prompter = _RecordingNonInteractive()
     assert sections.launch_app(prompter, ctx) is SectionResult.SKIPPED
     assert captured == {}
-    assert any("cd /scaffolded/app && assembly dev" in note for note in prompter.notes)
+    assert any(f"cd {ctx.scaffolded} && assembly dev" in note for note in prompter.notes)
 
 
 def test_launch_app_declined_leaves_hint(
@@ -418,7 +418,7 @@ def test_launch_app_declined_leaves_hint(
     prompter = _ScriptedPrompter(confirm=False)
     assert sections.launch_app(prompter, ctx) is SectionResult.SKIPPED
     assert captured == {}
-    assert any("cd /scaffolded/app && assembly dev" in note for note in prompter.notes)
+    assert any(f"cd {ctx.scaffolded} && assembly dev" in note for note in prompter.notes)
 
 
 @pytest.mark.parametrize("uv", [True, False])
diff --git a/tests/test_setup.py b/tests/test_setup.py
index 5f3ecd95..e35bc145 100644
--- a/tests/test_setup.py
+++ b/tests/test_setup.py
@@ -20,6 +20,8 @@
 def _isolate_home(tmp_path, monkeypatch):
     """Keep skill writes/reads inside a temp HOME so tests never touch ~/.claude."""
     monkeypatch.setenv("HOME", str(tmp_path))
+    # Path.home() reads USERPROFILE on Windows, not HOME, so isolate both.
+    monkeypatch.setenv("USERPROFILE", str(tmp_path))
     monkeypatch.delenv("CLAUDE_CONFIG_DIR", raising=False)
 
 
diff --git a/tests/test_setup_install.py b/tests/test_setup_install.py
index 46dc472d..b3d8cf00 100644
--- a/tests/test_setup_install.py
+++ b/tests/test_setup_install.py
@@ -20,6 +20,9 @@
 def _isolate_home(tmp_path, monkeypatch):
     """Keep skill writes/reads inside a temp HOME so tests never touch ~/.claude."""
     monkeypatch.setenv("HOME", str(tmp_path))
+    # Path.home() reads USERPROFILE on Windows, not HOME, so isolate both or the
+    # skill install/status steps would write into the real user profile there.
+    monkeypatch.setenv("USERPROFILE", str(tmp_path))
     monkeypatch.delenv("CLAUDE_CONFIG_DIR", raising=False)
 
 
diff --git a/tests/test_transcribe_batch_sources.py b/tests/test_transcribe_batch_sources.py
index 2d247285..c7be8c0a 100644
--- a/tests/test_transcribe_batch_sources.py
+++ b/tests/test_transcribe_batch_sources.py
@@ -102,7 +102,9 @@ def test_directory_scan_is_recursive_and_audio_only(tmp_path, mocker, monkeypatc
     seen = _patch_transcribe(mocker, monkeypatch)
     result = runner.invoke(app, ["transcribe", "calls", "--json"])
     assert result.exit_code == 0
-    assert sorted(seen) == ["calls/a.mp3", "calls/sub/b.WAV"]
+    # The scanner emits native paths (str(Path)), so build the expectation the same way
+    # rather than hardcoding "/" — directory sources use backslashes on Windows.
+    assert sorted(seen) == [str(Path("calls", "a.mp3")), str(Path("calls", "sub", "b.WAV"))]
 
 
 def test_directory_without_audio_exits_2(tmp_path):

From e9de01615776317fd7e7baa598e2c04c5f50d9bb Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sun, 14 Jun 2026 00:07:08 +0000
Subject: [PATCH 3/6] ci: fix second wave of Windows test failures (84 ->
 targeted)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Second Windows CI round surfaced the next layer:

- conftest: disable Rich legacy-Windows mode in tests. On the GHA Windows
  runner Rich detects a legacy console and subtracts 1 from the render width,
  so COLUMNS=80 became 79 and every byte-exact help snapshot rewrapped (and
  test_theme's width assertion failed). Modern Windows terminals report
  non-legacy — what real users get — so pin it. Fixes ~50 snapshot tests.
- ci: install ffmpeg on the Windows runner (choco). stream --sample / clip /
  caption probe for ffmpeg before their mocked work, so its absence failed
  them at the probe (KeyError 'params', wrong exit codes).
- caption: subtitles_filter now normalizes os.sep to "/" before escaping, so
  the ffmpeg filtergraph gets the portable forward-slash form with an escaped
  drive colon (C:\a\b.srt -> C\:/a/b.srt) instead of unusable backslashes —
  a real burn-in bug on Windows. test helper reverses the escaping to recover
  the on-disk SRT path.
- test isolation: USERPROFILE alongside HOME in the coding-agent fixture
  (Path.home() reads USERPROFILE on Windows).
- test expectations: read committed template assets as utf-8 (cp1252 can't
  decode them); compare the remote-download basename without a hardcoded "/";
  use a cross-platform binary + signal (SIGTERM) in the macOS-helper unit
  tests instead of /bin/echo and SIGTRAP.

https://claude.ai/code/session_01RZW2ga1Phmt4CQpkZp3LqE
---
 .github/workflows/ci.yml               | 11 +++++++----
 aai_cli/commands/caption/_exec.py      |  8 +++++++-
 tests/conftest.py                      | 14 ++++++++++++++
 tests/test_caption_exec.py             |  6 +++++-
 tests/test_coding_agent.py             |  2 ++
 tests/test_init_template_agent.py      |  2 +-
 tests/test_init_template_transcribe.py |  8 +++++---
 tests/test_macos_audio_source.py       | 15 +++++++++++----
 tests/test_transcribe.py               |  2 +-
 9 files changed, 53 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index abe9a0ce..a131db05 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -128,10 +128,13 @@ jobs:
           python-version: ${{ matrix.python-version }}
           cache: pip
 
-      # No native media tooling is installed: the sounddevice wheel bundles PortAudio on
-      # Windows, and the unit suite mocks every ffmpeg/ffprobe shell-out. The e2e/install
-      # suites that would need real binaries are excluded by the default pytest
-      # `-m 'not e2e and not install'` selection.
+      # ffmpeg must be on PATH: the `stream --sample`/`clip`/`caption` paths probe for it
+      # (require_ffmpeg) before doing their work, so without it those tests fail at the
+      # probe rather than exercising the mocked run. PortAudio needs no install — the
+      # sounddevice wheel bundles it on Windows. choco ships on the runner.
+      - name: System deps (ffmpeg)
+        run: choco install ffmpeg --no-progress -y
+
       - name: Install uv
         run: python -m pip install uv
 
diff --git a/aai_cli/commands/caption/_exec.py b/aai_cli/commands/caption/_exec.py
index bf948cdf..06a58235 100644
--- a/aai_cli/commands/caption/_exec.py
+++ b/aai_cli/commands/caption/_exec.py
@@ -15,6 +15,7 @@
 
 from __future__ import annotations
 
+import os
 import tempfile
 from dataclasses import dataclass
 from pathlib import Path
@@ -56,7 +57,12 @@ def default_out_path(media: Path) -> Path:
 
 def subtitles_filter(srt: Path, font_size: int | None) -> str:
     """The ``-vf`` filtergraph burning ``srt`` into the video."""
-    spec = f"subtitles={str(srt).translate(_FILTER_ESCAPES)}"
+    # ffmpeg's filtergraph parser takes forward slashes on every platform; a Windows
+    # backslash path would otherwise need each separator escaped (and the drive colon
+    # mishandled). Normalize to "/" first, then escape the remaining metacharacters
+    # (notably the drive ":") — e.g. C:\a\b.srt -> C\:/a/b.srt. No-op on POSIX.
+    posix = str(srt).replace(os.sep, "/")
+    spec = f"subtitles={posix.translate(_FILTER_ESCAPES)}"
     if font_size is not None:
         spec += f":force_style=FontSize={font_size}"
     return spec
diff --git a/tests/conftest.py b/tests/conftest.py
index b60a69ad..2b4c7a7e 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -83,6 +83,20 @@ def isolate_env(monkeypatch):
         monkeypatch.delenv(var, raising=False)
 
 
+@pytest.fixture(autouse=True)
+def _disable_legacy_windows(monkeypatch):
+    # On a Windows CI runner Rich detects a "legacy" console (ColorSystem.WINDOWS) and
+    # subtracts 1 from the render width to dodge the auto-wrap cursor bug — so COLUMNS=80
+    # renders at 79 and every byte-exact help snapshot rewraps and fails. Modern Windows
+    # terminals (Windows Terminal, VT-enabled) report non-legacy, which is what real users
+    # get, so pin non-legacy here to keep rendering deterministic across platforms. No-op
+    # off Windows (detect_legacy_windows already returns False there).
+    if sys.platform == "win32":
+        import rich.console
+
+        monkeypatch.setattr(rich.console, "detect_legacy_windows", lambda: False)
+
+
 @pytest.fixture(autouse=True)
 def pin_timezone(monkeypatch):
     # Pin the host timezone so any time rendering is deterministic across machines and
diff --git a/tests/test_caption_exec.py b/tests/test_caption_exec.py
index 12eff9d5..96052e24 100644
--- a/tests/test_caption_exec.py
+++ b/tests/test_caption_exec.py
@@ -10,6 +10,7 @@
 import contextlib
 import dataclasses
 import json
+import re
 import subprocess
 from pathlib import Path
 from types import SimpleNamespace
@@ -58,7 +59,10 @@ def record_ffmpeg(monkeypatch, *, returncode: int = 0, stderr: str = ""):
 
     def run(args: list[str]) -> subprocess.CompletedProcess[str]:
         recorded["args"] = args
-        srt_path = args[8].removeprefix("subtitles=").split(":force_style")[0]
+        escaped = args[8].removeprefix("subtitles=").split(":force_style")[0]
+        # subtitles_filter escapes filtergraph metacharacters (and the Windows drive
+        # colon) with a leading backslash; reverse that to recover the real on-disk path.
+        srt_path = re.sub(r"\\(.)", r"\1", escaped)
         recorded["srt"] = Path(srt_path).read_text(encoding="utf-8")
         return subprocess.CompletedProcess(
             args=args, returncode=returncode, stdout="", stderr=stderr
diff --git a/tests/test_coding_agent.py b/tests/test_coding_agent.py
index f856b097..823bc694 100644
--- a/tests/test_coding_agent.py
+++ b/tests/test_coding_agent.py
@@ -11,6 +11,8 @@
 def _isolate_home(tmp_path, monkeypatch):
     """Keep skill reads inside a temp HOME so tests never touch ~/.claude."""
     monkeypatch.setenv("HOME", str(tmp_path))
+    # Path.home() reads USERPROFILE on Windows, not HOME, so isolate both.
+    monkeypatch.setenv("USERPROFILE", str(tmp_path))
     monkeypatch.delenv("CLAUDE_CONFIG_DIR", raising=False)
 
 
diff --git a/tests/test_init_template_agent.py b/tests/test_init_template_agent.py
index 32b0769c..be945a31 100644
--- a/tests/test_init_template_agent.py
+++ b/tests/test_init_template_agent.py
@@ -54,7 +54,7 @@ def fake_get(url, params=None, headers=None):
 
 def test_page_reads_reply_audio_from_data_field():
     # reply.audio carries the base64 PCM in `data` (not `audio`); guard the regression.
-    app_js = (TEMPLATE_DIR / "static" / "app.js").read_text()
+    app_js = (TEMPLATE_DIR / "static" / "app.js").read_text(encoding="utf-8")
     assert "reply.audio" in app_js
     assert "event.data" in app_js
 
diff --git a/tests/test_init_template_transcribe.py b/tests/test_init_template_transcribe.py
index 04aade8c..5eabc2d4 100644
--- a/tests/test_init_template_transcribe.py
+++ b/tests/test_init_template_transcribe.py
@@ -59,7 +59,9 @@ def test_required_files_exist():
 
 def test_template_ships_no_real_key():
     assert not (TEMPLATE_DIR / ".env").exists()
-    assert "your_assemblyai_api_key_here" in (TEMPLATE_DIR / "env.example").read_text()
+    assert "your_assemblyai_api_key_here" in (TEMPLATE_DIR / "env.example").read_text(
+        encoding="utf-8"
+    )
 
 
 def test_base_url_env_is_applied(monkeypatch, mocker):
@@ -71,8 +73,8 @@ def test_base_url_env_is_applied(monkeypatch, mocker):
 
 def test_page_explores_all_features_and_speakers():
     # Guard the UI surface: each audio-intelligence view + per-speaker coloring stay wired.
-    html = (TEMPLATE_DIR / "static" / "index.html").read_text()
-    app_js = (TEMPLATE_DIR / "static" / "app.js").read_text()
+    html = (TEMPLATE_DIR / "static" / "index.html").read_text(encoding="utf-8")
+    app_js = (TEMPLATE_DIR / "static" / "app.js").read_text(encoding="utf-8")
     ui_src = html + app_js
     for token in (
         "chapters",
diff --git a/tests/test_macos_audio_source.py b/tests/test_macos_audio_source.py
index e6f6a06a..8032e6f3 100644
--- a/tests/test_macos_audio_source.py
+++ b/tests/test_macos_audio_source.py
@@ -1,4 +1,6 @@
 import io
+import signal
+import sys
 import types
 from pathlib import Path
 
@@ -166,7 +168,9 @@ def test_read_stderr_none_is_empty():
 
 
 def test_open_process_exposes_stdout():
-    proc = macos._open_process(["/bin/echo", "ok"])
+    # sys.executable is the one binary guaranteed present on every platform (/bin/echo
+    # isn't on Windows); it writes "ok" to stdout exactly like the helper would.
+    proc = macos._open_process([sys.executable, "-c", "import sys; sys.stdout.buffer.write(b'ok')"])
     assert proc.stdout is not None
     try:
         assert proc.stdout.read().strip() == b"ok"
@@ -215,7 +219,10 @@ def test_raise_helper_exit_handles_clean_eof():
 
 
 def test_returncode_detail_names_signals():
-    assert macos._returncode_detail(-5) == "SIGTRAP (-5)"
+    # SIGTERM (15) is one of the few signals the enum knows on Windows too, so the
+    # name-resolution branch is exercised cross-platform; 99999 hits the unknown fallback.
+    sigterm = int(signal.SIGTERM)
+    assert macos._returncode_detail(-sigterm) == f"SIGTERM (-{sigterm})"
     assert macos._returncode_detail(-99999) == "signal 99999 (-99999)"
     assert macos._returncode_detail(2) == "exit 2"
     assert macos._returncode_detail(0) == "exit 0"  # 0 is a clean exit (pins `>= 0`)
@@ -223,10 +230,10 @@ def test_returncode_detail_names_signals():
 
 
 def test_raise_helper_exit_names_signal_without_stderr():
-    proc = _FakeProc(stdout=b"", stderr=b"", returncode=-5)
+    proc = _FakeProc(stdout=b"", stderr=b"", returncode=-int(signal.SIGTERM))
     with pytest.raises(CLIError) as exc:
         macos._raise_helper_exit(proc)
-    assert "SIGTRAP" in exc.value.message
+    assert "SIGTERM" in exc.value.message
 
 
 def test_source_starts_helper_and_yields_pcm(tmp_path):
diff --git a/tests/test_transcribe.py b/tests/test_transcribe.py
index 6cecf25c..1d6eae49 100644
--- a/tests/test_transcribe.py
+++ b/tests/test_transcribe.py
@@ -409,7 +409,7 @@ def fake(api_key, audio, *, config):
     result = runner.invoke(app, ["transcribe", "memory://bucket/call.mp3", "-o", "text"])
     assert result.exit_code == 0
     assert result.output.strip() == "hello world"
-    assert seen["path"].endswith("/call.mp3")
+    assert seen["path"].replace("\\", "/").endswith("/call.mp3")  # native separator on Windows
     assert "aai-remote-" in seen["path"]  # a temp copy, not the URL itself
     assert seen["bytes"] == b"remote-bytes"
 

From 515c83d09221972b179424951cdf5723042a98ed Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sun, 14 Jun 2026 00:22:21 +0000
Subject: [PATCH 4/6] ci: keep the real-client TTS test socket-disabled on
 Windows

Last Windows failure: test_synthesize_without_connect_uses_real_client_and_
fails_cleanly relies on socket *creation* being blocked (Linux --disable-socket)
to surface a clean CLIError. The suite-wide Windows loopback allowance let the
socket be created, then the blocked external connect leaked it, tripping the
unraisable-exception warning gate. Pin this test to disable_socket (a no-op on
Linux) and have the conftest relaxation skip disable_socket/enable_socket tests.

https://claude.ai/code/session_01RZW2ga1Phmt4CQpkZp3LqE
---
 tests/conftest.py         | 4 +++-
 tests/test_tts_session.py | 5 ++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 2b4c7a7e..2ead4a26 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -23,7 +23,9 @@ def pytest_collection_modifyitems(items: list[pytest.Item]) -> None:
     for item in items:
         if any(item.get_closest_marker(name) for name in _NETWORK_MARKERS):
             item.add_marker(pytest.mark.enable_socket)
-        elif sys.platform == "win32" and item.get_closest_marker("allow_hosts") is None:
+        elif sys.platform == "win32" and not any(
+            item.get_closest_marker(m) for m in ("allow_hosts", "enable_socket", "disable_socket")
+        ):
             # On Windows the asyncio event loop's self-pipe is an AF_INET socketpair(),
             # which the suite-wide --disable-socket would block — so every in-process
             # async test (FastAPI TestClient, the scaffolded template apps) would fail.
diff --git a/tests/test_tts_session.py b/tests/test_tts_session.py
index 6637c2e0..092ee719 100644
--- a/tests/test_tts_session.py
+++ b/tests/test_tts_session.py
@@ -342,11 +342,14 @@ def test_synthesize_maps_unexpected_protocol_error_to_api_error():
         session.synthesize("k", session.SpeakConfig(text="hi"), connect=lambda *a, **k: ws)
 
 
+@pytest.mark.disable_socket
 def test_synthesize_without_connect_uses_real_client_and_fails_cleanly():
     # No `connect` provided: synthesize imports websockets' real sync client and
     # attempts a connection. pytest-socket blocks socket creation, so this must
     # surface as a clean CLIError (mapped in diagnostics.open_authorized_ws),
-    # never a raw socket error.
+    # never a raw socket error. disable_socket pins that blocked-at-creation behavior
+    # on Windows too (the suite-wide conftest otherwise allows loopback there, which
+    # would let the socket be created and then leak when the real connect is blocked).
     _use_env("sandbox000")
     with pytest.raises(CLIError):
         session.synthesize("k", session.SpeakConfig(text="hi"))

From 0e142232dcef872d6659d5c18433f9e22083b34a Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sun, 14 Jun 2026 00:36:20 +0000
Subject: [PATCH 5/6] ci: make the Windows ffmpeg install deterministic (retry
 + verify)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

py3.12 went green but py3.13 failed with ffmpeg-missing errors from the same
choco command — a transient choco download flake on one matrix cell. Retry the
install up to 3x, fix the in-session PATH, and verify `ffmpeg -version` so a real
miss fails this step instead of surfacing as confusing test failures downstream.

https://claude.ai/code/session_01RZW2ga1Phmt4CQpkZp3LqE
---
 .github/workflows/ci.yml | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a131db05..1c3b0d22 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -131,9 +131,23 @@ jobs:
       # ffmpeg must be on PATH: the `stream --sample`/`clip`/`caption` paths probe for it
       # (require_ffmpeg) before doing their work, so without it those tests fail at the
       # probe rather than exercising the mocked run. PortAudio needs no install — the
-      # sounddevice wheel bundles it on Windows. choco ships on the runner.
+      # sounddevice wheel bundles it on Windows. choco ships on the runner but its download
+      # occasionally flakes (one matrix cell got ffmpeg, the other didn't), so retry and
+      # verify ffmpeg is callable here — a real miss fails this step instead of surfacing as
+      # confusing "ffmpeg not on PATH" test failures. The shim lands in choco's bin dir,
+      # already on the runner PATH, so later steps pick it up.
       - name: System deps (ffmpeg)
-        run: choco install ffmpeg --no-progress -y
+        shell: pwsh
+        run: |
+          $ErrorActionPreference = "Stop"
+          $env:PATH = "C:\ProgramData\chocolatey\bin;$env:PATH"
+          for ($i = 1; $i -le 3; $i++) {
+            choco install ffmpeg --no-progress -y
+            if (Get-Command ffmpeg -ErrorAction SilentlyContinue) { break }
+            Write-Host "ffmpeg not yet on PATH (attempt $i); retrying…"
+            Start-Sleep -Seconds 5
+          }
+          ffmpeg -version
 
       - name: Install uv
         run: python -m pip install uv

From 3885fa6c9d22a92e796a2d39c056ddaa4547db06 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sun, 14 Jun 2026 00:56:47 +0000
Subject: [PATCH 6/6] feat(dictate): support push-to-talk hotkeys on Windows
 via msvcrt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

assembly dictate previously raised "not supported on this platform" on Windows
because core/hotkey.py is termios-only. Add a Windows backend behind the same
TerminalKeys interface: the console is already character-at-a-time, so there's no
cbreak mode to enter/restore, and read() polls msvcrt.kbhit()/getwch() (getwch
blocks for timeout=None, kbhit polls to a deadline otherwise) — mirroring the
POSIX select()+os.read() path. Enter (\r) is already in dictate's TOGGLE_KEYS.

Stays stdlib-only (msvcrt is in the stdlib). The win32-only members are loaded
via importlib and bound to typed Callable locals so mypy and pyright both accept
the module on the POSIX CI host. _on_windows() is an injectable predicate and the
backend reads through an injectable console, so the new tests drive the Windows
path (and give coverage/mutation) on Linux with a fake msvcrt.

https://claude.ai/code/session_01RZW2ga1Phmt4CQpkZp3LqE
---
 aai_cli/core/hotkey.py | 75 +++++++++++++++++++++++++++++++++---------
 tests/test_hotkey.py   | 72 ++++++++++++++++++++++++++++++++++++++--
 2 files changed, 128 insertions(+), 19 deletions(-)

diff --git a/aai_cli/core/hotkey.py b/aai_cli/core/hotkey.py
index fa0056ac..37b4ccab 100644
--- a/aai_cli/core/hotkey.py
+++ b/aai_cli/core/hotkey.py
@@ -1,18 +1,27 @@
 """Single-keypress input for hotkey-driven commands (`assembly dictate`).
 
-``TerminalKeys`` switches stdin into cbreak mode for the lifetime of a ``with``
-block, so individual keypresses arrive without Enter — while Ctrl-C still raises
-KeyboardInterrupt (cbreak keeps ISIG, unlike full raw mode). POSIX-only: there
-is no termios on Windows, so entering the context raises a clean CLIError there
-instead of an ImportError traceback. Stdlib-only on purpose, mirroring the other
+``TerminalKeys`` reads individual keypresses — without waiting for Enter — for the
+lifetime of a ``with`` block, while Ctrl-C still ends the program. One interface,
+two backends:
+
+- POSIX puts stdin into cbreak mode (termios/tty) and waits with ``select``; cbreak
+  keeps ISIG, so Ctrl-C raises KeyboardInterrupt instead of arriving as a byte.
+- Windows reads the console through stdlib ``msvcrt`` (``kbhit``/``getwch``), which is
+  already character-at-a-time, so there is no mode to enter or restore.
+
+A platform that is neither (no termios and not Windows) raises a clean CLIError rather
+than an ImportError traceback. Stdlib-only on purpose, mirroring the other
 non-rendering layers.
 """
 
 from __future__ import annotations
 
+import importlib
 import os
 import select
 import sys
+import time
+from collections.abc import Callable
 
 from aai_cli.core.errors import CLIError
 
@@ -21,6 +30,10 @@
 CTRL_D = "\x04"
 ESC = "\x1b"
 
+# How long the Windows key poll naps between kbhit() checks (msvcrt has no select()):
+# short enough to feel instant at the dictate prompt, long enough not to spin a core.
+_WINDOWS_POLL_INTERVAL = 0.01
+
 
 def _stdin_fd() -> int:
     """The stdin file descriptor, or -1 when stdin has none (a captured/replaced
@@ -32,19 +45,38 @@ def _stdin_fd() -> int:
         return -1
 
 
+def _on_windows() -> bool:
+    """True on Windows, where key input goes through msvcrt instead of termios. A
+    function (not a constant) so tests can drive the Windows backend on a POSIX host."""
+    return sys.platform == "win32"
+
+
 class TerminalKeys:
-    """Reads single keypresses from a terminal fd, cbreak-scoped via ``with``.
+    """Reads single keypresses from a terminal, scoped via ``with``.
 
-    The fd is injectable (tests drive it through a pty pair); it defaults to
-    the process's stdin.
+    The fd is injectable (POSIX tests drive it through a pty pair) and defaults to the
+    process's stdin; the Windows backend reads the console directly and ignores it.
     """
 
     def __init__(self, fd: int | None = None) -> None:
         self._fd = fd if fd is not None else _stdin_fd()
-        # termios.tcgetattr's attribute list (typeshed's exact shape).
+        # termios.tcgetattr's attribute list (typeshed's exact shape); stays None on
+        # Windows, where there is no saved terminal state to restore.
         self._saved: list[int | list[bytes | int]] | None = None
+        self._windows = _on_windows()
 
     def __enter__(self) -> TerminalKeys:
+        if not os.isatty(self._fd):
+            raise CLIError(
+                "This command needs an interactive terminal: it waits for hotkey presses on stdin.",
+                error_type="not_a_tty",
+                exit_code=2,
+                suggestion="Run it directly in a terminal, without piping or redirecting stdin.",
+            )
+        if self._windows:
+            # The Windows console is already character-at-a-time; there is no cbreak mode
+            # to enter or restore, so read() goes straight through msvcrt.
+            return self
         try:
             import termios
             import tty
@@ -54,13 +86,6 @@ def __enter__(self) -> TerminalKeys:
                 error_type="unsupported_platform",
                 exit_code=2,
             ) from exc
-        if not os.isatty(self._fd):
-            raise CLIError(
-                "This command needs an interactive terminal: it waits for hotkey presses on stdin.",
-                error_type="not_a_tty",
-                exit_code=2,
-                suggestion="Run it directly in a terminal, without piping or redirecting stdin.",
-            )
         self._saved = termios.tcgetattr(self._fd)
         tty.setcbreak(self._fd)
         return self
@@ -78,6 +103,8 @@ def read(self, timeout: float | None) -> str | None:
         ``timeout=None`` blocks until a key arrives; ``timeout=0`` polls without
         waiting (the in-recording check between audio chunks).
         """
+        if self._windows:
+            return self._read_windows(timeout)
         ready, _, _ = select.select([self._fd], [], [], timeout)
         if not ready:
             return None
@@ -85,3 +112,19 @@ def read(self, timeout: float | None) -> str | None:
         if not data:
             return None
         return data.decode("utf-8", "replace")
+
+    def _read_windows(self, timeout: float | None) -> str | None:
+        """Windows key read: getwch() blocks (timeout=None) or kbhit() polls to a deadline."""
+        msvcrt = importlib.import_module("msvcrt")
+        # Bind the win32-only members to typed locals: typeshed hides them off-Windows,
+        # so the type-checkers reject `msvcrt.getwch` directly but accept these.
+        kbhit: Callable[[], bool] = msvcrt.kbhit
+        getwch: Callable[[], str] = msvcrt.getwch
+        if timeout is None:
+            return getwch()
+        deadline = time.monotonic() + timeout  # pragma: no mutate
+        while not kbhit():
+            if time.monotonic() >= deadline:  # pragma: no mutate
+                return None
+            time.sleep(_WINDOWS_POLL_INTERVAL)  # pragma: no mutate
+        return getwch()
diff --git a/tests/test_hotkey.py b/tests/test_hotkey.py
index 3a138b1c..73f14465 100644
--- a/tests/test_hotkey.py
+++ b/tests/test_hotkey.py
@@ -9,12 +9,14 @@
 
 import pytest
 
+from aai_cli.core import hotkey
 from aai_cli.core.errors import CLIError
 from aai_cli.core.hotkey import TerminalKeys, _stdin_fd
 
-# termios and os.openpty are POSIX-only, so the whole module is skipped on Windows
-# (where TerminalKeys raises a clean CLIError rather than running). importorskip keeps
-# this out of the skip/xfail escape-hatch count the Linux gate tracks.
+# termios and os.openpty are POSIX-only, so the pty-driven tests below are skipped on
+# Windows. importorskip keeps that out of the skip/xfail escape-hatch count the Linux
+# gate tracks. The msvcrt-backend tests at the bottom inject a fake console, so they run
+# (and give coverage) on the POSIX CI host even though they exercise the Windows path.
 termios = pytest.importorskip("termios")
 
 
@@ -121,3 +123,67 @@ def fileno(self):
 
     monkeypatch.setattr(sys, "stdin", RealStdin())
     assert _stdin_fd() == 42
+
+
+# --- Windows (msvcrt) backend ------------------------------------------------
+# Driven on the POSIX CI host by forcing _on_windows() True and injecting a fake
+# console; the real msvcrt calls are thin, so this covers the branch logic.
+
+
+class _FakeMsvcrt:
+    """Stand-in for the stdlib msvcrt console API the Windows backend reads through."""
+
+    def __init__(self, *, ready_after: int = 0, char: str = "a") -> None:
+        self._until_ready = ready_after  # kbhit() returns False this many times first
+        self._char = char
+        self.getwch_calls = 0
+
+    def kbhit(self) -> bool:
+        if self._until_ready <= 0:
+            return True
+        self._until_ready -= 1
+        return False
+
+    def getwch(self) -> str:
+        self.getwch_calls += 1
+        return self._char
+
+
+@pytest.fixture
+def windows_backend(monkeypatch):
+    """Force the Windows code path on this POSIX host: msvcrt backend, fake console tty."""
+    monkeypatch.setattr(hotkey, "_on_windows", lambda: True)
+    monkeypatch.setattr(hotkey.os, "isatty", lambda _fd: True)
+    return monkeypatch
+
+
+def test_windows_backend_enters_without_cbreak_and_reads_keys(windows_backend):
+    windows_backend.setitem(sys.modules, "msvcrt", _FakeMsvcrt(char="a"))
+    keys = TerminalKeys(fd=5)
+    with keys as k:
+        assert k.read(None) == "a"  # timeout=None -> blocking getwch()
+        assert k.read(0) == "a"  # zero-timeout poll with a key already buffered
+    assert keys._saved is None  # no termios state is saved or restored on Windows
+
+
+def test_windows_backend_poll_returns_none_when_no_key(windows_backend):
+    windows_backend.setitem(sys.modules, "msvcrt", _FakeMsvcrt(ready_after=10**9))
+    with TerminalKeys(fd=5) as k:
+        assert k.read(0) is None  # nothing buffered + zero timeout -> immediate None
+
+
+def test_windows_backend_polls_with_naps_until_a_key_arrives(windows_backend):
+    naps: list[float] = []
+    windows_backend.setattr(hotkey.time, "sleep", lambda s: naps.append(s))
+    windows_backend.setitem(sys.modules, "msvcrt", _FakeMsvcrt(ready_after=2, char="z"))
+    with TerminalKeys(fd=5) as k:
+        assert k.read(5.0) == "z"
+    assert naps == [0.01, 0.01]  # napped between the two not-ready polls
+
+
+def test_windows_backend_non_tty_is_still_a_usage_error(windows_backend):
+    windows_backend.setattr(hotkey.os, "isatty", lambda _fd: False)
+    with pytest.raises(CLIError) as exc:
+        with TerminalKeys(fd=5):
+            pass
+    assert exc.value.error_type == "not_a_tty"