Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 78 additions & 0 deletions .github/workflows/codeql-swift.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
name: CodeQL Swift

# Swift gets its own workflow instead of a matrix entry in codeql.yml because
# its scan is roughly two orders of magnitude slower than the
# interpreted-language ones. CodeQL has to trace a real `swiftc` build
# (autobuild cannot find a bare helper script that has no Xcode/SwiftPM
# project), and that traced build of the macOS system-audio helper takes
# ~25 minutes on a macOS runner.
#
# The triggers are intentionally narrower than those in codeql.yml:
#   - path filters limit runs to changes in Swift sources (or in this
#     workflow itself), so the ~25 macOS runner-minutes are spent only when
#     the result can actually differ;
#   - merge_group is omitted: the merge-queue ref is deleted as soon as the PR
#     merges, so a scan slower than the queue fails its SARIF upload with
#     "ref not found" (this failed on 7 consecutive queued PRs). The push run
#     on main scans the identical merge result instead;
#   - a weekly scheduled run keeps the default-branch baseline fresh when new
#     queries ship between Swift changes.
on:
  pull_request:
    branches:
      - main
    paths:
      - '**/*.swift'
      - '.github/workflows/codeql-swift.yml'
  push:
    branches:
      - main
    paths:
      - '**/*.swift'
      - '.github/workflows/codeql-swift.yml'
  schedule:
    # Minute 29, hour 15, day-of-week 2: once a week, on Tuesday.
    - cron: '29 15 * * 2'

# Workflow-wide default is least privilege; the analyze job below requests the
# additional scopes CodeQL requires. Every action is pinned to a commit SHA so
# a moved tag cannot silently change what runs; Dependabot keeps the pins
# current.
permissions:
  contents: read

# One run per workflow+ref. A newer commit on a PR/branch cancels the run it
# supersedes, but runs on main are never cancelled, so the scan that updates
# the default-branch baseline is not dropped.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

jobs:
  analyze:
    name: analyze (swift)
    runs-on: macos-latest
    timeout-minutes: 45
    permissions:
      security-events: write  # upload SARIF results to code scanning
      actions: read  # workflow metadata for run context on private repos
      contents: read
    steps:
      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
        with:
          # No job pushes, so don't leave the token behind in .git/config.
          persist-credentials: false

      - name: Initialize CodeQL
        uses: github/codeql-action/init@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4.36.2
        with:
          languages: swift
          # The build is supplied by the explicit step below, not autobuild.
          build-mode: manual

      # Same swiftc invocation that scripts/check.sh uses.
      - name: Build Swift audio helper
        run: |
          swiftc -parse-as-library aai_cli/streaming/macos_system_audio.swift \
            -framework ScreenCaptureKit \
            -framework AVFoundation \
            -framework CoreMedia \
            -framework CoreGraphics \
            -o /tmp/aai-macos-audio-codeql

      - name: Analyze
        uses: github/codeql-action/analyze@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4.36.2
        with:
          category: /language:swift
39 changes: 9 additions & 30 deletions .github/workflows/codeql.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ concurrency:
jobs:
analyze:
name: analyze (${{ matrix.language }})
runs-on: ${{ matrix.os }}
runs-on: ubuntu-latest
timeout-minutes: 30
permissions:
security-events: write # upload SARIF results to code scanning
Expand All @@ -38,24 +38,13 @@ jobs:
matrix:
# python: the CLI itself; actions: the workflows in .github/workflows;
# javascript-typescript: the committed `assembly init` template JS.
# Those three are interpreted languages, so build-mode none suffices.
# swift: the macOS system-audio helper. Swift is compiled, so CodeQL
# must observe a real build — and autobuild can't discover a bare
# helper script with no Xcode/SwiftPM project, so the build is manual
# (the same swiftc invocation scripts/check.sh uses) on a macOS runner.
include:
- language: python
os: ubuntu-latest
build-mode: none
- language: actions
os: ubuntu-latest
build-mode: none
- language: javascript-typescript
os: ubuntu-latest
build-mode: none
- language: swift
os: macos-latest
build-mode: manual
# All three are interpreted languages, so build-mode none suffices and
# each analysis finishes in about a minute. The Swift helper needs a
# ~25-minute traced build on a macOS runner, so it lives in
# codeql-swift.yml, path-filtered to Swift changes and kept out of the
# merge queue (the queue ref is deleted as soon as the PR merges, which
# makes any scan slower than the queue fail its SARIF upload).
language: [python, actions, javascript-typescript]
steps:
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
with:
Expand All @@ -65,17 +54,7 @@ jobs:
uses: github/codeql-action/init@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4.36.2
with:
languages: ${{ matrix.language }}
build-mode: ${{ matrix.build-mode }}

- name: Build Swift audio helper
if: matrix.build-mode == 'manual'
run: |
swiftc -parse-as-library aai_cli/streaming/macos_system_audio.swift \
-framework ScreenCaptureKit \
-framework AVFoundation \
-framework CoreMedia \
-framework CoreGraphics \
-o /tmp/aai-macos-audio-codeql
build-mode: none

- name: Analyze
uses: github/codeql-action/analyze@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4.36.2
Expand Down
9 changes: 9 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,11 +95,20 @@ Lessons that cost iterations getting the patch-coverage and mutation tail gates
with "No such option"; it's `assembly transcribe … --json`. (The root callback still sniffs the
whole token list via `argscan.requests_json`, so a callback-level failure like a bad
`--env` keeps the JSON error shape — but the flag itself lives on the subcommand.)
- **Tests that touch global logging state must snapshot/restore it** — root handlers/level
and per-logger levels are process-global, so a leak only fails on some pytest-randomly
seeds (green locally, red in CI). Opt in to the shared `preserve_logging_state` conftest
fixture (it also resets the websockets wire loggers a silencer test may have clamped)
instead of hand-rolling the snapshot per module.

### Manual QA / running the CLI in sandboxed sessions

Lessons that cost time in agent sessions — read before exercising `uv run assembly` by hand:

- **Check for in-flight duplicates before starting a fix.** Sessions run concurrently:
before implementing a bug fix or small feature, scan open PRs and the last few
`origin/main` commits touching the same files (two sessions once shipped the identical
fix; the slower PR was closed as redundant). Seconds of checking beats a discarded PR.
- **Web/remote containers are fully provisioned at session start**
(`.claude/hooks/session-start.sh`): system deps, `markdownlint`/`prettier`, and the Go
gate binaries (`actionlint`, `gitleaks`) are installed at CI's pinned versions, so
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ exclude = ["aai_cli/_version.py"]
min_confidence = 90
ignore_decorators = ["@app.command", "@app.callback"]
ignore_names = ["app", "capture_output", "download", "healthy", "ist", "memory_keyring", "org",
"refresh"]
"preserve_logging_state", "refresh"]

[tool.deptry]
exclude = ["docs", "dist", ".venv", "aai_cli/init/templates"]
Expand Down
28 changes: 28 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,34 @@ def fixed_render_size(monkeypatch):
monkeypatch.setenv("LINES", "40")


@pytest.fixture
def preserve_logging_state():
    # Opt-in guard for tests that mutate logging, which is process-global
    # (root handlers, root level, and per-logger levels). Without it, a test
    # that turns on verbose diagnostics (debuglog.enable) or trips the realtime
    # silencers leaks that state into unrelated tests, and pytest-randomly only
    # surfaces the resulting order dependence on some seeds (it cost a red CI
    # round on PR #125). Deliberately not autouse: only modules that touch
    # global logging request it.
    import logging

    from aai_cli import ws as wsutil

    root_logger = logging.getLogger()
    # Copy the handler list: the live list is mutated by the test under way.
    saved_handlers = root_logger.handlers[:]
    saved_level = root_logger.level

    # Record every websockets wire logger with its current level *before*
    # touching any of them, then drop them all to NOTSET so a CRITICAL clamp
    # left behind by an earlier test can't swallow records the opting test
    # asserts on.
    wire_snapshot = [
        (wire_logger, wire_logger.level)
        for wire_logger in map(logging.getLogger, wsutil.WEBSOCKETS_LOGGERS)
    ]
    for wire_logger, _ in wire_snapshot:
        wire_logger.setLevel(logging.NOTSET)

    yield

    # Teardown: put back exactly what was captured above.
    root_logger.handlers[:] = saved_handlers
    root_logger.setLevel(saved_level)
    for wire_logger, original_level in wire_snapshot:
        wire_logger.setLevel(original_level)


@pytest.fixture(autouse=True)
def reset_active_environment():
# The active environment is a process-global (set at CLI startup); pin it to
Expand Down
25 changes: 4 additions & 21 deletions tests/test_debuglog.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,36 +11,19 @@
from typer.testing import CliRunner

from aai_cli import config, debuglog
from aai_cli import ws as wsutil
from aai_cli.context import AppState
from aai_cli.main import app

runner = CliRunner()


@pytest.fixture(autouse=True)
def reset_debuglog(monkeypatch):
# enable() mutates process-global state (the root logger and the module's
# verbosity/secret registries); snapshot and restore so pytest-randomly
# ordering can't leak a verbose run into unrelated tests.
root = logging.getLogger()
previous_handlers = list(root.handlers)
previous_level = root.level
# Logger levels are process-global too: any earlier test that exercised the
# realtime silencers left the websockets loggers clamped at CRITICAL, which
# would swallow the wire-level records asserted here. Reset them so these
# tests are order-independent under pytest-randomly, then restore.
wire_loggers = [logging.getLogger(name) for name in wsutil.WEBSOCKETS_LOGGERS]
previous_wire_levels = [logger.level for logger in wire_loggers]
for logger in wire_loggers:
logger.setLevel(logging.NOTSET)
def reset_debuglog(preserve_logging_state, monkeypatch):
# The shared conftest fixture snapshots the process-global logging state
# (root handlers/level, websockets wire-logger levels); enable() also
# mutates the module's own verbosity/secret registries, reset here.
monkeypatch.setattr(debuglog, "_verbosity", 0)
monkeypatch.setattr(debuglog, "_secrets", set())
yield
root.handlers[:] = previous_handlers
root.setLevel(previous_level)
for logger, level in zip(wire_loggers, previous_wire_levels, strict=True):
logger.setLevel(level)


def test_enable_zero_is_the_everyday_no_op():
Expand Down
Loading