diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 816322c..60910e5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,25 +37,35 @@ jobs: test: name: Tests - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} strategy: + fail-fast: false matrix: + os: [ubuntu-latest] python-version: ["3.10", "3.11", "3.12"] + include: + # Smoke-test on Windows to catch POSIX-only regressions (e.g. SIGALRM). + - os: windows-latest + python-version: "3.11" + # Smoke-test on macOS — primary developer platform, catches BSD/Darwin quirks. + - os: macos-latest + python-version: "3.11" steps: - uses: actions/checkout@v4 - - name: Install uv + - name: Install uv and Python ${{ matrix.python-version }} uses: astral-sh/setup-uv@v7 with: version: "latest" - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: python-version: ${{ matrix.python-version }} - name: Install dependencies - run: uv sync --extra dev + # Force uv-managed (python-build-standalone) Python. The runner's + # pre-installed Python on macOS is compiled without + # --enable-loadable-sqlite-extensions, so sqlite-vec fails to load. + # python-build-standalone enables loadable sqlite extensions on + # every platform. + run: uv sync --extra dev --python-preference=only-managed - name: Run tests run: uv run pytest -v --cov --cov-config=pyproject.toml --cov-report=xml @@ -65,7 +75,7 @@ jobs: - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 - if: matrix.python-version == '3.11' + if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.11' with: files: ./coverage.xml fail_ci_if_error: false diff --git a/librarian/cli.py b/librarian/cli.py index 17bf92d..5f879b9 100644 --- a/librarian/cli.py +++ b/librarian/cli.py @@ -662,8 +662,13 @@ def add_source( rprint(f" [red]Error:[/red] {err.get('path', '')}: {err.get('error', '')}") skipped = result.get("skipped", 0) + status_line = ( + "[red]Source added, but indexing had errors.[/red]" + if errors + else "[green]Source added and indexed![/green]" + ) summary = ( - f"[green]Source added and indexed![/green]\n\n" + f"{status_line}\n\n" f"Name: [cyan]{source['name']}[/cyan]\n" f"Path: [blue]{source_path}[/blue]\n" f"Files found: [yellow]{len(files_to_index)}[/yellow]\n" @@ -675,6 +680,8 @@ def add_source( summary += f"\nErrors: [red]{len(errors)}[/red]" rprint(Panel(summary, title="Source Added")) + if errors: + raise typer.Exit(1) # ============================================================================= diff --git a/librarian/processing/parsers/base.py b/librarian/processing/parsers/base.py index 2a66675..4f92095 100644 --- a/librarian/processing/parsers/base.py +++ b/librarian/processing/parsers/base.py @@ -8,6 +8,8 @@ import logging import signal from abc import ABC, abstractmethod +from collections.abc import Iterator +from contextlib import contextmanager from pathlib import Path from librarian.types import ParsedDocument @@ -31,6 +33,30 @@ def _timeout_handler(signum: int, frame: object) -> None: raise FileReadTimeoutError("File read timed out") +@contextmanager +def _read_timeout(timeout: int) -> Iterator[None]: + """Apply a POSIX alarm-based read timeout when available; no-op otherwise. + + Windows Python does not expose ``signal.SIGALRM`` / ``signal.alarm``, so we + fall back to an unbounded read on those platforms rather than refusing to + parse the file at all. + """ + sigalrm = getattr(signal, "SIGALRM", None) + alarm = getattr(signal, "alarm", None) + if sigalrm is None or alarm is None: + yield + return + + old_handler = signal.getsignal(sigalrm) + signal.signal(sigalrm, _timeout_handler) + alarm(timeout) + try: + yield + finally: + alarm(0) + signal.signal(sigalrm, old_handler) + + def safe_read_text( file_path: Path, timeout: int = DEFAULT_READ_TIMEOUT, @@ -61,21 +87,14 @@ def safe_read_text( msg = f"File not found: {file_path}" raise FileNotFoundError(msg) - old_handler = signal.getsignal(signal.SIGALRM) - content: str | None = None try: - signal.signal(signal.SIGALRM, _timeout_handler) - signal.alarm(timeout) - - try: - content = file_path.read_text(encoding=encoding) - except UnicodeDecodeError: - if fallback_encoding: - content = file_path.read_text(encoding=fallback_encoding) - else: + with _read_timeout(timeout): + try: + return file_path.read_text(encoding=encoding) + except UnicodeDecodeError: + if fallback_encoding: + return file_path.read_text(encoding=fallback_encoding) raise - - signal.alarm(0) except FileReadTimeoutError as e: raise FileReadTimeoutError( f"Timed out reading {file_path} after {timeout}s " @@ -87,11 +106,6 @@ def safe_read_text( raise FileReadError(f"Permission denied: {file_path}") from e except OSError as e: raise FileReadError(f"Cannot read {file_path}: {e}") from e - finally: - signal.alarm(0) - signal.signal(signal.SIGALRM, old_handler) - - return content def safe_read_bytes( @@ -117,13 +131,9 @@ def safe_read_bytes( msg = f"File not found: {file_path}" raise FileNotFoundError(msg) - old_handler = signal.getsignal(signal.SIGALRM) - content: bytes | None = None try: - signal.signal(signal.SIGALRM, _timeout_handler) - signal.alarm(timeout) - content = file_path.read_bytes() - signal.alarm(0) + with _read_timeout(timeout): + return file_path.read_bytes() except FileReadTimeoutError as e: raise FileReadTimeoutError( f"Timed out reading {file_path} after {timeout}s " @@ -135,11 +145,6 @@ def safe_read_bytes( raise FileReadError(f"Permission denied: {file_path}") from e except OSError as e: raise FileReadError(f"Cannot read {file_path}: {e}") from e - finally: - signal.alarm(0) - signal.signal(signal.SIGALRM, old_handler) - - return content class BaseParser(ABC): diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 0000000..bd42a9b --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,46 @@ +"""Tests for CLI behavior.""" + +from pathlib import Path +from typing import Any + +import pytest +from typer.testing import CliRunner + +from librarian import cli + + +def test_add_directory_exits_nonzero_when_indexing_errors( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """Directory add should fail automation when indexing reports errors.""" + docs_dir = tmp_path / "docs" + docs_dir.mkdir() + fixture = docs_dir / "fixture.md" + fixture.write_text("# Synthetic Markdown Fixture\n\nContent.", encoding="utf-8") + + config_dir = tmp_path / "config" + monkeypatch.setattr(cli, "CONFIG_DIR", config_dir) + monkeypatch.setattr(cli, "SOURCES_FILE", config_dir / "sources.json") + monkeypatch.setattr(cli, "SETTINGS_FILE", config_dir / "settings.json") + monkeypatch.setattr(cli, "_get_config", lambda: {"ensure_directories": lambda: None}) + + async def fake_server_ingest(context: Any, directory: str) -> dict[str, Any]: + return { + "directory": directory, + "total_files": 1, + "indexed": 0, + "updated": 0, + "skipped": 0, + "errors": [{"path": str(fixture), "error": "parser exploded"}], + "files": [], + } + + monkeypatch.setattr("librarian.server.index_directory_to_library", fake_server_ingest) + + # Force a wide terminal so Rich does not wrap the error message off-screen. + monkeypatch.setenv("COLUMNS", "500") + result = CliRunner().invoke(cli.app, ["add", str(docs_dir), "--verbose"]) + + assert result.exit_code == 1 + assert "Errors:" in result.output + assert "parser exploded" in result.output diff --git a/tests/test_file_read_errors.py b/tests/test_file_read_errors.py index 7f36c35..a7ff613 100644 --- a/tests/test_file_read_errors.py +++ b/tests/test_file_read_errors.py @@ -29,6 +29,20 @@ def test_read_existing_file(self, tmp_path: Path) -> None: content = safe_read_text(test_file) assert content == "hello world" + def test_read_existing_file_without_sigalrm( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Test reading text on platforms without POSIX SIGALRM.""" + import signal + + test_file = tmp_path / "test.md" + test_file.write_text("# Hello\nworld", encoding="utf-8") + + monkeypatch.delattr(signal, "SIGALRM", raising=False) + + content = safe_read_text(test_file) + assert content == "# Hello\nworld" + def test_file_not_found(self, tmp_path: Path) -> None: """Test FileNotFoundError for missing files.""" missing = tmp_path / "nonexistent.md" @@ -99,6 +113,20 @@ def test_read_existing_file(self, tmp_path: Path) -> None: content = safe_read_bytes(test_file) assert content == b"\x00\x01\x02" + def test_read_existing_file_without_sigalrm( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Test reading bytes on platforms without POSIX SIGALRM.""" + import signal + + test_file = tmp_path / "test.bin" + test_file.write_bytes(b"\x00\x01\x02") + + monkeypatch.delattr(signal, "SIGALRM", raising=False) + + content = safe_read_bytes(test_file) + assert content == b"\x00\x01\x02" + def test_file_not_found(self, tmp_path: Path) -> None: """Test FileNotFoundError for missing files.""" missing = tmp_path / "nonexistent.bin"