diff --git a/aai_cli/client.py b/aai_cli/client.py index 0f4946fd..15ee5e26 100644 --- a/aai_cli/client.py +++ b/aai_cli/client.py @@ -263,7 +263,7 @@ def _export_vtt(transcript: _SubtitleTranscript, chars_per_caption: int | None) "id": lambda t: str(getattr(t, "id", "") or ""), "status": status_str, "utterances": _render_utterances, - "json": lambda t: json.dumps(transcript_json_payload(t), default=str), + "json": lambda t: jsonshape.dumps(transcript_json_payload(t)), } diff --git a/aai_cli/commands/caption/_exec.py b/aai_cli/commands/caption/_exec.py index 53a628ea..da3cb7bf 100644 --- a/aai_cli/commands/caption/_exec.py +++ b/aai_cli/commands/caption/_exec.py @@ -22,9 +22,9 @@ import assemblyai as aai from rich.markup import escape -from aai_cli import client, mediafile, output, youtube +from aai_cli import client, mediafile, output from aai_cli.context import AppState -from aai_cli.errors import CLIError, UsageError +from aai_cli.errors import CLIError @dataclass(frozen=True) @@ -122,34 +122,25 @@ def _fetch_srt(transcript: object, opts: CaptionOptions, *, json_mode: bool, qui def run_caption(opts: CaptionOptions, state: AppState, *, json_mode: bool) -> None: """Execute one `assembly caption` invocation from already-parsed flags.""" ffmpeg = mediafile.require_ffmpeg("burn captions into video") - if youtube.is_downloadable_url(opts.media): - # A media-page URL (YouTube, …) is downloaded once — always the full - # video, since the captions are burned into it. The download dir is - # temporary, so the default output lands in the current directory. - with tempfile.TemporaryDirectory(prefix="aai-caption-src-") as td: - with output.status("Downloading video…", json_mode=json_mode, quiet=state.quiet): - local = youtube.download_media(opts.media, Path(td), video=True) - out = opts.out if opts.out is not None else Path.cwd() / default_out_path(local).name - mediafile.validate_out(out, local) - _caption_and_emit(opts, local, out, ffmpeg, state, json_mode=json_mode) - return - if opts.media.startswith(("http://", "https://")): - raise UsageError( - "assembly caption can't fetch this URL; it captions a local file or a " - "media-page URL yt-dlp can download (YouTube, …).", - suggestion="Download the video first, then caption the local copy.", - ) - if "://" in opts.media: - # Path() would collapse the "//" and report a corrupted echo of the URL. - raise UsageError( - f"assembly caption needs a local file, not a URL: {opts.media}", - suggestion="Download the video first, then caption the local copy.", + # A media-page URL is downloaded once — always the full video, since the + # captions are burned into it. + with mediafile.resolve_media_source( + opts.media, + "caption", + fetch_clause="captions a local file or a media-page URL yt-dlp can download (YouTube, …)", + download_suggestion="Download the video first, then caption the local copy.", + video=True, + download_sections=None, + json_mode=json_mode, + quiet=state.quiet, + ) as (media, downloaded): + if not downloaded: + mediafile.validate_local_media(media, "caption", kind="video") + out = mediafile.default_output( + opts.out, media, downloaded=downloaded, namer=default_out_path ) - media = Path(opts.media) - mediafile.validate_local_media(media, "caption", kind="video") - out = opts.out if opts.out is not None else default_out_path(media) - mediafile.validate_out(out, media) - _caption_and_emit(opts, media, out, ffmpeg, state, json_mode=json_mode) + mediafile.validate_out(out, media) + _caption_and_emit(opts, media, out, ffmpeg, state, json_mode=json_mode) def _caption_and_emit( diff --git a/aai_cli/commands/clip/_exec.py b/aai_cli/commands/clip/_exec.py index aa8819fa..f143a0f4 100644 --- a/aai_cli/commands/clip/_exec.py +++ b/aai_cli/commands/clip/_exec.py @@ -20,7 +20,6 @@ from __future__ import annotations import json -import tempfile from dataclasses import dataclass from pathlib import Path from types import SimpleNamespace @@ -308,28 +307,26 @@ def run_clip(opts: ClipOptions, state: AppState, *, json_mode: bool) -> None: youtube.validate_video_flag(opts.media, video=opts.video) explicit = [clip_select.parse_range(value) for value in opts.ranges] ffmpeg = mediafile.require_ffmpeg("cut media") - if youtube.is_downloadable_url(opts.media): - # A media-page URL (YouTube, podcast page, …) is downloaded once — the - # audio track by default, the full video with --video so the clips carry - # video too — and clipped locally. The download dir is temporary, so the - # clips land in --out-dir or the current directory — never next to the - # temp file. - downloading = "Downloading video…" if opts.video else "Downloading audio…" - with tempfile.TemporaryDirectory(prefix="aai-clip-") as td: - with output.status(downloading, json_mode=json_mode, quiet=state.quiet): - local = youtube.download_media(opts.media, Path(td), video=opts.video) - out_dir = opts.out_dir if opts.out_dir is not None else Path.cwd() - _cut_and_emit(opts, local, out_dir, explicit, ffmpeg, state, json_mode=json_mode) - return - if opts.media.startswith(("http://", "https://")): - raise UsageError( - "assembly clip can't fetch this URL; it cuts a local file or a " - "media-page URL yt-dlp can download (YouTube, podcasts, …).", - suggestion="Download the media first, then clip the local copy.", - ) - media = Path(opts.media) - mediafile.validate_local_media(media, "clip") - _cut_and_emit(opts, media, opts.out_dir, explicit, ffmpeg, state, json_mode=json_mode) + # A media-page URL is downloaded once — the audio track by default, the full + # video with --video so the clips carry video too — and clipped locally. + with mediafile.resolve_media_source( + opts.media, + "clip", + fetch_clause="cuts a local file or a media-page URL yt-dlp can download (YouTube, podcasts, …)", + download_suggestion="Download the media first, then clip the local copy.", + video=opts.video, + download_sections=None, + json_mode=json_mode, + quiet=state.quiet, + ) as (media, downloaded): + if not downloaded: + mediafile.validate_local_media(media, "clip") + # A downloaded source lives in a temp dir, so its clips land in --out-dir + # or the current directory — never next to the vanishing temp file. + out_dir: Path | None = opts.out_dir + if downloaded and out_dir is None: + out_dir = Path.cwd() + _cut_and_emit(opts, media, out_dir, explicit, ffmpeg, state, json_mode=json_mode) def _cut_and_emit( diff --git a/aai_cli/commands/dub/_exec.py b/aai_cli/commands/dub/_exec.py index 617c031b..2f3a3793 100644 --- a/aai_cli/commands/dub/_exec.py +++ b/aai_cli/commands/dub/_exec.py @@ -340,52 +340,29 @@ def run_dub(opts: DubOptions, state: AppState, *, json_mode: bool) -> None: voice_plan = _VoicePlan(*dialogue.parse_voice_overrides(opts.voice)) youtube.validate_video_flag(opts.media, video=opts.video) youtube.validate_sections_flag(opts.media, opts.download_sections) - if youtube.is_downloadable_url(opts.media): - # A media-page URL (YouTube, podcast page, …) is downloaded once — the - # audio track by default, the full video with --video so the dub keeps - # the picture, only the --download-sections slices when given — and - # dubbed locally. ffmpeg is checked before the download so a missing - # dependency fails before any fetch. - ffmpeg = mediafile.require_ffmpeg("write the dubbed file") - downloading = "Downloading video…" if opts.video else "Downloading audio…" - with tempfile.TemporaryDirectory(prefix="aai-dub-src-") as td: - with output.status(downloading, json_mode=json_mode, quiet=state.quiet): - local = youtube.download_media( - opts.media, - Path(td), - video=opts.video, - download_sections=opts.download_sections, - ) - # The download dir is temporary, so the default output lands in the - # current directory — never next to the temp file. - out = ( - opts.out - if opts.out is not None - else Path.cwd() / default_out_path(local, language).name - ) - mediafile.validate_out(out, local) - _dub_and_emit( - opts, local, out, language, ffmpeg, voice_plan, state, json_mode=json_mode - ) - return - if opts.media.startswith(("http://", "https://")): - raise UsageError( - "assembly dub can't fetch this URL; it dubs a local file or a " - "media-page URL yt-dlp can download (YouTube, podcasts, …).", - suggestion="Download the media first, then dub the local copy.", - ) - if "://" in opts.media: - # Path() would collapse the "//" and report a corrupted echo of the URL. - raise UsageError( - f"assembly dub needs a local file, not a URL: {opts.media}", - suggestion="Download the media first, then dub the local copy.", - ) - media = Path(opts.media) - mediafile.validate_local_media(media, "dub") - out = opts.out if opts.out is not None else default_out_path(media, language) - mediafile.validate_out(out, media) + # ffmpeg is checked before any (billed) download/transcription so a missing + # dependency fails before any fetch. ffmpeg = mediafile.require_ffmpeg("write the dubbed file") - _dub_and_emit(opts, media, out, language, ffmpeg, voice_plan, state, json_mode=json_mode) + # A media-page URL is downloaded once — the audio track by default, the full + # video with --video so the dub keeps the picture, only the + # --download-sections slices when given — and dubbed locally. + with mediafile.resolve_media_source( + opts.media, + "dub", + fetch_clause="dubs a local file or a media-page URL yt-dlp can download (YouTube, podcasts, …)", + download_suggestion="Download the media first, then dub the local copy.", + video=opts.video, + download_sections=opts.download_sections, + json_mode=json_mode, + quiet=state.quiet, + ) as (media, downloaded): + if not downloaded: + mediafile.validate_local_media(media, "dub") + out = mediafile.default_output( + opts.out, media, downloaded=downloaded, namer=lambda m: default_out_path(m, language) + ) + mediafile.validate_out(out, media) + _dub_and_emit(opts, media, out, language, ffmpeg, voice_plan, state, json_mode=json_mode) def _dub_and_emit( diff --git a/aai_cli/errors.py b/aai_cli/errors.py index 05bbe129..2ab5f76b 100644 --- a/aai_cli/errors.py +++ b/aai_cli/errors.py @@ -19,6 +19,8 @@ from __future__ import annotations +from aai_cli import jsonshape + class CLIError(Exception): """Base error carrying an exit code, a machine-readable type, and an optional @@ -41,11 +43,15 @@ def __init__( self.suggestion = suggestion def to_dict(self) -> dict[str, object]: - body: dict[str, object] = {"type": self.error_type, "message": self.message} - if self.suggestion is not None: - body["suggestion"] = self.suggestion - if self.transcript_id is not None: - body["transcript_id"] = self.transcript_id + # suggestion/transcript_id are omitted entirely when unset (not null). + body = jsonshape.compact( + { + "type": self.error_type, + "message": self.message, + "suggestion": self.suggestion, + "transcript_id": self.transcript_id, + } + ) return {"error": body} diff --git a/aai_cli/jsonshape.py b/aai_cli/jsonshape.py index cbd03eb5..eed79c71 100644 --- a/aai_cli/jsonshape.py +++ b/aai_cli/jsonshape.py @@ -1,5 +1,7 @@ from __future__ import annotations +import json + from pydantic import TypeAdapter, ValidationError _JSON_OBJECT: TypeAdapter[dict[str, object]] = TypeAdapter(dict[str, object]) @@ -66,3 +68,25 @@ def as_float(value: object, default: float = 0.0) -> float: return _FLOAT.validate_python(value) except ValidationError: return default + + +def dumps(obj: object) -> str: + """Serialize ``obj`` to a JSON string the way the whole CLI does it. + + ``default=str`` is the one safety the CLI relies on everywhere it emits JSON: + pydantic/SDK models and ``datetime``\\s that aren't natively serializable fall + back to ``str(...)`` instead of raising. Centralized here so every emission + path (``output``'s stdout/stderr writers, the realtime ``BaseRenderer``, the + ``--out`` and ``-o json`` field renderers) shares one serialization policy. + """ + return json.dumps(obj, default=str) + + +def compact(mapping: dict[str, object]) -> dict[str, object]: + """Return ``mapping`` without the keys whose value is ``None``. + + For JSON payloads where an absent optional field should be omitted entirely + rather than serialized as ``null`` — the build-then-``if x is not None`` + idiom repeated across the error and realtime-event payloads. + """ + return {key: value for key, value in mapping.items() if value is not None} diff --git a/aai_cli/mediafile.py b/aai_cli/mediafile.py index 69ee503d..45f628a3 100644 --- a/aai_cli/mediafile.py +++ b/aai_cli/mediafile.py @@ -9,13 +9,16 @@ from __future__ import annotations +import contextlib import shutil import subprocess +import tempfile +from collections.abc import Callable, Generator from pathlib import Path import assemblyai as aai -from aai_cli import client, output +from aai_cli import client, output, youtube from aai_cli.errors import APIError, CLIError, UsageError @@ -39,6 +42,69 @@ def validate_local_media(media: Path, command: str, *, kind: str = "audio/video" ) +@contextlib.contextmanager +def resolve_media_source( + media: str, + command: str, + *, + fetch_clause: str, + download_suggestion: str, + video: bool, + download_sections: list[str] | None, + json_mode: bool, + quiet: bool, +) -> Generator[tuple[Path, bool]]: + """Resolve the ``media`` argument to a local file for the body of the ``with``. + + The three-way source handling shared verbatim by ``caption``/``clip``/``dub`` + (only the wording differs): + + * a media-page URL (YouTube, …) is downloaded into a temp dir that stays open + until the block exits, yielding ``(path, True)`` — so the caller must resolve + any default output *away* from that vanishing temp dir; + * any other ``http(s)://`` URL is rejected (the API/yt-dlp can't fetch it), as + is a bare ``scheme://`` (``Path()`` would mangle the ``//`` into a corrupted + echo of the URL); + * a local path yields ``(Path(media), False)`` unvalidated — the caller checks + it with ``validate_local_media`` (the ``kind`` differs per command). + + ``fetch_clause`` completes "assembly can't fetch this URL; it …" and + ``download_suggestion`` is the shared "Download … first" hint on both rejects. + """ + if youtube.is_downloadable_url(media): + download_label = "Downloading video…" if video else "Downloading audio…" + with tempfile.TemporaryDirectory(prefix=f"aai-{command}-src-") as td: + with output.status(download_label, json_mode=json_mode, quiet=quiet): + local = youtube.download_media( + media, Path(td), video=video, download_sections=download_sections + ) + yield local, True + return + if media.startswith(("http://", "https://")): + raise UsageError( + f"assembly {command} can't fetch this URL; it {fetch_clause}.", + suggestion=download_suggestion, + ) + if "://" in media: + raise UsageError( + f"assembly {command} needs a local file, not a URL: {media}", + suggestion=download_suggestion, + ) + yield Path(media), False + + +def default_output( + out: Path | None, media: Path, *, downloaded: bool, namer: Callable[[Path], Path] +) -> Path: + """The output path for a media command: an explicit ``--out`` wins; otherwise + ``namer(media)`` next to the source — or, when ``media`` is a vanishing + downloaded temp file, that same name dropped into the current directory.""" + if out is not None: + return out + chosen = namer(media) + return Path.cwd() / chosen.name if downloaded else chosen + + def validate_out(out: Path, media: Path) -> None: """An unwritable or self-overwriting output file must fail here, before the billed transcription/translation/synthesis pipeline runs. diff --git a/aai_cli/output.py b/aai_cli/output.py index 2354906e..8f3956f8 100644 --- a/aai_cli/output.py +++ b/aai_cli/output.py @@ -1,7 +1,6 @@ from __future__ import annotations import contextlib -import json import os import sys from collections.abc import Callable, Generator @@ -13,7 +12,7 @@ from rich.table import Table from rich.text import Text -from aai_cli import __version__, choices, theme +from aai_cli import __version__, choices, jsonshape, theme if TYPE_CHECKING: from aai_cli.errors import CLIError @@ -200,14 +199,14 @@ def stack(*items: RenderableType | None) -> RenderableType: def emit[T](data: T, human_renderer: Callable[[T], object], *, json_mode: bool) -> None: if json_mode: - print(json.dumps(data, default=str)) + print(jsonshape.dumps(data)) else: console.print(human_renderer(data)) def emit_ndjson(obj: object) -> None: """Write one newline-delimited JSON record to stdout, flushed for live pipelines.""" - print(json.dumps(obj, default=str), flush=True) + print(jsonshape.dumps(obj), flush=True) def emit_text(text: str) -> None: @@ -239,7 +238,7 @@ def emit_warning(message: str, *, json_mode: bool) -> None: clean and stderr machine-readable. Human mode gets the familiar yellow line. """ if json_mode: - print(json.dumps({"warning": message}, default=str), file=sys.stderr) + print(jsonshape.dumps({"warning": message}), file=sys.stderr) else: error_console.print(warn(message)) @@ -247,7 +246,7 @@ def emit_warning(message: str, *, json_mode: bool) -> None: def emit_error(err: CLIError, *, json_mode: bool) -> None: # Always to stderr, so stdout stays clean for `assembly … | next-tool` pipelines. if json_mode: - print(json.dumps(err.to_dict(), default=str), file=sys.stderr) + print(jsonshape.dumps(err.to_dict()), file=sys.stderr) else: error_console.print(f"[aai.error]Error:[/aai.error] {escape(err.message)}") if err.suggestion: diff --git a/aai_cli/render.py b/aai_cli/render.py index 37e9bd63..5f7a18cd 100644 --- a/aai_cli/render.py +++ b/aai_cli/render.py @@ -1,6 +1,5 @@ from __future__ import annotations -import json import sys from typing import TextIO @@ -8,7 +7,7 @@ from rich.live import Live from rich.text import Text -from aai_cli import theme +from aai_cli import jsonshape, theme class BaseRenderer: @@ -44,8 +43,8 @@ def _status(self, message: str) -> None: # --- JSON output (plain text; preserves BrokenPipe for `| head`) ------- def _emit(self, obj: object) -> None: - """Write one NDJSON event (default=str matches output.emit_ndjson's safety).""" - self._write(json.dumps(obj, default=str) + "\n") + """Write one NDJSON event (jsonshape.dumps is the CLI-wide JSON policy).""" + self._write(jsonshape.dumps(obj) + "\n") def _write(self, text: str) -> None: try: diff --git a/aai_cli/streaming/render.py b/aai_cli/streaming/render.py index ca04a959..3e7de9d6 100644 --- a/aai_cli/streaming/render.py +++ b/aai_cli/streaming/render.py @@ -6,7 +6,7 @@ from rich.console import Console from rich.text import Text -from aai_cli import theme +from aai_cli import jsonshape, theme from aai_cli.render import BaseRenderer # Source label -> (display text, Rich style). System audio borrows the agent color; @@ -115,13 +115,15 @@ def turn(self, event: object, *, source: str | None = None) -> None: speaker = getattr(event, "speaker_label", None) # set when --speaker-labels diarizes with self._lock: if self.json_mode: - payload: dict[str, object] = { - "type": "turn", - "transcript": text, - "end_of_turn": end, - } - if speaker is not None: - payload["speaker"] = speaker + # speaker is omitted entirely when undiarized (not null). + payload = jsonshape.compact( + { + "type": "turn", + "transcript": text, + "end_of_turn": end, + "speaker": speaker, + } + ) self._emit(self._with_source(payload, source)) elif self.text_mode: if end and text: diff --git a/aai_cli/transcribe_exec.py b/aai_cli/transcribe_exec.py index 673ab1a7..77d03cd1 100644 --- a/aai_cli/transcribe_exec.py +++ b/aai_cli/transcribe_exec.py @@ -7,7 +7,6 @@ from __future__ import annotations -import json import os import tempfile from dataclasses import dataclass @@ -22,6 +21,7 @@ client, code_gen, config_builder, + jsonshape, llm, output, remotefs, @@ -146,7 +146,7 @@ def out_payload( transcript, output_field, chars_per_caption=chars_per_caption ) if json_mode: - return json.dumps(client.transcript_json_payload(transcript), default=str) + return jsonshape.dumps(client.transcript_json_payload(transcript)) return client.select_transcript_field(transcript, choices.TranscriptOutput.text) diff --git a/tests/test_caption_exec.py b/tests/test_caption_exec.py index 8f4f41c5..3785852e 100644 --- a/tests/test_caption_exec.py +++ b/tests/test_caption_exec.py @@ -177,6 +177,7 @@ def test_run_caption_rejects_non_downloadable_url(fake_ffmpeg): with pytest.raises(UsageError) as exc: _run(opts, json_mode=False) assert "assembly caption can't fetch this URL" in exc.value.message + assert "captions a local file" in exc.value.message assert "Download the video first" in (exc.value.suggestion or "") @@ -345,9 +346,11 @@ def fake_download(monkeypatch: pytest.MonkeyPatch): """Stand in for yt-dlp: 'download' a fixed video file into the temp dir.""" seen: dict[str, object] = {} - def download(url, dest_dir, *, video=False): + def download(url, dest_dir, *, video=False, download_sections=None): seen["url"] = url seen["video"] = video + seen["download_sections"] = download_sections + seen["dest_dir"] = dest_dir path = dest_dir / "vid123.mp4" path.write_bytes(b"\x00video") seen["path"] = path @@ -363,9 +366,12 @@ def test_run_caption_youtube_downloads_the_full_video( monkeypatch.chdir(tmp_path) opts = dataclasses.replace(DEFAULTS, media=YT_URL) _run(opts, json_mode=True) - # Captions are burned into the picture, so the download is always the video. + # Captions are burned into the picture, so the download is always the video, + # never a section slice, into the command's own source temp dir. assert fake_download["url"] == YT_URL assert fake_download["video"] is True + assert fake_download["download_sections"] is None + assert Path(fake_download["dest_dir"]).name.startswith("aai-caption-src-") assert fake_transcribe["audio"] == str(fake_download["path"]) # ffmpeg reads the downloaded temp file; the default output lands in the cwd, # named after the download (the temp dir is gone after the run). diff --git a/tests/test_clip_exec.py b/tests/test_clip_exec.py index c2a8169e..98e97ec1 100644 --- a/tests/test_clip_exec.py +++ b/tests/test_clip_exec.py @@ -92,6 +92,17 @@ def test_run_clip_rejects_non_downloadable_url(): with pytest.raises(UsageError) as exc: clip_exec.run_clip(opts, AppState(), json_mode=False) assert "can't fetch this URL" in exc.value.message + assert "cuts a local file" in exc.value.message + assert "Download the media first" in (exc.value.suggestion or "") + + +def test_run_clip_rejects_remote_urls_with_the_url_intact(): + # Path() would collapse "//" and echo a corrupted "s3:/bucket/…" back, so a + # bare scheme:// is rejected outright (same handling as caption/dub). + opts = dataclasses.replace(DEFAULTS, media="s3://bucket/talk.mp4", ranges=["1-2"]) + with pytest.raises(UsageError) as exc: + clip_exec.run_clip(opts, AppState(), json_mode=False) + assert "s3://bucket/talk.mp4" in exc.value.message assert "Download the media first" in (exc.value.suggestion or "") diff --git a/tests/test_clip_sources.py b/tests/test_clip_sources.py index b4f9a194..6374ef57 100644 --- a/tests/test_clip_sources.py +++ b/tests/test_clip_sources.py @@ -39,9 +39,11 @@ def fake_download(monkeypatch): """Stand in for yt-dlp: 'download' a fixed media file into the temp dir.""" seen: dict[str, object] = {} - def download(url, dest_dir, *, video=False): + def download(url, dest_dir, *, video=False, download_sections=None): seen["url"] = url seen["video"] = video + seen["download_sections"] = download_sections + seen["dest_dir"] = dest_dir path = dest_dir / ("vid123.mp4" if video else "vid123.m4a") path.write_bytes(b"\x00media") seen["path"] = path @@ -61,6 +63,9 @@ def test_run_clip_downloads_youtube_audio_into_cwd( opts = dataclasses.replace(DEFAULTS, media=YT_URL, ranges=["1-2"]) clip_exec.run_clip(opts, AppState(), json_mode=True) assert fake_download["url"] == YT_URL + # No section slicing, into the command's own source temp dir. + assert fake_download["download_sections"] is None + assert Path(fake_download["dest_dir"]).name.startswith("aai-clip-src-") # ffmpeg reads the downloaded temp file; the clip lands in the cwd, named # after the download (the temp dir is gone after the run). assert fake_ffmpeg[1][6] == str(fake_download["path"]) diff --git a/tests/test_dub_exec.py b/tests/test_dub_exec.py index dff55a1a..7db72e9d 100644 --- a/tests/test_dub_exec.py +++ b/tests/test_dub_exec.py @@ -229,10 +229,12 @@ def test_run_dub_refuses_to_overwrite_the_input(sandbox, media): assert "overwrite the input file" in exc.value.message -def test_run_dub_rejects_remote_urls_with_the_url_intact(sandbox): +def test_run_dub_rejects_remote_urls_with_the_url_intact(sandbox, monkeypatch): # http(s) URLs are downloaded (or rejected by the yt-dlp branch); a bucket # URL would otherwise reach Path(), which collapses "//" and echoes a - # corrupted "s3:/bucket/…" back. + # corrupted "s3:/bucket/…" back. ffmpeg is checked first (as in caption), so + # stub it present to reach the URL classification. + monkeypatch.setattr("shutil.which", lambda name: "/usr/bin/ffmpeg") url = "s3://bucket/talk.mp4" opts = dataclasses.replace(DEFAULTS, media=url) with pytest.raises(UsageError) as exc: diff --git a/tests/test_dub_sources.py b/tests/test_dub_sources.py index 55bd6d84..49f07c44 100644 --- a/tests/test_dub_sources.py +++ b/tests/test_dub_sources.py @@ -70,6 +70,7 @@ def download(url, dest_dir, *, video=False, download_sections=None): seen["url"] = url seen["video"] = video seen["download_sections"] = download_sections + seen["dest_dir"] = dest_dir path = dest_dir / ("vid123.mp4" if video else "vid123.m4a") path.write_bytes(b"\x00media") seen["path"] = path @@ -101,6 +102,7 @@ def test_run_dub_youtube_downloads_and_dubs_into_cwd( assert fake_download["url"] == YT_URL assert fake_download["video"] is False assert fake_download["download_sections"] == [] + assert Path(fake_download["dest_dir"]).name.startswith("aai-dub-src-") assert fake_transcribe["audio"] == str(fake_download["path"]) # ffmpeg muxes over the downloaded file; the default output lands in the cwd, # named after the download (the temp dir is gone after the run). @@ -223,4 +225,5 @@ def test_run_dub_rejects_non_downloadable_url(monkeypatch): with pytest.raises(UsageError) as exc: _run(opts, json_mode=False) assert "assembly dub can't fetch this URL" in exc.value.message + assert "dubs a local file" in exc.value.message assert "Download the media first" in (exc.value.suggestion or "") diff --git a/tests/test_jsonshape.py b/tests/test_jsonshape.py index 16b6653f..6e69b163 100644 --- a/tests/test_jsonshape.py +++ b/tests/test_jsonshape.py @@ -1,3 +1,5 @@ +import datetime + from aai_cli import jsonshape @@ -36,3 +38,22 @@ def test_as_float_coerces_scalars_and_defaults(): assert jsonshape.as_float("bad") == 0.0 assert jsonshape.as_float(object()) == 0.0 assert jsonshape.as_float(None, default=-1.0) == -1.0 + + +def test_dumps_round_trips_plain_json(): + assert jsonshape.dumps({"a": 1, "b": [2, 3]}) == '{"a": 1, "b": [2, 3]}' + + +def test_dumps_falls_back_to_str_for_unserializable_values(): + # A datetime isn't natively JSON-serializable; default=str must stringify it + # instead of raising — the safety every CLI emission path depends on. + moment = datetime.datetime(2026, 6, 13, 14, 0, 0) + assert jsonshape.dumps({"at": moment}) == '{"at": "2026-06-13 14:00:00"}' + + +def test_compact_drops_only_none_values(): + assert jsonshape.compact({"keep": 0, "blank": "", "false": False, "drop": None}) == { + "keep": 0, + "blank": "", + "false": False, + }