Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion aai_cli/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ def _export_vtt(transcript: _SubtitleTranscript, chars_per_caption: int | None)
"id": lambda t: str(getattr(t, "id", "") or ""),
"status": status_str,
"utterances": _render_utterances,
"json": lambda t: json.dumps(transcript_json_payload(t), default=str),
"json": lambda t: jsonshape.dumps(transcript_json_payload(t)),
}


Expand Down
49 changes: 20 additions & 29 deletions aai_cli/commands/caption/_exec.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@
import assemblyai as aai
from rich.markup import escape

from aai_cli import client, mediafile, output, youtube
from aai_cli import client, mediafile, output
from aai_cli.context import AppState
from aai_cli.errors import CLIError, UsageError
from aai_cli.errors import CLIError


@dataclass(frozen=True)
Expand Down Expand Up @@ -122,34 +122,25 @@ def _fetch_srt(transcript: object, opts: CaptionOptions, *, json_mode: bool, qui
def run_caption(opts: CaptionOptions, state: AppState, *, json_mode: bool) -> None:
"""Execute one `assembly caption` invocation from already-parsed flags."""
ffmpeg = mediafile.require_ffmpeg("burn captions into video")
if youtube.is_downloadable_url(opts.media):
# A media-page URL (YouTube, …) is downloaded once — always the full
# video, since the captions are burned into it. The download dir is
# temporary, so the default output lands in the current directory.
with tempfile.TemporaryDirectory(prefix="aai-caption-src-") as td:
with output.status("Downloading video…", json_mode=json_mode, quiet=state.quiet):
local = youtube.download_media(opts.media, Path(td), video=True)
out = opts.out if opts.out is not None else Path.cwd() / default_out_path(local).name
mediafile.validate_out(out, local)
_caption_and_emit(opts, local, out, ffmpeg, state, json_mode=json_mode)
return
if opts.media.startswith(("http://", "https://")):
raise UsageError(
"assembly caption can't fetch this URL; it captions a local file or a "
"media-page URL yt-dlp can download (YouTube, …).",
suggestion="Download the video first, then caption the local copy.",
)
if "://" in opts.media:
# Path() would collapse the "//" and report a corrupted echo of the URL.
raise UsageError(
f"assembly caption needs a local file, not a URL: {opts.media}",
suggestion="Download the video first, then caption the local copy.",
# A media-page URL is downloaded once — always the full video, since the
# captions are burned into it.
with mediafile.resolve_media_source(
opts.media,
"caption",
fetch_clause="captions a local file or a media-page URL yt-dlp can download (YouTube, …)",
download_suggestion="Download the video first, then caption the local copy.",
video=True,
download_sections=None,
json_mode=json_mode,
quiet=state.quiet,
) as (media, downloaded):
if not downloaded:
mediafile.validate_local_media(media, "caption", kind="video")
out = mediafile.default_output(
opts.out, media, downloaded=downloaded, namer=default_out_path
)
media = Path(opts.media)
mediafile.validate_local_media(media, "caption", kind="video")
out = opts.out if opts.out is not None else default_out_path(media)
mediafile.validate_out(out, media)
_caption_and_emit(opts, media, out, ffmpeg, state, json_mode=json_mode)
mediafile.validate_out(out, media)
_caption_and_emit(opts, media, out, ffmpeg, state, json_mode=json_mode)


def _caption_and_emit(
Expand Down
43 changes: 20 additions & 23 deletions aai_cli/commands/clip/_exec.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
from __future__ import annotations

import json
import tempfile
from dataclasses import dataclass
from pathlib import Path
from types import SimpleNamespace
Expand Down Expand Up @@ -308,28 +307,26 @@ def run_clip(opts: ClipOptions, state: AppState, *, json_mode: bool) -> None:
youtube.validate_video_flag(opts.media, video=opts.video)
explicit = [clip_select.parse_range(value) for value in opts.ranges]
ffmpeg = mediafile.require_ffmpeg("cut media")
if youtube.is_downloadable_url(opts.media):
# A media-page URL (YouTube, podcast page, …) is downloaded once — the
# audio track by default, the full video with --video so the clips carry
# video too — and clipped locally. The download dir is temporary, so the
# clips land in --out-dir or the current directory — never next to the
# temp file.
downloading = "Downloading video…" if opts.video else "Downloading audio…"
with tempfile.TemporaryDirectory(prefix="aai-clip-") as td:
with output.status(downloading, json_mode=json_mode, quiet=state.quiet):
local = youtube.download_media(opts.media, Path(td), video=opts.video)
out_dir = opts.out_dir if opts.out_dir is not None else Path.cwd()
_cut_and_emit(opts, local, out_dir, explicit, ffmpeg, state, json_mode=json_mode)
return
if opts.media.startswith(("http://", "https://")):
raise UsageError(
"assembly clip can't fetch this URL; it cuts a local file or a "
"media-page URL yt-dlp can download (YouTube, podcasts, …).",
suggestion="Download the media first, then clip the local copy.",
)
media = Path(opts.media)
mediafile.validate_local_media(media, "clip")
_cut_and_emit(opts, media, opts.out_dir, explicit, ffmpeg, state, json_mode=json_mode)
# A media-page URL is downloaded once — the audio track by default, the full
# video with --video so the clips carry video too — and clipped locally.
with mediafile.resolve_media_source(
opts.media,
"clip",
fetch_clause="cuts a local file or a media-page URL yt-dlp can download (YouTube, podcasts, …)",
download_suggestion="Download the media first, then clip the local copy.",
video=opts.video,
download_sections=None,
json_mode=json_mode,
quiet=state.quiet,
) as (media, downloaded):
if not downloaded:
mediafile.validate_local_media(media, "clip")
# A downloaded source lives in a temp dir, so its clips land in --out-dir
# or the current directory — never next to the vanishing temp file.
out_dir: Path | None = opts.out_dir
if downloaded and out_dir is None:
out_dir = Path.cwd()
_cut_and_emit(opts, media, out_dir, explicit, ffmpeg, state, json_mode=json_mode)


def _cut_and_emit(
Expand Down
67 changes: 22 additions & 45 deletions aai_cli/commands/dub/_exec.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,52 +340,29 @@ def run_dub(opts: DubOptions, state: AppState, *, json_mode: bool) -> None:
voice_plan = _VoicePlan(*dialogue.parse_voice_overrides(opts.voice))
youtube.validate_video_flag(opts.media, video=opts.video)
youtube.validate_sections_flag(opts.media, opts.download_sections)
if youtube.is_downloadable_url(opts.media):
# A media-page URL (YouTube, podcast page, …) is downloaded once — the
# audio track by default, the full video with --video so the dub keeps
# the picture, only the --download-sections slices when given — and
# dubbed locally. ffmpeg is checked before the download so a missing
# dependency fails before any fetch.
ffmpeg = mediafile.require_ffmpeg("write the dubbed file")
downloading = "Downloading video…" if opts.video else "Downloading audio…"
with tempfile.TemporaryDirectory(prefix="aai-dub-src-") as td:
with output.status(downloading, json_mode=json_mode, quiet=state.quiet):
local = youtube.download_media(
opts.media,
Path(td),
video=opts.video,
download_sections=opts.download_sections,
)
# The download dir is temporary, so the default output lands in the
# current directory — never next to the temp file.
out = (
opts.out
if opts.out is not None
else Path.cwd() / default_out_path(local, language).name
)
mediafile.validate_out(out, local)
_dub_and_emit(
opts, local, out, language, ffmpeg, voice_plan, state, json_mode=json_mode
)
return
if opts.media.startswith(("http://", "https://")):
raise UsageError(
"assembly dub can't fetch this URL; it dubs a local file or a "
"media-page URL yt-dlp can download (YouTube, podcasts, …).",
suggestion="Download the media first, then dub the local copy.",
)
if "://" in opts.media:
# Path() would collapse the "//" and report a corrupted echo of the URL.
raise UsageError(
f"assembly dub needs a local file, not a URL: {opts.media}",
suggestion="Download the media first, then dub the local copy.",
)
media = Path(opts.media)
mediafile.validate_local_media(media, "dub")
out = opts.out if opts.out is not None else default_out_path(media, language)
mediafile.validate_out(out, media)
# ffmpeg is checked before any (billed) download/transcription so a missing
# dependency fails before any fetch.
ffmpeg = mediafile.require_ffmpeg("write the dubbed file")
_dub_and_emit(opts, media, out, language, ffmpeg, voice_plan, state, json_mode=json_mode)
# A media-page URL is downloaded once — the audio track by default, the full
# video with --video so the dub keeps the picture, only the
# --download-sections slices when given — and dubbed locally.
with mediafile.resolve_media_source(
opts.media,
"dub",
fetch_clause="dubs a local file or a media-page URL yt-dlp can download (YouTube, podcasts, …)",
download_suggestion="Download the media first, then dub the local copy.",
video=opts.video,
download_sections=opts.download_sections,
json_mode=json_mode,
quiet=state.quiet,
) as (media, downloaded):
if not downloaded:
mediafile.validate_local_media(media, "dub")
out = mediafile.default_output(
opts.out, media, downloaded=downloaded, namer=lambda m: default_out_path(m, language)
)
mediafile.validate_out(out, media)
_dub_and_emit(opts, media, out, language, ffmpeg, voice_plan, state, json_mode=json_mode)


def _dub_and_emit(
Expand Down
16 changes: 11 additions & 5 deletions aai_cli/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@

from __future__ import annotations

from aai_cli import jsonshape


class CLIError(Exception):
"""Base error carrying an exit code, a machine-readable type, and an optional
Expand All @@ -41,11 +43,15 @@ def __init__(
self.suggestion = suggestion

def to_dict(self) -> dict[str, object]:
body: dict[str, object] = {"type": self.error_type, "message": self.message}
if self.suggestion is not None:
body["suggestion"] = self.suggestion
if self.transcript_id is not None:
body["transcript_id"] = self.transcript_id
# suggestion/transcript_id are omitted entirely when unset (not null).
body = jsonshape.compact(
{
"type": self.error_type,
"message": self.message,
"suggestion": self.suggestion,
"transcript_id": self.transcript_id,
}
)
return {"error": body}


Expand Down
24 changes: 24 additions & 0 deletions aai_cli/jsonshape.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import annotations

import json

from pydantic import TypeAdapter, ValidationError

_JSON_OBJECT: TypeAdapter[dict[str, object]] = TypeAdapter(dict[str, object])
Expand Down Expand Up @@ -66,3 +68,25 @@ def as_float(value: object, default: float = 0.0) -> float:
return _FLOAT.validate_python(value)
except ValidationError:
return default


def dumps(obj: object) -> str:
    """Serialize ``obj`` to a JSON string using the CLI's single serialization policy.

    The only non-default option is ``default=str``: any value the ``json``
    module cannot encode natively (for example a ``datetime`` or an SDK model
    object) is replaced by ``str(value)`` instead of raising ``TypeError``.
    Keeping that choice in one helper lets every JSON emission path share it.

    Note that the fallback is lossy by design — the consumer receives a string,
    not a structured value — so callers that need a specific shape should
    convert the object before passing it in.
    """
    return json.dumps(obj, default=str)


def compact(mapping: dict[str, object]) -> dict[str, object]:
    """Return a copy of ``mapping`` without the keys whose value is ``None``.

    For JSON payloads where an absent optional field should be omitted entirely
    rather than serialized as ``null``. Only ``None`` is dropped: other falsy
    values (``0``, ``""``, ``False``, empty containers) are kept. The input is
    not modified, and the surviving keys keep their original order.
    """
    # Identity check, not truthiness: 0 / "" / False are real values to emit.
    return {key: value for key, value in mapping.items() if value is not None}
68 changes: 67 additions & 1 deletion aai_cli/mediafile.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,16 @@

from __future__ import annotations

import contextlib
import shutil
import subprocess
import tempfile
from collections.abc import Callable, Generator
from pathlib import Path

import assemblyai as aai

from aai_cli import client, output
from aai_cli import client, output, youtube
from aai_cli.errors import APIError, CLIError, UsageError


Expand All @@ -39,6 +42,69 @@ def validate_local_media(media: Path, command: str, *, kind: str = "audio/video"
)


@contextlib.contextmanager
def resolve_media_source(
    media: str,
    command: str,
    *,
    fetch_clause: str,
    download_suggestion: str,
    video: bool,
    download_sections: list[str] | None,
    json_mode: bool,
    quiet: bool,
) -> Generator[tuple[Path, bool]]:
    """Resolve the ``media`` argument to a local file for the body of the ``with``.

    The three-way source handling shared by ``caption``/``clip``/``dub`` (only
    the wording differs):

    * a media-page URL (YouTube, …) is downloaded into a temp dir that stays open
      until the block exits, yielding ``(path, True)`` — so the caller must resolve
      any default output *away* from that vanishing temp dir;
    * any other ``http(s)://`` URL is rejected, as is any other string containing
      ``://`` (``Path()`` would mangle the ``//`` into a corrupted echo of the URL);
    * a local path yields ``(Path(media), False)`` unvalidated — the caller checks
      it with ``validate_local_media`` (the ``kind`` differs per command).

    ``command`` names the subcommand in error messages and in the temp-dir
    prefix. ``fetch_clause`` completes "assembly <command> can't fetch this URL;
    it …" and ``download_suggestion`` is the hint attached to both rejections.
    ``video`` and ``download_sections`` are forwarded to the downloader;
    ``json_mode`` and ``quiet`` are forwarded to the download status display.

    Raises ``UsageError`` for a URL that cannot be downloaded.
    """
    # Order matters: downloadable media pages are themselves http(s) URLs, so
    # they must be recognized before the generic URL rejections below.
    if youtube.is_downloadable_url(media):
        download_label = "Downloading video…" if video else "Downloading audio…"
        with tempfile.TemporaryDirectory(prefix=f"aai-{command}-src-") as td:
            # The status display covers only the download itself.
            with output.status(download_label, json_mode=json_mode, quiet=quiet):
                local = youtube.download_media(
                    media, Path(td), video=video, download_sections=download_sections
                )
            # Yield while the temp dir is still open so the file exists for the
            # whole ``with`` body; it is removed when the caller's block exits.
            yield local, True
        return
    if media.startswith(("http://", "https://")):
        raise UsageError(
            f"assembly {command} can't fetch this URL; it {fetch_clause}.",
            suggestion=download_suggestion,
        )
    if "://" in media:
        raise UsageError(
            f"assembly {command} needs a local file, not a URL: {media}",
            suggestion=download_suggestion,
        )
    yield Path(media), False


def default_output(
out: Path | None, media: Path, *, downloaded: bool, namer: Callable[[Path], Path]
) -> Path:
"""The output path for a media command: an explicit ``--out`` wins; otherwise
``namer(media)`` next to the source — or, when ``media`` is a vanishing
downloaded temp file, that same name dropped into the current directory."""
if out is not None:
return out
chosen = namer(media)
return Path.cwd() / chosen.name if downloaded else chosen


def validate_out(out: Path, media: Path) -> None:
"""An unwritable or self-overwriting output file must fail here, before the
billed transcription/translation/synthesis pipeline runs.
Expand Down
Loading
Loading