From 4b23f930f3a13bcb9e0341036e612b8bcad25f76 Mon Sep 17 00:00:00 2001 From: Anthony Ettinger Date: Sat, 27 Jun 2026 17:10:49 +0000 Subject: [PATCH 1/2] feat(pipeline): qaaas-style Claude feature-detection + media pipeline brain Port the qaaas QA feature-detection flow into makedemo and extend it for marketing demo videos: crawl the whole site, let Claude pick the demo-worthy features (structured output), screen-record each feature, let Claude write one cohesive voiceover script + a surreal/metal suno.com music prompt, synthesize the voiceover (ElevenLabs), and assemble motion graphics + recorded clips + user-uploaded clips + a ducked Suno background track into the final MP4. Designed as a drop-in for the parallel web-upload shell (feat/web-app): same job + emit(type, data) contract, exported runPipeline(job, emit). - src/pipeline/{crawl,feature-detect}: same-origin BFS crawl + Claude feature detection (claude-opus-4-8, adaptive thinking, zod structured output), with a heuristic fallback when no ANTHROPIC_API_KEY. - src/pipeline/feature-recorder: Playwright recordVideo per feature. - src/pipeline/script-writer: cohesive VO script + suno.com surreal/metal prompt. - src/pipeline/voiceover: ElevenLabs TTS (reuses existing audio generator). - src/pipeline/{timeline,graphics,music,assembly}: timeline math, ffmpeg motion graphics (animated cards + lower-thirds; optional Remotion backend), suno song looped + sidechain-ducked under the voiceover, final ffmpeg assembly. - 13 unit tests for the pure pieces (timeline, ducking filter, URL globs). - docs/PIPELINE.md documents the architecture + web integration contract. 
Co-Authored-By: Claude Opus 4.8 --- .env.example | 13 ++- docs/PIPELINE.md | 123 ++++++++++++++++++++++++++ package.json | 11 ++- scripts/run-pipeline.js | 55 ++++++++++++ src/pipeline/assembly.js | 146 +++++++++++++++++++++++++++++++ src/pipeline/crawl.js | 115 ++++++++++++++++++++++++ src/pipeline/feature-detect.js | 61 +++++++++++++ src/pipeline/feature-recorder.js | 85 ++++++++++++++++++ src/pipeline/ffmpeg.js | 49 +++++++++++ src/pipeline/graphics.js | 140 +++++++++++++++++++++++++++++ src/pipeline/index.js | 120 +++++++++++++++++++++++++ src/pipeline/llm.js | 85 ++++++++++++++++++ src/pipeline/music.js | 60 +++++++++++++ src/pipeline/schemas.js | 58 ++++++++++++ src/pipeline/script-writer.js | 104 ++++++++++++++++++++++ src/pipeline/timeline.js | 35 ++++++++ src/pipeline/url-utils.js | 33 +++++++ src/pipeline/voiceover.js | 41 +++++++++ test/pipeline/crawl.test.js | 26 ++++++ test/pipeline/music.test.js | 30 +++++++ test/pipeline/timeline.test.js | 40 +++++++++ 21 files changed, 1427 insertions(+), 3 deletions(-) create mode 100644 docs/PIPELINE.md create mode 100644 scripts/run-pipeline.js create mode 100644 src/pipeline/assembly.js create mode 100644 src/pipeline/crawl.js create mode 100644 src/pipeline/feature-detect.js create mode 100644 src/pipeline/feature-recorder.js create mode 100644 src/pipeline/ffmpeg.js create mode 100644 src/pipeline/graphics.js create mode 100644 src/pipeline/index.js create mode 100644 src/pipeline/llm.js create mode 100644 src/pipeline/music.js create mode 100644 src/pipeline/schemas.js create mode 100644 src/pipeline/script-writer.js create mode 100644 src/pipeline/timeline.js create mode 100644 src/pipeline/url-utils.js create mode 100644 src/pipeline/voiceover.js create mode 100644 test/pipeline/crawl.test.js create mode 100644 test/pipeline/music.test.js create mode 100644 test/pipeline/timeline.test.js diff --git a/.env.example b/.env.example index dcf63bc..b4e4cce 100644 --- a/.env.example +++ b/.env.example @@ -1,11 
+1,20 @@ -# OpenAI Configuration +# Anthropic / Claude (pipeline brain: feature detection + script writing) +# When set, the pipeline runs the "smart" path; unset falls back to heuristics. +ANTHROPIC_API_KEY=your_anthropic_api_key_here +ANTHROPIC_MODEL=claude-opus-4-8 + +# OpenAI Configuration (legacy CLI path) OPENAI_API_KEY=your_openai_api_key_here OPENAI_MODEL=gpt-4-turbo-preview -# ElevenLabs Configuration +# ElevenLabs Configuration (voiceover synthesis) ELEVENLABS_API_KEY=your_elevenlabs_api_key_here ELEVENLABS_VOICE_ID=your_preferred_voice_id_here +# Motion graphics: set to 1 to use the Remotion backend (requires the +# optional @remotion/* deps + graphics/ project). Default uses the ffmpeg backend. +MKDEMO_REMOTION=0 + # Puppeteer Configuration PUPPETEER_HEADLESS=true PUPPETEER_VIEWPORT_WIDTH=1920 diff --git a/docs/PIPELINE.md b/docs/PIPELINE.md new file mode 100644 index 0000000..4ef45f7 --- /dev/null +++ b/docs/PIPELINE.md @@ -0,0 +1,123 @@ +# makedemo pipeline brain (Claude feature-detection + media pipeline) + +This document describes the `src/pipeline/` module: a qaaas-style flow that turns +a URL into a finished, scored-for-vibes demo video — animated motion graphics, +recorded feature clips, user-uploaded B-roll, a Claude-written voiceover, and a +ducked surreal/metal background track from a suno.com song. + +It is designed as a **drop-in for the web upload UI** (built in parallel on the +`feat/web-app` branch): it speaks the same `job` / `emit(type, data)` contract. + +## Why this exists + +The original makedemo planned interactions on a **single page** with OpenAI and +narrated each click. This brain borrows the qaaas QA flow instead: + +> crawl the whole site → let Claude pick the real features → act on each feature +> → let Claude write the script → synthesize → assemble. + +…and extends it for marketing video: real screen-recorded clips, uploaded +clips, motion graphics, one cohesive voiceover, and music. 
+ +## Pipeline stages + +| Stage | Module | What it does | +|---|---|---| +| 1. discover | `crawl.js` + `feature-detect.js` | Same-origin BFS crawl (optionally logged in), then Claude picks the demo-worthy features (name, pitch, start URL, concrete steps). Structured output via zod. Heuristic fallback with no key. | +| 2. record | `feature-recorder.js` | Playwright `recordVideo` per feature, performing the feature's steps, → one `.webm` clip each. | +| 3. script | `script-writer.js` | Claude writes ONE cohesive timeline (intro → features/clips → outro) with on-screen titles, captions, spoken narration, **and a ready-to-paste suno.com surreal/metal music prompt**. | +| 4. voiceover | `voiceover.js` | ElevenLabs TTS per segment (one consistent voice), silent beds for B-roll. | +| 5. assemble | `timeline.js` + `graphics.js` + `music.js` + `assembly.js` | Build a timeline from voiceover durations; render animated cards + lower-thirds; concat segment videos; build the continuous voiceover; **duck the uploaded song under it**; mux → `demo.mp4`. | + +Orchestrated by `index.js#runPipeline(job, emit)`. + +## The job + event contract (drop-in for the web shell) + +```js +// job (superset of the existing web job) +{ + id, url, + credentials: { user, password } | null, + maxFeatures: number, // default 5 + voice: string | null, // ElevenLabs voice id + clips: string[], // paths to user-uploaded video clips + song: string | null, // path to an uploaded suno.com song clip +} + +// emit(type, data) — same vocabulary as web/lib/jobs.js +// 'stage' { stage, status, step?, total? } stage ∈ discover|record|script|voiceover|assemble +// 'log' { level, msg } +// 'script' { features? , title?, segments?, sunoPrompt? 
} +// 'video' { video } +// 'done' { video, features, sunoPrompt } +// 'error' { message } +``` + +The web shell can swap its `web/lib/pipeline.js` import for: + +```js +import { runPipeline, OUTPUT_ROOT } from '../../src/pipeline/index.js'; +// in runPipeline(job) inside jobs.js: +await runPipeline(job, (type, data) => emit(job, type, data)); +``` + +Outputs land in `output/<job.id>/`: `demo.mp4`, `transcript.txt`, `suno-prompt.txt`, +per-feature `feature-NN.webm`, per-segment voiceover, and a `work/` scratch dir. + +## Music: the suno.com flow + +1. The script writer emits a **surreal/metal Suno prompt** (saved to + `suno-prompt.txt` and emitted on the `script` event) — the user pastes it into + suno.com and downloads a clip. +2. The uploaded clip is passed as `job.song`. `music.js` loops it to cover the + whole video and **sidechain-compresses it against the voiceover** so the bed + ducks under narration and swells in the gaps. +3. No song → voiceover-only audio (still a complete video). + +## Motion graphics + +`graphics.js` has two backends: + +- **ffmpeg (default):** animated `mandelbrot` background (surreal, zero-asset), + fading kinetic title/subtitle, and slide-in lower-thirds burned onto clips. +- **Remotion (opt-in, `MKDEMO_REMOTION=1`):** full React motion graphics via + `@remotion/renderer` + a `graphics/` composition. Falls back to ffmpeg if the + optional deps/project aren't present. + +## Smart path vs fallback + +Every external dependency degrades gracefully, matching makedemo's existing +style: + +| Missing | Behavior | +|---|---| +| `ANTHROPIC_API_KEY` | Heuristic feature list + deterministic script (no Claude). | +| `ELEVENLABS_API_KEY` | Silent timed voiceover (timing preserved). | +| `job.song` | Voiceover-only audio. | +| `@remotion/*` | ffmpeg motion-graphics backend. 
| + +## Running locally + +```bash +pnpm install +npx playwright install chromium # browsers for crawl + recording +node scripts/run-pipeline.js --url https://example.com \ + --clips ./a.mp4,./b.mp4 --song ./suno.mp3 --max-features 5 +``` + +Unit tests for the pure pieces (timeline math, music filter, URL globs): + +```bash +pnpm run test:pipeline +``` + +## Status / what's wired vs. pending + +- **Wired & unit-tested:** timeline math, ducking-filter construction, crawl + URL globbing. +- **Wired (needs keys/browsers/ffmpeg to run end-to-end):** crawl, Claude + feature detection + script, Playwright recording, ElevenLabs voiceover, + ffmpeg assembly with ducked music. +- **Scaffolded:** Remotion backend (ffmpeg backend is the default and fully + functional); a Claude action-loop for richer in-clip interactions (current + recorder uses a light step interpreter) is the next upgrade. diff --git a/package.json b/package.json index 1399a88..3de2a03 100644 --- a/package.json +++ b/package.json @@ -10,7 +10,9 @@ "scripts": { "start": "node src/index.js", "setup": "node scripts/setup.js", + "pipeline": "node scripts/run-pipeline.js", "test": "NODE_OPTIONS='--loader=./node_modules/mocha/lib/nodejs/esm-utils.js' mocha test/**/*.test.js --recursive", + "test:pipeline": "NODE_OPTIONS='--loader=./node_modules/mocha/lib/nodejs/esm-utils.js' mocha test/pipeline/**/*.test.js --recursive", "test:watch": "NODE_OPTIONS='--loader=./node_modules/mocha/lib/nodejs/esm-utils.js' mocha test/**/*.test.js --recursive --watch", "test:coverage": "NODE_OPTIONS='--loader=./node_modules/mocha/lib/nodejs/esm-utils.js' c8 mocha test/**/*.test.js --recursive", "lint": "eslint src/ test/", @@ -38,6 +40,7 @@ "node": ">=20.0.0" }, "dependencies": { + "@anthropic-ai/sdk": "^0.106.0", "chalk": "^5.3.0", "commander": "^11.1.0", "dotenv": "^16.4.1", @@ -47,8 +50,14 @@ "node-fetch": "^3.3.2", "openai": "^4.28.4", "ora": "^8.0.1", + "playwright": "^1.61.0", "puppeteer": "^21.11.0", - "winston": "^3.11.0" + 
"winston": "^3.11.0", + "zod": "^4.0.0" + }, + "optionalDependencies": { + "@remotion/bundler": "^4.0.0", + "@remotion/renderer": "^4.0.0" }, "devDependencies": { "@eslint/js": "^8.57.0", diff --git a/scripts/run-pipeline.js b/scripts/run-pipeline.js new file mode 100644 index 0000000..38e229b --- /dev/null +++ b/scripts/run-pipeline.js @@ -0,0 +1,55 @@ +#!/usr/bin/env node +import 'dotenv/config'; +import { randomUUID } from 'node:crypto'; +import { runPipeline } from '../src/pipeline/index.js'; + +/** + * Local CLI runner for the makedemo pipeline brain — useful for testing the + * full crawl -> Claude features -> record -> script -> voiceover -> assemble + * flow without the web shell. The web app drives runPipeline(job, emit) the + * same way. + * + * node scripts/run-pipeline.js --url https://example.com \ + * --clips ./a.mp4,./b.mp4 --song ./suno.mp3 --max-features 5 + */ +function parseArgs(argv) { + const out = {}; + for (let i = 0; i < argv.length; i++) { + const a = argv[i]; + if (a.startsWith('--')) { + const key = a.slice(2); + const val = argv[i + 1] && !argv[i + 1].startsWith('--') ? argv[++i] : 'true'; + out[key] = val; + } + } + return out; +} + +const args = parseArgs(process.argv.slice(2)); +if (!args.url) { + console.error('Usage: node scripts/run-pipeline.js --url [--user --password

] [--clips a.mp4,b.mp4] [--song suno.mp3] [--max-features 5] [--voice ]'); + process.exit(1); +} + +const job = { + id: randomUUID().slice(0, 8), + url: args.url, + credentials: args.user && args.password ? { user: args.user, password: args.password } : null, + maxFeatures: args['max-features'] ? Number(args['max-features']) : 5, + voice: args.voice || null, + clips: args.clips ? args.clips.split(',').map((s) => s.trim()).filter(Boolean) : [], + song: args.song || null, +}; + +const emit = (type, data) => { + if (type === 'log') console.log(` ${data.msg}`); + else if (type === 'stage') console.log(`[${data.stage}] ${data.status}${data.step ? ` ${data.step}/${data.total}` : ''}`); + else if (type === 'done') console.log(`\n✅ Done: output/${job.id}/${data.video}`); + else if (type === 'error') console.error(`❌ ${data.message}`); +}; + +console.log(`Job ${job.id} → ${job.url}`); +runPipeline(job, emit).catch((err) => { + console.error('Pipeline failed:', err); + process.exit(1); +}); diff --git a/src/pipeline/assembly.js b/src/pipeline/assembly.js new file mode 100644 index 0000000..3967817 --- /dev/null +++ b/src/pipeline/assembly.js @@ -0,0 +1,146 @@ +import path from 'node:path'; +import fs from 'node:fs/promises'; +import { runFfmpeg } from './ffmpeg.js'; +import { renderCard, overlayLowerThird } from './graphics.js'; +import { mixMusicUnderVoice, validateSong } from './music.js'; + +const W = 1920; +const H = 1080; +const FPS = 30; +const PAD_COLOR = '0x0d0c0f'; + +/** + * Assemble the final MP4 from a built timeline. + * + * Video and audio are built as two independent tracks, then muxed: + * - VIDEO: each segment -> a normalized, silent 1080p/30fps clip (animated + * card for intro/outro; recorded feature webm or uploaded clip otherwise, + * with an animated lower-third burned on). Concatenated in order. 
+ * - AUDIO: each segment's voiceover, padded to the segment's exact duration, + * concatenated into one continuous voice track, then (if a suno.com song + * was uploaded) ducked under the looped music bed. + * + * @param {object} opts + * @param {Array} opts.timeline built timeline (see timeline.js) + * @param {Array<{path,duration}>} opts.audios per-segment voiceover (aligned) + * @param {Array<{path}>} opts.featureClips recorded feature clips (mp4/webm on disk) + * @param {string[]} opts.uploadedClips user-uploaded clip paths + * @param {string|null} opts.song uploaded suno.com song path + * @param {string} opts.workDir + * @param {string} opts.outPath + * @param {(m:string)=>void} [opts.log] + */ +export async function assembleVideo(opts) { + const { timeline, audios, featureClips, uploadedClips, song, workDir, outPath, log } = opts; + await fs.mkdir(workDir, { recursive: true }); + + // 1. Build one normalized, silent video clip per segment. + const segVideos = []; + for (const seg of timeline) { + const out = path.join(workDir, `seg-${String(seg.index).padStart(2, '0')}.mp4`); + log?.(`Rendering segment ${seg.index + 1}/${timeline.length} (${seg.kind})`); + + if (seg.kind === 'intro' || seg.kind === 'outro') { + await renderCard({ title: seg.title, subtitle: seg.caption, duration: seg.duration, outPath: out }); + } else { + const src = sourceForSegment(seg, featureClips, uploadedClips); + if (!src) { + // Missing source — render a titled card so the timeline stays intact. + await renderCard({ title: seg.title || '', subtitle: seg.caption || '', duration: seg.duration, outPath: out }); + } else { + const normalized = path.join(workDir, `norm-${String(seg.index).padStart(2, '0')}.mp4`); + await normalizeClip(src, seg.duration, normalized); + await overlayLowerThird(normalized, { title: seg.title, caption: seg.caption, outPath: out }); + } + } + segVideos.push(out); + } + + // 2. Concat the segment videos (identical params -> stream copy). 
+ const videoTrack = path.join(workDir, 'video.mp4'); + await concatCopy(segVideos, path.join(workDir, 'video-list.txt'), videoTrack); + + // 3. Build the continuous voiceover track (pad each segment to its duration). + const paddedVo = []; + for (const seg of timeline) { + const vo = audios[seg.index]; + const out = path.join(workDir, `voa-${String(seg.index).padStart(2, '0')}.m4a`); + await padAudioToDuration(vo.path, seg.duration, out); + paddedVo.push(out); + } + const voiceTrack = path.join(workDir, 'voice.m4a'); + await concatAudioFilter(paddedVo, voiceTrack); + + // 4. Mix in the ducked music bed if a song was uploaded. + let audioTrack = voiceTrack; + if (song && (await validateSong(song))) { + log?.('Ducking suno.com song under the voiceover'); + audioTrack = path.join(workDir, 'final-audio.m4a'); + await mixMusicUnderVoice(voiceTrack, song, audioTrack); + } else if (song) { + log?.('Uploaded song unreadable — continuing with voiceover only'); + } + + // 5. Mux video + audio. + await runFfmpeg([ + '-i', videoTrack, + '-i', audioTrack, + '-map', '0:v:0', '-map', '1:a:0', + '-c:v', 'copy', '-c:a', 'aac', '-b:a', '192k', + '-shortest', '-movflags', '+faststart', + outPath, + ]); + log?.(`Final video written: ${outPath}`); + return outPath; +} + +function sourceForSegment(seg, featureClips, uploadedClips) { + if (seg.kind === 'feature' && seg.featureIndex != null) return featureClips[seg.featureIndex]?.path; + if (seg.kind === 'clip' && seg.clipIndex != null) return uploadedClips[seg.clipIndex]; + return null; +} + +// Scale + letterbox a source to 1080p/30fps, looping short clips and trimming to +// an exact duration. Output is silent so audio can be assembled independently. 
+async function normalizeClip(src, duration, outPath) { + const vf = `scale=${W}:${H}:force_original_aspect_ratio=decrease,pad=${W}:${H}:(ow-iw)/2:(oh-ih)/2:color=${PAD_COLOR},fps=${FPS},format=yuv420p`; + await runFfmpeg([ + '-stream_loop', '-1', '-i', src, + '-t', String(duration), + '-vf', vf, + '-an', + '-c:v', 'libx264', '-preset', 'veryfast', '-pix_fmt', 'yuv420p', + outPath, + ]); +} + +async function concatCopy(files, listFile, outPath) { + await fs.writeFile(listFile, files.map((f) => `file '${f}'`).join('\n'), 'utf8'); + await runFfmpeg(['-f', 'concat', '-safe', '0', '-i', listFile, '-c', 'copy', outPath]); +} + +// Pad (or trim) an audio file to an exact duration; normalize to stereo/44.1k AAC. +async function padAudioToDuration(audioPath, duration, outPath) { + await runFfmpeg([ + '-i', audioPath, + '-af', 'apad', + '-t', String(duration), + '-ar', '44100', '-ac', '2', + '-c:a', 'aac', '-b:a', '192k', + outPath, + ]); +} + +// Concatenate audio segments with the concat filter (robust across containers). +async function concatAudioFilter(files, outPath) { + const inputs = files.flatMap((f) => ['-i', f]); + const filter = files.map((_, i) => `[${i}:a]`).join('') + `concat=n=${files.length}:v=0:a=1[a]`; + await runFfmpeg([ + ...inputs, + '-filter_complex', filter, + '-map', '[a]', + '-ar', '44100', '-ac', '2', + '-c:a', 'aac', '-b:a', '192k', + outPath, + ]); +} diff --git a/src/pipeline/crawl.js b/src/pipeline/crawl.js new file mode 100644 index 0000000..163063f --- /dev/null +++ b/src/pipeline/crawl.js @@ -0,0 +1,115 @@ +import { chromium } from 'playwright'; +import { globToRegExp, isExcluded, normalize } from './url-utils.js'; + +/** + * Same-origin site crawler (ported/adapted from the qaaas discovery flow). + * + * BFS over links from the homepage, capturing each page's URL, title, + * and a snippet of visible text. Optional credential login runs first so the + * crawl can reach authenticated app surfaces. 
The crawled pages feed Claude in + * feature-detect.js. + */ + +export { globToRegExp, isExcluded, normalize }; + +// Best-effort heuristic login using Playwright (the existing auth handler is +// Puppeteer-bound; here we keep the crawler self-contained on Playwright). +async function tryLogin(page, { loginUrl, homepageUrl, user, password }, log) { + try { + await page.goto(loginUrl || homepageUrl, { waitUntil: 'domcontentloaded', timeout: 30000 }).catch(() => {}); + await page.waitForLoadState('networkidle', { timeout: 6000 }).catch(() => {}); + + const emailSel = 'input[type="email"], input[name*="email" i], input[name*="user" i], input[id*="email" i]'; + const passSel = 'input[type="password"]'; + const email = page.locator(emailSel).first(); + const pass = page.locator(passSel).first(); + + if (await email.count()) await email.fill(user, { timeout: 4000 }).catch(() => {}); + if (await pass.count()) await pass.fill(password, { timeout: 4000 }).catch(() => {}); + + const submit = page + .locator('button[type="submit"], input[type="submit"], button:has-text("Log in"), button:has-text("Sign in")') + .first(); + if (await submit.count()) await submit.click({ timeout: 4000 }).catch(() => {}); + else await pass.press('Enter').catch(() => {}); + + await page.waitForLoadState('networkidle', { timeout: 8000 }).catch(() => {}); + log?.(`Attempted login as ${user}`); + } catch (err) { + log?.(`Login skipped/failed: ${String(err).slice(0, 120)}`); + } +} + +/** + * @param {object} opts + * @param {string} opts.homepageUrl + * @param {number} [opts.maxPages] + * @param {{user,password,loginUrl?}|null} [opts.credentials] + * @param {string[]} [opts.excludedPaths] + * @param {(msg:string)=>void} [opts.log] + * @returns {Promise<Array<{url,title,text}>>} + */ +export async function crawlSite(opts) { + const maxPages = Math.min(opts.maxPages || 20, 40); + const excludePatterns = (opts.excludedPaths ?? 
[]).map((p) => p.trim()).filter(Boolean).map(globToRegExp); + const log = opts.log; + + const browser = await chromium.launch({ headless: true }); + const context = await browser.newContext({ viewport: { width: 1280, height: 720 } }); + try { + const page = await context.newPage(); + const origin = new URL(opts.homepageUrl).origin; + const deadline = Date.now() + 5 * 60 * 1000; + + if (opts.credentials?.user && opts.credentials?.password) { + await tryLogin(page, { ...opts.credentials, homepageUrl: opts.homepageUrl }, log); + } + + const visited = new Set(); + const queue = [normalize(opts.homepageUrl)]; + const pages = []; + + while (queue.length && pages.length < maxPages && Date.now() < deadline) { + const url = queue.shift(); + if (visited.has(url)) continue; + visited.add(url); + if (isExcluded(url, excludePatterns)) continue; + + try { + await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 20000 }); + await page.waitForLoadState('networkidle', { timeout: 5000 }).catch(() => {}); + + const title = await page.title(); + const text = (await page.locator('body').innerText({ timeout: 4000 }).catch(() => '')).slice(0, 1500); + pages.push({ url, title, text }); + log?.(`Crawled ${title || url}`); + + const links = await page.evaluate( + (org) => + Array.from(document.querySelectorAll('a[href]')) + .map((a) => a.href) + .filter((h) => { + try { + return new URL(h).origin === org; + } catch { + return false; + } + }), + origin, + ); + + for (const link of links.map(normalize)) { + if (!visited.has(link) && !queue.includes(link) && !isExcluded(link, excludePatterns) && queue.length < maxPages * 3) { + queue.push(link); + } + } + } catch (err) { + log?.(`Skipped ${url}: ${String(err).slice(0, 120)}`); + } + } + + return pages; + } finally { + await browser.close(); + } +} diff --git a/src/pipeline/feature-detect.js b/src/pipeline/feature-detect.js new file mode 100644 index 0000000..6303b11 --- /dev/null +++ b/src/pipeline/feature-detect.js @@ -0,0 +1,61 @@ 
+import { parseStructured } from './llm.js'; +import { featureDetectionSchema } from './schemas.js'; + +/** + * Turn a crawl into a ranked list of demo-worthy features (Claude). + * + * This is the makedemo analog of qaaas's "identify testable features" step — + * but framed for marketing: pick the features that best sell the product, give + * each a punchy name, a one-line pitch, and concrete on-page steps to perform + * while recording. + * + * Falls back to a heuristic (homepage + a few distinct crawled pages) when no + * ANTHROPIC_API_KEY is configured. + */ +export async function detectFeatures({ homepageUrl, pages, maxFeatures = 5, log }) { + log?.(`Analyzing ${pages.length} page(s) to pick demo-worthy features`); + + const result = await parseStructured({ + system: + 'You are a senior product marketer making a punchy demo video. Given a crawl of a website (each page has a URL, title, and a snippet of visible text), pick the distinct, user-facing FEATURES that best showcase the product (e.g. a core workflow, a create/edit flow, search, a dashboard, checkout). For each feature give: a short punchy name, the best feature URL to start from (chosen from the crawled URLs), a one-sentence pitch a viewer would care about, and 2-5 concrete on-page actions to perform while recording it. Order them into a compelling narrative (hook first, payoff last). Skip legal/utility pages (privacy, terms, 404, login) and avoid duplicates.', + prompt: `Homepage: ${homepageUrl}\nWant about ${maxFeatures} features.\n\nCrawled pages:\n${pages + .map((p) => `- ${p.url}\n title: ${p.title}\n text: ${p.text.slice(0, 300)}`) + .join('\n')}`, + schema: featureDetectionSchema, + maxTokens: 8000, + }); + + let features = result?.features ?? 
[]; + if (features.length === 0) { + features = heuristicFeatures(homepageUrl, pages, maxFeatures); + log?.('No LLM features — using heuristic feature list'); + } + + features = features.slice(0, maxFeatures); + log?.(`Selected ${features.length} feature(s)`); + return features; +} + +function heuristicFeatures(homepageUrl, pages, maxFeatures) { + const skip = /privacy|terms|login|signin|sign-in|404|cookie/i; + const picked = []; + const seenPaths = new Set(); + for (const p of [{ url: homepageUrl, title: 'Home', text: '' }, ...pages]) { + let path; + try { + path = new URL(p.url).pathname; + } catch { + continue; + } + if (seenPaths.has(path) || skip.test(p.url) || skip.test(p.title || '')) continue; + seenPaths.add(path); + picked.push({ + name: (p.title || path || 'Feature').slice(0, 40), + featureUrl: p.url, + pitch: `A look at ${p.title || path}.`, + steps: ['Wait for the page to load', 'Scroll through the main content'], + }); + if (picked.length >= maxFeatures) break; + } + return picked; +} diff --git a/src/pipeline/feature-recorder.js b/src/pipeline/feature-recorder.js new file mode 100644 index 0000000..613feed --- /dev/null +++ b/src/pipeline/feature-recorder.js @@ -0,0 +1,85 @@ +import { chromium } from 'playwright'; +import { mkdtemp, readFile, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; + +/** + * Record a short screen-capture clip of one feature (Playwright recordVideo). + * + * Ported from qaaas's browser-explorer: navigate to the feature URL, record + * video while performing the feature's steps, then flush the .webm to disk and + * return its bytes (the assembly stage transcodes/trims to the timeline). + * + * Steps are plain English; we run a light best-effort interpreter (click text + * matches, scroll, wait) so the recording shows motion. A full Claude-driven + * action loop (like qaaas browser-agent) is the documented upgrade path. 
+ */ +export async function recordFeature(feature, { credentials, log } = {}) { + const videoDir = await mkdtemp(join(tmpdir(), 'mkdemo-feat-')); + const browser = await chromium.launch({ headless: true }); + const context = await browser.newContext({ + viewport: { width: 1280, height: 720 }, + recordVideo: { dir: videoDir, size: { width: 1280, height: 720 } }, + }); + + try { + const page = await context.newPage(); + await page.goto(feature.featureUrl, { waitUntil: 'domcontentloaded', timeout: 30000 }).catch(() => {}); + await page.waitForLoadState('networkidle', { timeout: 8000 }).catch(() => {}); + + for (const step of feature.steps || []) { + await runStep(page, step).catch(() => {}); + await page.waitForTimeout(1200); + } + // A beat at the end so the clip doesn't cut on the last action. + await page.waitForTimeout(1000); + + const video = page.video(); + await context.close(); // flushes the recorded video to disk + + let webm; + if (video) { + const p = await video.path().catch(() => null); + if (p) webm = await readFile(p).catch(() => undefined); + } + log?.(`Recorded "${feature.name}" (${webm ? webm.length : 0} bytes)`); + return { name: feature.name, webm }; + } finally { + await browser.close(); + await rm(videoDir, { recursive: true, force: true }).catch(() => {}); + } +} + +// Best-effort single-step interpreter. Tries to click an element whose visible +// text overlaps the step's words; otherwise scrolls the viewport. +async function runStep(page, step) { + const lower = String(step).toLowerCase(); + + if (/scroll/.test(lower)) { + await page.mouse.wheel(0, 600); + return; + } + if (/wait|load/.test(lower)) { + await page.waitForTimeout(800); + return; + } + + // Pull candidate phrases (quoted text or capitalized words) from the step. 
+ const quoted = [...lower.matchAll(/"([^"]+)"|'([^']+)'/g)].map((m) => m[1] || m[2]); + const words = lower.replace(/[^a-z0-9 ]/g, ' ').split(/\s+/).filter((w) => w.length > 3); + const phrases = [...quoted, ...words]; + + for (const phrase of phrases) { + const el = page.getByText(new RegExp(phrase, 'i')).first(); + if (await el.count().catch(() => 0)) { + const box = await el.boundingBox().catch(() => null); + if (box) { + await el.scrollIntoViewIfNeeded({ timeout: 2000 }).catch(() => {}); + await el.click({ timeout: 2500 }).catch(() => {}); + return; + } + } + } + // Nothing matched — give the viewer a gentle scroll. + await page.mouse.wheel(0, 400); +} diff --git a/src/pipeline/ffmpeg.js b/src/pipeline/ffmpeg.js new file mode 100644 index 0000000..f279379 --- /dev/null +++ b/src/pipeline/ffmpeg.js @@ -0,0 +1,49 @@ +import { spawn } from 'node:child_process'; + +/** Run ffmpeg with the given args; resolves on success, rejects on non-zero. */ +export function runFfmpeg(args, onLog) { + return new Promise((resolve, reject) => { + const proc = spawn('ffmpeg', ['-y', '-hide_banner', '-loglevel', 'error', ...args]); + let stderr = ''; + proc.stderr.on('data', (chunk) => { + const text = chunk.toString(); + stderr += text; + onLog?.(text.trim()); + }); + proc.on('error', (err) => reject(new Error(`ffmpeg failed to start: ${err.message}`))); + proc.on('close', (code) => { + if (code === 0) resolve(); + else reject(new Error(`ffmpeg exited ${code}: ${stderr.slice(-500)}`)); + }); + }); +} + +/** Probe a media file's duration in seconds (ffprobe). */ +export function probeDuration(file) { + return new Promise((resolve, reject) => { + const proc = spawn('ffprobe', [ + '-v', 'error', + '-show_entries', 'format=duration', + '-of', 'default=noprint_wrappers=1:nokey=1', + file, + ]); + let out = ''; + proc.stdout.on('data', (c) => (out += c.toString())); + proc.on('error', reject); + proc.on('close', () => { + const d = parseFloat(out.trim()); + Number.isFinite(d) ? 
resolve(d) : reject(new Error('bad duration')); + }); + }); +} + +/** Generate a silent stereo AAC track of a given duration (timing fallback). */ +export async function generateSilentAudio(outputPath, durationSec) { + await runFfmpeg([ + '-f', 'lavfi', + '-i', 'anullsrc=channel_layout=stereo:sample_rate=44100', + '-t', String(Math.max(1, durationSec)), + '-c:a', 'aac', '-b:a', '128k', + outputPath, + ]); +} diff --git a/src/pipeline/graphics.js b/src/pipeline/graphics.js new file mode 100644 index 0000000..40a5fe1 --- /dev/null +++ b/src/pipeline/graphics.js @@ -0,0 +1,140 @@ +import path from 'node:path'; +import { existsSync } from 'node:fs'; +import { runFfmpeg } from './ffmpeg.js'; + +/** + * Motion graphics for the demo: animated title/outro cards and lower-third + * overlays burned onto feature/clip segments. + * + * Two backends: + * - Remotion (full React motion graphics) when MKDEMO_REMOTION=1 and the + * graphics/ project + @remotion/renderer are installed — see renderCardRemotion. + * - An ffmpeg backend (default) that animates a surreal mandelbrot background + * with fading kinetic type. Zero extra toolchain, always runnable. + */ + +const WIDTH = 1920; +const HEIGHT = 1080; +const FPS = 30; +const FONT = findFont(); + +function findFont() { + // Common Linux font; drawtext falls back gracefully if absent. + const candidates = [ + '/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf', + '/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf', + '/System/Library/Fonts/Supplemental/Arial Bold.ttf', + ]; + return candidates[0]; // ffmpeg ignores a missing file only if fontfile omitted; see drawtextFont() +} + +function escapeDrawtext(text) { + return String(text || '') + .replace(/\\/g, '\\\\') + .replace(/:/g, '\\:') + .replace(/'/g, "’") + .replace(/%/g, '\\%'); +} + +function drawtextFont() { + // Only pass fontfile if it exists; otherwise let ffmpeg use its default. + return existsSync(FONT) ? 
/**
 * Render an animated intro/outro card to MP4.
 *
 * When MKDEMO_REMOTION is "1" the Remotion backend is tried first; if it
 * reports failure (or throws) the ffmpeg backend below is used instead.
 * The ffmpeg card is a mandelbrot source, desaturated and darkened with `eq`,
 * with a title that fades in and bobs vertically and a subtitle that fades in
 * half a second later.
 *
 * @param {object} opts
 * @param {'intro'|'outro'} [opts.kind] accepted for callers; not read here
 * @param {string} opts.title
 * @param {string} opts.subtitle
 * @param {number} opts.duration seconds (clamped to at least 2)
 * @param {string} opts.outPath
 * @param {string} [opts.audioPath] optional narration to mux in
 * @returns {Promise<string>} `outPath`
 */
export async function renderCard({ title, subtitle, duration, outPath, audioPath }) {
  const seconds = Math.max(2, duration);

  if (process.env.MKDEMO_REMOTION === '1') {
    const rendered = await renderCardRemotion({
      title,
      subtitle,
      duration: seconds,
      outPath,
      audioPath,
    }).catch(() => false);
    if (rendered) return outPath;
  }

  const font = drawtextFont();
  const titleLayer = `drawtext=${font}text='${escapeDrawtext(title)}':fontcolor=white:fontsize=96:x=(w-text_w)/2:y=(h-text_h)/2-60-20*sin(t):alpha='min(1,t/0.8)':box=1:boxcolor=black@0.35:boxborderw=24`;
  const subtitleLayer = `drawtext=${font}text='${escapeDrawtext(subtitle || '')}':fontcolor=0xC0C0FF:fontsize=44:x=(w-text_w)/2:y=(h-text_h)/2+70:alpha='min(1,max(0,(t-0.5)/0.8))'`;
  const filterChain = [
    `format=yuv420p`,
    `eq=saturation=0.6:brightness=-0.25:contrast=1.1`,
    titleLayer,
    subtitleLayer,
  ].join(',');

  // Narration is an optional second input; without it the card is silent.
  const inputArgs = [
    '-f', 'lavfi', '-i', `mandelbrot=size=${WIDTH}x${HEIGHT}:rate=${FPS}`,
    ...(audioPath ? ['-i', audioPath] : []),
  ];
  const audioArgs = audioPath ? ['-c:a', 'aac', '-b:a', '192k', '-shortest'] : ['-an'];

  await runFfmpeg([
    ...inputArgs,
    '-t', String(seconds),
    '-vf', filterChain,
    '-r', String(FPS),
    '-c:v', 'libx264', '-preset', 'veryfast', '-pix_fmt', 'yuv420p',
    ...audioArgs,
    outPath,
  ]);
  return outPath;
}
/**
 * Burn a lower-third (title + caption) onto an existing video clip.
 *
 * The bar and both text lines are enabled only while t < 5 seconds; the text
 * alpha ramps from 0 to 1 over the first half second. With neither a title nor
 * a caption the clip is stream-copied to `outPath`, re-encoding only if the
 * copy fails.
 *
 * @param {string} inputVideo
 * @param {object} opts { title, caption, outPath }
 * @returns {Promise<string>} `outPath`
 */
export async function overlayLowerThird(inputVideo, { title, caption, outPath }) {
  const h264 = ['-c:v', 'libx264', '-preset', 'veryfast', '-pix_fmt', 'yuv420p'];

  if (!title && !caption) {
    // Nothing to draw: try a straight remux first, fall back to a re-encode.
    try {
      await runFfmpeg(['-i', inputVideo, '-c', 'copy', outPath]);
    } catch {
      await runFfmpeg(['-i', inputVideo, ...h264, outPath]);
    }
    return outPath;
  }

  const font = drawtextFont();
  const bar = `drawbox=x=80:y=h-220:w=900:h=120:color=black@0.45:t=fill:enable='lt(t,5)'`;
  const titleLine = `drawtext=${font}text='${escapeDrawtext(title)}':fontcolor=white:fontsize=52:x=110:y=h-200:enable='lt(t,5)':alpha='min(1,t/0.5)'`;
  const captionLine = `drawtext=${font}text='${escapeDrawtext(caption || '')}':fontcolor=0xC0C0FF:fontsize=34:x=110:y=h-140:enable='lt(t,5)':alpha='min(1,t/0.5)'`;
  const filterChain = [`format=yuv420p`, bar, titleLine, captionLine].join(',');
  const videoArgs = ['-i', inputVideo, '-vf', filterChain, ...h264];

  try {
    await runFfmpeg([...videoArgs, '-c:a', 'copy', outPath]);
  } catch {
    // Retry with audio dropped in case copying the audio stream was the problem.
    await runFfmpeg([...videoArgs, '-an', outPath]);
  }
  return outPath;
}
/**
 * Optional Remotion backend for intro/outro cards.
 *
 * Bundles graphics/src/index.jsx, selects the "Card" composition and renders
 * it as h264 to `outPath`. Any failure (packages not installed, bundle error,
 * render error) is logged and reported as `false` so the caller can fall back
 * to the ffmpeg backend.
 *
 * NOTE(review): no audio input is passed to Remotion here, so a card rendered
 * by this backend presumably has no narration track — confirm against the
 * Card composition and the assembly stage.
 *
 * @param {object} opts
 * @param {string} opts.title
 * @param {string} opts.subtitle
 * @param {number} opts.duration seconds; converted to frames at FPS
 * @param {string} opts.outPath
 * @returns {Promise<boolean>} true on success, false to fall back
 */
async function renderCardRemotion({ title, subtitle, duration, outPath }) {
  try {
    const { bundle } = await import('@remotion/bundler');
    const { renderMedia, selectComposition } = await import('@remotion/renderer');
    const entry = path.resolve('graphics/src/index.jsx');
    const serveUrl = await bundle({ entryPoint: entry });
    const inputProps = { title, subtitle, durationInFrames: Math.round(duration * FPS) };
    const composition = await selectComposition({ serveUrl, id: 'Card', inputProps });
    await renderMedia({ composition, serveUrl, codec: 'h264', outputLocation: outPath, inputProps });
    return true;
  } catch (err) {
    // Best-effort backend: surface the reason, then let the caller fall back.
    console.warn('graphics.renderCardRemotion failed, falling back to ffmpeg:', err?.message || err);
    return false;
  }
}
/**
 * Run the whole makedemo pipeline for one job.
 *
 * Stages, in order: discover (crawl + feature detection), record (one clip per
 * feature), script (voiceover script + Suno prompt), voiceover, assemble.
 * Progress is reported through `emit(type, data)` with 'stage', 'log',
 * 'script', 'video' and 'done' events. Artifacts are written under
 * OUTPUT_ROOT/<job.id>, and `job` is mutated with features, script,
 * sunoPrompt, voice, video and transcript.
 *
 * @param {object} job { id, url, credentials?, maxPages?, excludedPaths?, maxFeatures?, voice?, clips?, song? }
 * @param {(type: string, data: object) => void} [emit] progress callback; optional
 * @returns {Promise<{ videoPath: string, features: object[], script: object }>}
 */
export async function runPipeline(job, emit) {
  const send = emit || (() => {});
  const log = (msg) => send('log', { level: 'info', msg });
  const stage = (name, status, extra = {}) => send('stage', { stage: name, status, ...extra });

  const jobDir = path.join(OUTPUT_ROOT, job.id);
  await fs.mkdir(jobDir, { recursive: true });
  log(isLlmEnabled() ? 'Claude enabled (smart path)' : 'No ANTHROPIC_API_KEY — heuristic path');

  // STAGE 1 — discover: crawl + Claude feature detection.
  stage('discover', 'running');
  const pages = await crawlSite({
    homepageUrl: job.url,
    maxPages: job.maxPages || 20,
    credentials: job.credentials || null,
    excludedPaths: job.excludedPaths || [],
    log,
  });
  const features = await detectFeatures({
    homepageUrl: job.url,
    pages,
    maxFeatures: job.maxFeatures || 5,
    log,
  });
  job.features = features;
  stage('discover', 'done');
  send('script', { features });

  // STAGE 2 — record: one screen-capture clip per feature.
  stage('record', 'running');
  const featureClips = [];
  for (const [i, feature] of features.entries()) {
    stage('record', 'running', { step: i + 1, total: features.length });
    const recording = await recordFeature(feature, { credentials: job.credentials, log });
    let clipPath = null;
    if (recording.webm?.length) {
      clipPath = path.join(jobDir, `feature-${String(i).padStart(2, '0')}.webm`);
      await fs.writeFile(clipPath, recording.webm);
    }
    // A feature with no recording keeps its slot, with a null path.
    featureClips.push({ name: recording.name, path: clipPath });
  }
  stage('record', 'done');

  // STAGE 3 — script: cohesive VO script + Suno music prompt.
  stage('script', 'running');
  const uploadedClips = job.clips || [];
  const script = await writeScript({
    productUrl: job.url,
    features,
    clipCount: uploadedClips.length,
    log,
  });
  job.script = script;
  job.sunoPrompt = script.sunoPrompt;
  await fs.writeFile(path.join(jobDir, 'suno-prompt.txt'), script.sunoPrompt || '', 'utf8');
  const transcript = script.segments
    .map((segment) => segment.narration)
    .filter(Boolean)
    .join('\n\n');
  await fs.writeFile(path.join(jobDir, 'transcript.txt'), transcript, 'utf8');
  stage('script', 'done');
  send('script', { title: script.title, segments: script.segments, sunoPrompt: script.sunoPrompt });

  // STAGE 4 — voiceover: ElevenLabs per segment.
  stage('voiceover', 'running');
  const voiceover = await synthesizeVoiceover({
    segments: script.segments,
    outputDir: jobDir,
    voice: job.voice,
    log,
  });
  const { audios } = voiceover;
  job.voice = voiceover.voice;
  stage('voiceover', 'done');

  // STAGE 5 — assemble: motion graphics + clips + ducked music -> MP4.
  stage('assemble', 'running');
  const voiceDurations = audios.map((audio) => audio.duration);
  const timeline = buildTimeline(script.segments, { voiceDurations });
  const outPath = path.join(jobDir, 'demo.mp4');
  await assembleVideo({
    timeline,
    audios,
    featureClips,
    uploadedClips,
    song: job.song || null,
    workDir: path.join(jobDir, 'work'),
    outPath,
    log,
  });
  job.video = 'demo.mp4';
  job.transcript = 'transcript.txt';
  stage('assemble', 'done');
  send('video', { video: job.video });

  send('done', { video: job.video, features, sunoPrompt: job.sunoPrompt });
  return { videoPath: outPath, features, script };
}
/**
 * Make one structured Claude call and return the schema-validated object.
 *
 * @param {object} opts
 * @param {string} opts.system system prompt
 * @param {string} opts.prompt user message
 * @param {object} opts.schema zod schema describing the expected output
 * @param {number} [opts.maxTokens=8000]
 * @returns {Promise<object|null>} the parsed output, or null when no client is
 *   available, the response carries no parsed output, or the call throws
 *   (the error is logged).
 */
export async function parseStructured({ system, prompt, schema, maxTokens = 8000 }) {
  const anthropic = getClient();
  if (!anthropic) return null;

  try {
    const request = {
      model: MODEL,
      max_tokens: maxTokens,
      thinking: { type: 'adaptive' },
      output_config: { format: zodOutputFormat(schema) },
      system,
      messages: [{ role: 'user', content: prompt }],
    };
    const response = await anthropic.messages.parse(request);

    // A missing parsed_output (refusal / validation failure) is reported as null.
    const parsed = response.parsed_output;
    return parsed ?? null;
  } catch (err) {
    console.error('llm.parseStructured failed:', err?.message || err);
    return null;
  }
}
/**
 * Build the ffmpeg -filter_complex string that mixes a ducked music bed under
 * a voiceover track.
 *
 * Inputs are assumed to be: [0:a] = voiceover, [1:a] = music.
 *
 * The voiceover is split in two: one copy keys the sidechain compressor, the
 * other goes to the final mix. The music is attenuated to `musicVolume`,
 * compressed by the key, then mixed with the voiceover into [aout].
 *
 * `amix` rescales its inputs by default, which would halve the voiceover in a
 * two-input mix. `normalize=0` is therefore emitted unless `normalize` is
 * true, so the voiceover really is mixed at full volume. The samples are then
 * plainly summed, so keep `musicVolume` modest to avoid clipping.
 *
 * @param {object} [opts]
 * @param {number} [opts.musicVolume=0.35] linear gain applied to the music bed
 * @param {number} [opts.threshold=0.05] sidechaincompress threshold
 * @param {number} [opts.ratio=8] sidechaincompress ratio
 * @param {number} [opts.attack=20] sidechaincompress attack (ms)
 * @param {number} [opts.release=600] sidechaincompress release (ms)
 * @param {boolean} [opts.normalize=false] keep amix's default input scaling
 * @returns {string} filter_complex
 */
export function buildMusicFilter({
  musicVolume = 0.35,
  threshold = 0.05,
  ratio = 8,
  attack = 20,
  release = 600,
  normalize = false,
} = {}) {
  const mixOptions = ['inputs=2', 'duration=first', 'dropout_transition=0'];
  if (!normalize) mixOptions.push('normalize=0');

  return [
    '[0:a]asplit=2[vo][key]',
    `[1:a]volume=${musicVolume}[bed]`,
    `[bed][key]sidechaincompress=threshold=${threshold}:ratio=${ratio}:attack=${attack}:release=${release}[ducked]`,
    `[vo][ducked]amix=${mixOptions.join(':')}[aout]`,
  ].join(';');
}
/**
 * Check whether an uploaded song is usable as a music bed.
 *
 * @param {string} songPath path to the uploaded audio file
 * @returns {Promise<boolean>} true when the probed duration is a finite number
 *   above half a second; false otherwise, including when probing fails.
 */
export async function validateSong(songPath) {
  let seconds;
  try {
    seconds = await probeDuration(songPath);
  } catch {
    return false;
  }
  if (!Number.isFinite(seconds)) return false;
  return seconds > 0.5;
}
+ clipIndex: z.number().nullable().describe('Index into uploaded clips, or null'), + title: z.string().describe('On-screen title/lower-third text for this segment'), + caption: z.string().describe('Short on-screen caption synced to the voiceover'), + narration: z.string().describe('Voiceover line(s) for this segment, written to be spoken aloud'), +}); + +export const demoScriptSchema = z.object({ + title: z.string().describe('Overall video title shown on the intro card'), + tagline: z.string().describe('One-line subtitle for the intro card'), + segments: z.array(scriptSegmentSchema), + // A ready-to-paste prompt for suno.com to generate the surreal/metal bed. + sunoPrompt: z.string().describe('A surreal/metal background-music prompt the user can paste into suno.com'), +}); + +/** Mirrors the web shell's job object so this brain is a drop-in. */ +export const jobShape = { + id: 'string', + url: 'string', + credentials: '{ user, password } | null', + maxFeatures: 'number', + voice: 'string | null', + // New for the media pipeline: + clips: 'string[] (paths to user-uploaded video clips)', + song: 'string | null (path to an uploaded suno.com song clip)', +}; diff --git a/src/pipeline/script-writer.js b/src/pipeline/script-writer.js new file mode 100644 index 0000000..8db925d --- /dev/null +++ b/src/pipeline/script-writer.js @@ -0,0 +1,104 @@ +import { parseStructured } from './llm.js'; +import { demoScriptSchema } from './schemas.js'; + +/** + * Write one cohesive voiceover script + a surreal/metal Suno music prompt. + * + * Given the detected features and the count of user-uploaded clips, Claude + * produces an ordered timeline of segments (intro -> features/clips -> outro), + * each with on-screen title, caption, and a spoken narration line. It also + * writes a ready-to-paste suno.com prompt for the surreal/metal background bed. + * + * Falls back to a deterministic script when no ANTHROPIC_API_KEY is set, so the + * pipeline always produces something renderable. 
/**
 * Write one cohesive voiceover script + a surreal/metal Suno music prompt.
 *
 * Asks Claude for a structured script (validated against demoScriptSchema).
 * The schema only constrains `featureIndex` / `clipIndex` to "number or null",
 * so segments whose index does not point at an existing feature or uploaded
 * clip are dropped here before the script is returned. If the call yields
 * nothing usable, the deterministic heuristic script is returned instead.
 *
 * @param {object} opts
 * @param {string} opts.productUrl
 * @param {object[]} opts.features detected features ({ name, pitch, featureUrl, ... })
 * @param {number} [opts.clipCount=0] number of user-uploaded clips available
 * @param {(msg: string) => void} [opts.log]
 * @returns {Promise<object>} script: { title, tagline, segments, sunoPrompt }
 */
export async function writeScript({ productUrl, features, clipCount = 0, log }) {
  log?.('Writing the voiceover script + Suno music prompt');

  const result = await parseStructured({
    system:
      'You are a scriptwriter for a high-energy product demo video with a surreal, metal aesthetic. You are given the product URL, the features being shown (each already has a recorded screen clip), and the number of extra user-uploaded video clips to intercut. Produce: (1) an overall title and tagline; (2) an ordered list of timeline segments. Start with an "intro" segment, then weave "feature" segments (set featureIndex to the feature being shown, clipIndex null) and "clip" segments for the uploaded clips (set clipIndex 0..N-1, featureIndex null), and finish with an "outro" segment. Every feature should appear once; spread the uploaded clips between features as B-roll. For each segment write a punchy on-screen title, a short caption, and a narration line written to be spoken aloud (no stage directions). Keep narration tight — one or two sentences. (3) A vivid suno.com prompt for a SURREAL, METAL instrumental bed that matches the energy (mention tempo, instrumentation, mood, and that it must loop and sit under a voiceover).',
    prompt: `Product URL: ${productUrl}\nUploaded B-roll clips available: ${clipCount}\n\nFeatures (in intended order):\n${features
      .map((f, i) => `${i}. ${f.name} — ${f.pitch} (url: ${f.featureUrl})`)
      .join('\n')}`,
    schema: demoScriptSchema,
    maxTokens: 8000,
  });

  const proposed = result?.segments ?? [];
  const segments = proposed.filter((seg) => segmentTargetExists(seg, features.length, clipCount));
  if (segments.length < proposed.length) {
    log?.(`Dropped ${proposed.length - segments.length} script segment(s) with an out-of-range feature/clip index`);
  }

  if (result && segments.length) {
    log?.(`Script: "${result.title}" with ${segments.length} segment(s)`);
    return { ...result, segments };
  }

  log?.('No LLM script — using heuristic script');
  return heuristicScript({ productUrl, features, clipCount });
}

/** True unless a feature/clip segment points outside the available features/clips. */
function segmentTargetExists(seg, featureCount, clipCount) {
  const inRange = (index, count) => Number.isInteger(index) && index >= 0 && index < count;
  if (seg.kind === 'feature') return inRange(seg.featureIndex, featureCount);
  if (seg.kind === 'clip') return inRange(seg.clipIndex, clipCount);
  return true;
}
/**
 * Pure timeline math — no I/O, so it's unit-testable.
 *
 * For each segment, pick a duration and an absolute start offset:
 *   - a usable voiceover duration (finite, > 0) wins, plus `padding`;
 *   - otherwise a `clip` segment uses `clipDurations[clipIndex]` when that is
 *     a finite number;
 *   - otherwise `minSegment`.
 * Every duration is rounded to 3 decimals and clamped to at least
 * `minSegment`. Non-finite inputs are treated as missing, so one bad probe
 * cannot turn every later `start` into NaN.
 *
 * @param {object[]} segments script segments ({ kind, clipIndex, ... })
 * @param {object} [opts]
 * @param {number[]} [opts.voiceDurations=[]] seconds of voiceover per segment
 * @param {number[]} [opts.clipDurations=[]] seconds per uploaded clip, by clipIndex
 * @param {number} [opts.minSegment=3] minimum segment length in seconds
 * @param {number} [opts.padding=0.4] seconds added after a voiceover
 * @returns {object[]} the segments, each extended with { index, start, duration }
 */
export function buildTimeline(segments, { voiceDurations = [], clipDurations = [], minSegment = 3, padding = 0.4 } = {}) {
  const toThousandths = (n) => Math.round(n * 1000) / 1000;

  let offset = 0;
  return segments.map((seg, i) => {
    const voice = voiceDurations[i];
    const clip = seg.kind === 'clip' && seg.clipIndex != null ? clipDurations[seg.clipIndex] : undefined;

    let duration = minSegment;
    if (Number.isFinite(voice) && voice > 0) {
      duration = voice + padding;
    } else if (Number.isFinite(clip)) {
      duration = clip;
    }
    duration = Math.max(minSegment, toThousandths(duration));

    const entry = { ...seg, index: i, start: toThousandths(offset), duration };
    offset += duration;
    return entry;
  });
}
/**
 * Convert a `/blog/*`-style glob to a RegExp anchored to the full path.
 *
 * `*` matches any run of characters (including `/`). Every other regex
 * metacharacter — including `?` — is escaped and matches itself literally.
 *
 * @param {string} glob path glob; surrounding whitespace is ignored
 * @returns {RegExp} anchored with ^...$
 */
export function globToRegExp(glob) {
  const escaped = glob
    .trim()
    .replace(/[.+?^${}()|[\]\\]/g, '\\$&')
    .replace(/\*/g, '.*');
  return new RegExp(`^${escaped}$`);
}
/**
 * Synthesize one audio file per script segment, using a single voice.
 *
 * Narrated segments are sent to generateSpeech and written to `vo-NN.mp3`.
 * Segments with no narration, and narrated segments whose TTS call fails, get
 * a silent track instead. generateSilentAudio encodes AAC, which ffmpeg's mp3
 * muxer will not accept, so silent tracks are written to `vo-NN.m4a`; the
 * returned `path` always names the file actually written.
 * NOTE(review): assumes downstream code reads `audios[i].path` rather than
 * rebuilding the `vo-NN.mp3` name — confirm in the assembly stage.
 *
 * @param {object} opts
 * @param {object[]} opts.segments script segments (reads `narration`)
 * @param {string} opts.outputDir directory for the audio files
 * @param {string} [opts.voice] voice id; a random one is chosen when omitted
 * @param {number} [opts.minSegment=3] length in seconds of the silent track for un-narrated segments
 * @param {(msg: string) => void} [opts.log]
 * @returns {Promise<{ voice: string, audios: { path: string, duration: number }[] }>}
 *   `audios` is aligned index-for-index with `segments`.
 */
export async function synthesizeVoiceover({ segments, outputDir, voice, minSegment = 3, log }) {
  const chosenVoice = voice || getRandomVoice();
  log?.(`Synthesizing voiceover (voice ${chosenVoice})`);

  const audios = [];
  for (let i = 0; i < segments.length; i++) {
    const seg = segments[i];
    const stem = path.join(outputDir, `vo-${String(i).padStart(2, '0')}`);
    const text = (seg.narration || '').trim();

    let audioPath = `${stem}.mp3`;
    // Best available length if the file cannot be probed afterwards.
    let expected = minSegment;

    if (text) {
      expected = Math.max(minSegment, estimateAudioDuration(text));
      try {
        await generateSpeech(text, audioPath, { voice: chosenVoice });
      } catch (err) {
        log?.(`TTS failed on segment ${i} (${err.message}) — silent`);
        audioPath = `${stem}.m4a`;
        await generateSilentAudio(audioPath, expected);
      }
    } else {
      // B-roll/clip segment: short silent bed (clip's own audio is added later).
      audioPath = `${stem}.m4a`;
      await generateSilentAudio(audioPath, minSegment);
    }

    const duration = await probeDuration(audioPath).catch(() => expected);
    audios.push({ path: audioPath, duration });
  }

  return { voice: chosenVoice, audios };
}
false when there are no patterns', () => { + expect(isExcluded('https://x.com/anything', [])).to.equal(false); + }); + + it('strips the hash fragment when normalizing', () => { + expect(normalize('https://x.com/a#section')).to.equal('https://x.com/a'); + }); +}); diff --git a/test/pipeline/music.test.js b/test/pipeline/music.test.js new file mode 100644 index 0000000..9c5457b --- /dev/null +++ b/test/pipeline/music.test.js @@ -0,0 +1,30 @@ +import { expect } from 'chai'; +import { buildMusicFilter } from '../../src/pipeline/music.js'; + +describe('music ducking filter', () => { + it('splits the voiceover into a final track and a sidechain key', () => { + const f = buildMusicFilter(); + expect(f).to.include('[0:a]asplit=2[vo][key]'); + }); + + it('lowers the music bed volume and sidechain-compresses it by the voice', () => { + const f = buildMusicFilter({ musicVolume: 0.35 }); + expect(f).to.include('volume=0.35[bed]'); + expect(f).to.include('sidechaincompress'); + expect(f).to.include('[bed][key]sidechaincompress'); + }); + + it('mixes the voice and the ducked bed into [aout]', () => { + const f = buildMusicFilter(); + expect(f).to.include('[vo][ducked]amix=inputs=2'); + expect(f).to.match(/\[aout\]$/); + }); + + it('honors custom compressor parameters', () => { + const f = buildMusicFilter({ threshold: 0.1, ratio: 12, attack: 5, release: 400 }); + expect(f).to.include('threshold=0.1'); + expect(f).to.include('ratio=12'); + expect(f).to.include('attack=5'); + expect(f).to.include('release=400'); + }); +}); diff --git a/test/pipeline/timeline.test.js b/test/pipeline/timeline.test.js new file mode 100644 index 0000000..5cfe4e5 --- /dev/null +++ b/test/pipeline/timeline.test.js @@ -0,0 +1,40 @@ +import { expect } from 'chai'; +import { buildTimeline, timelineDuration } from '../../src/pipeline/timeline.js'; + +describe('timeline', () => { + const segments = [ + { kind: 'intro', featureIndex: null, clipIndex: null }, + { kind: 'feature', featureIndex: 0, clipIndex: 
null }, + { kind: 'clip', featureIndex: null, clipIndex: 0 }, + { kind: 'outro', featureIndex: null, clipIndex: null }, + ]; + + it('uses voiceover duration + padding when narration exists', () => { + const t = buildTimeline(segments, { voiceDurations: [4, 6, 0, 3], clipDurations: [9], padding: 0.4 }); + expect(t[0].duration).to.equal(4.4); + expect(t[1].duration).to.equal(6.4); + }); + + it('falls back to clip duration for silent clip segments', () => { + const t = buildTimeline(segments, { voiceDurations: [4, 6, 0, 3], clipDurations: [9] }); + expect(t[2].duration).to.equal(9); + }); + + it('enforces the minimum segment length', () => { + const t = buildTimeline(segments, { voiceDurations: [1, 1, 0, 1], clipDurations: [1], minSegment: 3 }); + t.forEach((s) => expect(s.duration).to.be.at.least(3)); + }); + + it('computes monotonically increasing start offsets', () => { + const t = buildTimeline(segments, { voiceDurations: [4, 6, 0, 3], clipDurations: [9] }); + expect(t[0].start).to.equal(0); + expect(t[1].start).to.equal(t[0].duration); + expect(t[2].start).to.equal(t[0].duration + t[1].duration); + }); + + it('total duration equals the sum of segment durations', () => { + const t = buildTimeline(segments, { voiceDurations: [4, 6, 0, 3], clipDurations: [9] }); + const sum = t.reduce((s, x) => s + x.duration, 0); + expect(timelineDuration(t)).to.equal(Math.round(sum * 1000) / 1000); + }); +}); From d6bbcb5841ce6915e50b70f565553107b6b80e9c Mon Sep 17 00:00:00 2001 From: Anthony Ettinger Date: Sat, 27 Jun 2026 17:16:27 +0000 Subject: [PATCH 2/2] test(pipeline): cover schemas, feature-detect + script-writer fallbacks, drawtext escaping Adds 17 tests (13 -> 30 total): - schemas: zod validation of feature/feature-detection/demo-script shapes - detectFeatures heuristic path (no ANTHROPIC_API_KEY): maxFeatures, skips login/privacy, de-dupes paths, required shape - writeScript heuristic path: intro/outro bookends, every feature once, every uploaded clip placed with a valid 
index, non-empty surreal/metal suno prompt - escapeDrawtext: colon/percent/backslash/apostrophe/null handling Exports escapeDrawtext from graphics.js for testing. Co-Authored-By: Claude Opus 4.8 --- src/pipeline/graphics.js | 2 +- test/pipeline/feature-detect.test.js | 40 ++++++++++++++++++++ test/pipeline/graphics.test.js | 25 +++++++++++++ test/pipeline/schemas.test.js | 55 ++++++++++++++++++++++++++++ test/pipeline/script-writer.test.js | 42 +++++++++++++++++++++ 5 files changed, 163 insertions(+), 1 deletion(-) create mode 100644 test/pipeline/feature-detect.test.js create mode 100644 test/pipeline/graphics.test.js create mode 100644 test/pipeline/schemas.test.js create mode 100644 test/pipeline/script-writer.test.js diff --git a/src/pipeline/graphics.js b/src/pipeline/graphics.js index 40a5fe1..c47a0d2 100644 --- a/src/pipeline/graphics.js +++ b/src/pipeline/graphics.js @@ -28,7 +28,7 @@ function findFont() { return candidates[0]; // ffmpeg ignores a missing file only if fontfile omitted; see drawtextFont() } -function escapeDrawtext(text) { +export function escapeDrawtext(text) { return String(text || '') .replace(/\\/g, '\\\\') .replace(/:/g, '\\:') diff --git a/test/pipeline/feature-detect.test.js b/test/pipeline/feature-detect.test.js new file mode 100644 index 0000000..0294530 --- /dev/null +++ b/test/pipeline/feature-detect.test.js @@ -0,0 +1,40 @@ +import { expect } from 'chai'; +import { detectFeatures } from '../../src/pipeline/feature-detect.js'; + +// With no ANTHROPIC_API_KEY, parseStructured returns null and detectFeatures +// falls back to its deterministic heuristic — which is what we exercise here. 
+describe('detectFeatures (heuristic fallback)', () => {
+  // The suite removes ANTHROPIC_API_KEY so detectFeatures is exercised without a key.
+  // The previous value is kept so the removal does not leak into other test files
+  // that run later in the same mocha process.
+  let savedApiKey;
+
+  before(() => {
+    savedApiKey = process.env.ANTHROPIC_API_KEY;
+    delete process.env.ANTHROPIC_API_KEY;
+  });
+
+  after(() => {
+    // Restore only a key that actually existed: assigning undefined to a
+    // process.env property would store the string "undefined".
+    if (savedApiKey !== undefined) {
+      process.env.ANTHROPIC_API_KEY = savedApiKey;
+    }
+  });
+
+  // Fixture crawl result: /dashboard appears twice (de-duplication case) and
+  // /login + /privacy are present so the "skips login/privacy" case has something to skip.
+  const pages = [
+    { url: 'https://x.com/', title: 'Home', text: '' },
+    { url: 'https://x.com/dashboard', title: 'Dashboard', text: 'metrics' },
+    { url: 'https://x.com/dashboard', title: 'Dashboard dup', text: 'dup' },
+    { url: 'https://x.com/pricing', title: 'Pricing', text: 'plans' },
+    { url: 'https://x.com/login', title: 'Login', text: 'sign in' },
+    { url: 'https://x.com/privacy', title: 'Privacy', text: 'legal' },
+  ];
+
+  it('returns features and respects maxFeatures', async () => {
+    const features = await detectFeatures({ homepageUrl: 'https://x.com/', pages, maxFeatures: 3 });
+    expect(features).to.be.an('array');
+    expect(features.length).to.be.at.most(3);
+    expect(features.length).to.be.greaterThan(0);
+  });
+
+  it('skips login/privacy and de-duplicates paths', async () => {
+    const features = await detectFeatures({ homepageUrl: 'https://x.com/', pages, maxFeatures: 10 });
+    const urls = features.map((f) => f.featureUrl);
+    expect(urls.some((u) => /login|privacy/.test(u))).to.equal(false);
+    // Every returned pathname must be unique.
+    const paths = urls.map((u) => new URL(u).pathname);
+    expect(new Set(paths).size).to.equal(paths.length);
+  });
+
+  it('produces features with the required shape', async () => {
+    const [f] = await detectFeatures({ homepageUrl: 'https://x.com/', pages, maxFeatures: 1 });
+    expect(f).to.have.all.keys('name', 'featureUrl', 'pitch', 'steps');
+    expect(f.steps).to.be.an('array').that.is.not.empty;
+  });
+});
diff --git a/test/pipeline/graphics.test.js b/test/pipeline/graphics.test.js
new file mode 100644
index 0000000..b7eea42
--- /dev/null
+++ b/test/pipeline/graphics.test.js
@@ -0,0 +1,25 @@
+import { expect } from 'chai';
+import { escapeDrawtext } from '../../src/pipeline/graphics.js';
+// Pins the escaping escapeDrawtext applies to text placed in an ffmpeg drawtext argument.
+describe('escapeDrawtext', () => {
+  it('escapes colons (ffmpeg drawtext option separator)', () => {
+    expect(escapeDrawtext('a:b')).to.equal('a\\:b');
+  });
+
+  it('escapes percent signs', () => {
+    expect(escapeDrawtext('100%')).to.equal('100\\%');
+  });
+
+  it('escapes backslashes', () => {
+    expect(escapeDrawtext('a\\b')).to.equal('a\\\\b');
+  });
+
+  it('replaces straight apostrophes (which break the quoted text arg)', () => {
+    expect(escapeDrawtext("it's")).to.equal('it’s');
+  });
+
+  it('returns empty string for null/undefined', () => {
+    expect(escapeDrawtext(null)).to.equal('');
+    expect(escapeDrawtext(undefined)).to.equal('');
+  });
+});
diff --git a/test/pipeline/schemas.test.js b/test/pipeline/schemas.test.js
new file mode 100644
index 0000000..bc62d49
--- /dev/null
+++ b/test/pipeline/schemas.test.js
@@ -0,0 +1,55 @@
+import { expect } from 'chai';
+import { featureSchema, featureDetectionSchema, demoScriptSchema } from '../../src/pipeline/schemas.js';
+// Shape checks for the pipeline schemas; each case inspects the `success` flag returned by safeParse.
+describe('schemas', () => {
+  it('accepts a well-formed feature', () => {
+    const ok = featureSchema.safeParse({
+      name: 'Instant Checkout',
+      featureUrl: 'https://x.com/checkout',
+      pitch: 'Buy in one tap.',
+      steps: ['Click Buy', 'Confirm'],
+    });
+    expect(ok.success).to.equal(true);
+  });
+
+  it('rejects a feature missing required fields', () => {
+    const bad = featureSchema.safeParse({ name: 'X' });
+    expect(bad.success).to.equal(false);
+  });
+
+  it('validates a feature-detection result of multiple features', () => {
+    const res = featureDetectionSchema.safeParse({
+      features: [
+        { name: 'A', featureUrl: 'https://x.com/a', pitch: 'a', steps: ['s'] },
+        { name: 'B', featureUrl: 'https://x.com/b', pitch: 'b', steps: ['s'] },
+      ],
+    });
+    expect(res.success).to.equal(true);
+    expect(res.data.features).to.have.length(2);
+  });
+
+  it('accepts a full demo script with nullable indices and a suno prompt', () => {
+    const res = demoScriptSchema.safeParse({
+      title: 'Demo',
+      tagline: 'A tour',
+      segments: [
+        { kind: 'intro', featureIndex: null, clipIndex: null, title: 'T', caption: 'c', narration: 'hi' },
+        { kind: 'feature', featureIndex: 0, clipIndex: null, title: 'F', caption: 'c', narration: 'feat' },
+        { kind: 'clip', featureIndex: null, clipIndex: 0, title: '', caption: '', narration: '' },
+        { kind: 'outro', featureIndex: null, clipIndex: null, title: 'Bye', caption: 'c', narration: 'bye' },
+      ],
+      sunoPrompt: 'surreal metal bed, 140bpm, loops, leaves room for VO',
+    });
+    expect(res.success).to.equal(true);
+  });
+
+  it('rejects an unknown segment kind', () => {
+    const res = demoScriptSchema.safeParse({
+      title: 'Demo',
+      tagline: 't',
+      segments: [{ kind: 'banana', featureIndex: null, clipIndex: null, title: '', caption: '', narration: '' }],
+      sunoPrompt: 'x',
+    });
+    expect(res.success).to.equal(false);
+  });
+});
diff --git a/test/pipeline/script-writer.test.js b/test/pipeline/script-writer.test.js
new file mode 100644
index 0000000..35f1b91
--- /dev/null
+++ b/test/pipeline/script-writer.test.js
@@ -0,0 +1,42 @@
+import { expect } from 'chai';
+import { writeScript } from '../../src/pipeline/script-writer.js';
+
+// No ANTHROPIC_API_KEY -> deterministic heuristic script.
+describe('writeScript (heuristic fallback)', () => {
+  // The suite removes ANTHROPIC_API_KEY so writeScript is exercised without a key.
+  // The previous value is kept so the removal does not leak into other test files
+  // that run later in the same mocha process.
+  let savedApiKey;
+
+  before(() => {
+    savedApiKey = process.env.ANTHROPIC_API_KEY;
+    delete process.env.ANTHROPIC_API_KEY;
+  });
+
+  after(() => {
+    // Restore only a key that actually existed: assigning undefined to a
+    // process.env property would store the string "undefined".
+    if (savedApiKey !== undefined) {
+      process.env.ANTHROPIC_API_KEY = savedApiKey;
+    }
+  });
+
+  // Three fixture features; their positions (0, 1, 2) are the featureIndex values asserted below.
+  const features = [
+    { name: 'Search', featureUrl: 'https://x.com/search', pitch: 'Find anything fast.', steps: [] },
+    { name: 'Build', featureUrl: 'https://x.com/build', pitch: 'Ship in minutes.', steps: [] },
+    { name: 'Share', featureUrl: 'https://x.com/share', pitch: 'One-click sharing.', steps: [] },
+  ];
+
+  // Array.prototype.sort without a comparator orders elements as strings
+  // (10 would sort before 2), so index arrays are sorted numerically.
+  const byNumber = (a, b) => a - b;
+
+  it('opens with intro and ends with outro', async () => {
+    const script = await writeScript({ productUrl: 'https://x.com', features, clipCount: 0 });
+    expect(script.segments[0].kind).to.equal('intro');
+    expect(script.segments[script.segments.length - 1].kind).to.equal('outro');
+  });
+
+  it('includes every feature exactly once', async () => {
+    const script = await writeScript({ productUrl: 'https://x.com', features, clipCount: 0 });
+    const featureIdx = script.segments
+      .filter((s) => s.kind === 'feature')
+      .map((s) => s.featureIndex)
+      .sort(byNumber);
+    expect(featureIdx).to.deep.equal([0, 1, 2]);
+  });
+
+  it('places every uploaded clip with a valid clipIndex', async () => {
+    const script = await writeScript({ productUrl: 'https://x.com', features, clipCount: 2 });
+    const clipSegs = script.segments.filter((s) => s.kind === 'clip');
+    expect(clipSegs).to.have.length(2);
+    const idx = clipSegs.map((s) => s.clipIndex).sort(byNumber);
+    expect(idx).to.deep.equal([0, 1]);
+  });
+
+  it('always emits a non-empty suno prompt and a title', async () => {
+    const script = await writeScript({ productUrl: 'https://x.com', features, clipCount: 0 });
+    expect(script.sunoPrompt).to.be.a('string').with.length.greaterThan(10);
+    expect(script.sunoPrompt.toLowerCase()).to.match(/metal|surreal/);
+    expect(script.title).to.be.a('string').that.is.not.empty;
+  });
+});