profullstack · ralyodio · Jun 28, 2026 · Jun 28, 2026
diff --git a/Dockerfile b/Dockerfile
@@ -23,5 +23,9 @@ WORKDIR /app
 COPY . .
 RUN pnpm install
 
+# Playwright Chromium for the pipeline-brain recorder (MKDEMO_PIPELINE_BRAIN=1).
+# Harmless when the brain is disabled; required when it's on.
+RUN pnpm exec playwright install --with-deps chromium
+
 EXPOSE 3000
 CMD ["node", "apps/web/server.js"]
diff --git a/apps/web/lib/jobs.js b/apps/web/lib/jobs.js
@@ -1,6 +1,7 @@
 import { randomUUID } from 'node:crypto';
 import { EventEmitter } from 'node:events';
 import { runScriptStage, runAssetsStage, runRenderStage } from './pipeline.js';
+import { runPipeline as runBrainPipeline } from '@makedemo/core';
 
 const jobs = new Map();
 
@@ -16,6 +17,10 @@ export function createJob(input) {
     maxSteps: Math.min(Math.max(input.maxSteps ?? 6, 1), 12),
     voice: input.voice || null,
     credentials: input.credentials || null,
+    // Pipeline-brain inputs (used only when MKDEMO_PIPELINE_BRAIN=1):
+    maxFeatures: Math.min(Math.max(input.maxFeatures ?? 5, 1), 12),
+    clips: Array.isArray(input.clips) ? input.clips : [],
+    song: input.song || null,
     status: 'queued',
     stage: null,
     steps: [],
@@ -85,6 +90,14 @@ async function runPipeline(job) {
   job.status = 'running';
   e('status', { status: 'running' });
 
+  // Opt-in pipeline brain: crawl -> Claude features -> per-feature recording ->
+  // Claude VO script + Suno prompt -> ElevenLabs -> motion-graphics assembly.
+  // Default-off so the live scroll-tour pipeline is unchanged until flipped.
+  if (process.env.MKDEMO_PIPELINE_BRAIN === '1') {
+    await runBrainPipeline(job, e); // emits its own 'done'
+    return;
+  }
+
   await runScriptStage(job, e);
   await runAssetsStage(job, e);
   await runRenderStage(job, e);

diff --git a/docs/PIPELINE.md b/docs/PIPELINE.md
@@ -0,0 +1,123 @@
+# makedemo pipeline brain (Claude feature-detection + media pipeline)
+
+This document describes the `packages/core/src/pipeline/` module: a qaaas-style flow that turns
+a URL into a finished, scored-for-vibes demo video — animated motion graphics,
+recorded feature clips, user-uploaded B-roll, a Claude-written voiceover, and a
+ducked surreal/metal background track from a suno.com song.
+
+It is designed as a **drop-in for the web upload UI** (built in parallel on the
+`feat/web-app` branch): it speaks the same `job` / `emit(type, data)` contract.
+
+## Why this exists
+
+The original makedemo planned interactions on a **single page** with OpenAI and
+narrated each click. This brain borrows the qaaas QA flow instead:
+
+> crawl the whole site → let Claude pick the real features → act on each feature
+> → let Claude write the script → synthesize → assemble.
+
+…and extends it for marketing video: real screen-recorded clips, uploaded
+clips, motion graphics, one cohesive voiceover, and music.
+
+## Pipeline stages
+
+| Stage | Module | What it does |
+|---|---|---|
+| 1. discover | `crawl.js` + `feature-detect.js` | Same-origin BFS crawl (optionally logged in), then Claude picks the demo-worthy features (name, pitch, start URL, concrete steps). Structured output via zod. Heuristic fallback with no key. |
+| 2. record | `feature-recorder.js` | Playwright `recordVideo` per feature, performing the feature's steps, → one `.webm` clip each. |
+| 3. script | `script-writer.js` | Claude writes ONE cohesive timeline (intro → features/clips → outro) with on-screen titles, captions, spoken narration, **and a ready-to-paste suno.com surreal/metal music prompt**. |
+| 4. voiceover | `voiceover.js` | ElevenLabs TTS per segment (one consistent voice), silent beds for B-roll. |
+| 5. assemble | `timeline.js` + `graphics.js` + `music.js` + `assembly.js` | Build a timeline from voiceover durations; render animated cards + lower-thirds; concat segment videos; build the continuous voiceover; **duck the uploaded song under it**; mux → `demo.mp4`. |
+
+Orchestrated by `index.js#runPipeline(job, emit)`.
+
+## The job + event contract (drop-in for the web shell)
+
+```js
+// job (superset of the existing web job)
+{
+  id, url,
+  credentials: { user, password } | null,
+  maxFeatures: number,           // default 5
+  voice: string | null,          // ElevenLabs voice id
+  clips: string[],               // paths to user-uploaded video clips
+  song: string | null,           // path to an uploaded suno.com song clip
+}
+
+// emit(type, data) — same vocabulary as apps/web/lib/jobs.js
+//   'stage'  { stage, status, step?, total? }   stage ∈ discover|record|script|voiceover|assemble
+//   'log'    { level, msg }
+//   'script' { features? , title?, segments?, sunoPrompt? }
+//   'video'  { video }
+//   'done'   { video, features, sunoPrompt }
+//   'error'  { message }
+```
+
+The web shell can swap its `apps/web/lib/pipeline.js` import for:
+
+```js
+import { runPipeline, OUTPUT_ROOT } from '@makedemo/core/pipeline';
+// in runPipeline(job) inside jobs.js:
+await runPipeline(job, (type, data) => emit(job, type, data));
+```
+
+Outputs land in `output/<jobId>/`: `demo.mp4`, `transcript.txt`, `suno-prompt.txt`,
+per-feature `feature-NN.webm`, per-segment voiceover, and a `work/` scratch dir.
+
+## Music: the suno.com flow
+
+1. The script writer emits a **surreal/metal Suno prompt** (saved to
+   `suno-prompt.txt` and emitted on the `script` event) — the user pastes it into
+   suno.com and downloads a clip.
+2. The uploaded clip is passed as `job.song`. `music.js` loops it to cover the
+   whole video and **sidechain-compresses it against the voiceover** so the bed
+   ducks under narration and swells in the gaps.
+3. No song → voiceover-only audio (still a complete video).
+
+## Motion graphics
+
+`graphics.js` has two backends:
+
+- **ffmpeg (default):** animated `mandelbrot` background (surreal, zero-asset),
+  fading kinetic title/subtitle, and slide-in lower-thirds burned onto clips.
+- **Remotion (opt-in, `MKDEMO_REMOTION=1`):** full React motion graphics via
+  `@remotion/renderer` + a `graphics/` composition. Falls back to ffmpeg if the
+  optional deps/project aren't present.
+
+## Smart path vs fallback
+
+Every external dependency degrades gracefully, matching makedemo's existing
+style:
+
+| Missing | Behavior |
+|---|---|
+| `ANTHROPIC_API_KEY` | Heuristic feature list + deterministic script (no Claude). |
+| `ELEVENLABS_API_KEY` | Silent timed voiceover (timing preserved). |
+| `job.song` | Voiceover-only audio. |
+| `@remotion/*` | ffmpeg motion-graphics backend. |
+
+## Running locally
+
+```bash
+pnpm install
+npx playwright install chromium      # browsers for crawl + recording
+node scripts/run-pipeline.js --url https://example.com \
+  --clips ./a.mp4,./b.mp4 --song ./suno.mp3 --max-features 5
+```
+
+Unit tests for the pure pieces (timeline math, music filter, URL globs):
+
+```bash
+pnpm run test:pipeline
+```
+
+## Status / what's wired vs. pending
+
+- **Wired & unit-tested:** timeline math, ducking-filter construction, crawl
+  URL globbing.
+- **Wired (needs keys/browsers/ffmpeg to run end-to-end):** crawl, Claude
+  feature detection + script, Playwright recording, ElevenLabs voiceover,
+  ffmpeg assembly with ducked music.
+- **Scaffolded:** Remotion backend (the ffmpeg backend is the default and is
+  fully functional). The next upgrade is a Claude action-loop for richer
+  in-clip interactions; the current recorder uses a light step interpreter.
diff --git a/package.json b/package.json
@@ -14,6 +14,7 @@
     "cli": "node src/index.js",
     "setup": "node scripts/setup.js",
     "test": "NODE_OPTIONS='--loader=./node_modules/mocha/lib/nodejs/esm-utils.js' mocha test/**/*.test.js --recursive",
+    "test:pipeline": "NODE_OPTIONS='--loader=./node_modules/mocha/lib/nodejs/esm-utils.js' mocha test/pipeline/**/*.test.js --recursive",
     "test:watch": "NODE_OPTIONS='--loader=./node_modules/mocha/lib/nodejs/esm-utils.js' mocha test/**/*.test.js --recursive --watch",
     "test:coverage": "NODE_OPTIONS='--loader=./node_modules/mocha/lib/nodejs/esm-utils.js' c8 mocha test/**/*.test.js --recursive",
     "lint": "eslint src/ test/",

diff --git a/packages/core/index.js b/packages/core/index.js
@@ -13,3 +13,6 @@ export {
   VOICES,
 } from './src/audio/generator.js';
 export { createDemo } from './src/index.js';
+// Pipeline brain: crawl -> Claude feature detection -> per-feature recording ->
+// Claude VO script + Suno prompt -> ElevenLabs -> motion-graphics assembly.
+export { runPipeline, OUTPUT_ROOT as PIPELINE_OUTPUT_ROOT } from './src/pipeline/index.js';
diff --git a/packages/core/package.json b/packages/core/package.json
@@ -6,6 +6,7 @@
   "main": "index.js",
   "exports": {
     ".": "./index.js",
+    "./pipeline": "./src/pipeline/index.js",
     "./browser": "./src/browser/manager.js",
     "./ai": "./src/ai/decision-maker.js",
     "./audio": "./src/audio/generator.js",
@@ -16,14 +17,20 @@
     "node": ">=20.0.0"
   },
   "dependencies": {
-    "@anthropic-ai/sdk": "^0.69.0",
+    "@anthropic-ai/sdk": "^0.106.0",
     "dotenv": "^16.4.1",
     "elevenlabs": "^0.8.2",
     "fluent-ffmpeg": "^2.1.2",
     "fs-extra": "^11.2.0",
     "node-fetch": "^3.3.2",
     "openai": "^4.28.4",
+    "playwright": "^1.61.0",
     "puppeteer": "^21.11.0",
-    "winston": "^3.11.0"
+    "winston": "^3.11.0",
+    "zod": "^4.0.0"
+  },
+  "optionalDependencies": {
+    "@remotion/bundler": "^4.0.0",
+    "@remotion/renderer": "^4.0.0"
   }
 }
diff --git a/packages/core/src/pipeline/assembly.js b/packages/core/src/pipeline/assembly.js
@@ -0,0 +1,146 @@
+import path from 'node:path';
+import fs from 'node:fs/promises';
+import { runFfmpeg } from './ffmpeg.js';
+import { renderCard, overlayLowerThird } from './graphics.js';
+import { mixMusicUnderVoice, validateSong } from './music.js';
+
+// Target frame width and height (pixels) and frame rate that normalizeClip
+// scales, pads and resamples every source clip to.
+const W = 1920;
+const H = 1080;
+const FPS = 30;
+// Colour handed to ffmpeg's pad filter for the letterbox bars in normalizeClip.
+const PAD_COLOR = '0x0d0c0f';
+
+/**
+ * Assemble the final MP4 from a built timeline.
+ *
+ * Video and audio are built as two independent tracks, then muxed:
+ *   - VIDEO: each segment -> a normalized, silent 1080p/30fps clip (animated
+ *     card for intro/outro; recorded feature webm or uploaded clip otherwise,
+ *     with an animated lower-third burned on). Concatenated in order.
+ *   - AUDIO: each segment's voiceover, padded to the segment's exact duration,
+ *     concatenated into one continuous voice track, then (if a suno.com song
+ *     was uploaded) ducked under the looped music bed.
+ *
+ * @param {object} opts
+ * @param {Array}  opts.timeline   built timeline (see timeline.js)
+ * @param {Array<{path,duration}>} opts.audios  per-segment voiceover (aligned)
+ * @param {Array<{path}>}  opts.featureClips    recorded feature clips (mp4/webm on disk)
+ * @param {string[]} opts.uploadedClips         user-uploaded clip paths
+ * @param {string|null} opts.song               uploaded suno.com song path
+ * @param {string} opts.workDir
+ * @param {string} opts.outPath
+ * @param {(m:string)=>void} [opts.log]
+ */
+export async function assembleVideo(opts) {
+  const { timeline, audios, featureClips, uploadedClips, song, workDir, outPath, log } = opts;
+  await fs.mkdir(workDir, { recursive: true });
+
+  // 1. Build one normalized, silent video clip per segment.
+  const segVideos = [];
+  for (const seg of timeline) {
+    const out = path.join(workDir, `seg-${String(seg.index).padStart(2, '0')}.mp4`);
+    log?.(`Rendering segment ${seg.index + 1}/${timeline.length} (${seg.kind})`);
+
+    if (seg.kind === 'intro' || seg.kind === 'outro') {
+      await renderCard({ title: seg.title, subtitle: seg.caption, duration: seg.duration, outPath: out });
+    } else {
+      const src = sourceForSegment(seg, featureClips, uploadedClips);
+      if (!src) {
+        // Missing source — render a titled card so the timeline stays intact.
+        await renderCard({ title: seg.title || '', subtitle: seg.caption || '', duration: seg.duration, outPath: out });
+      } else {
+        const normalized = path.join(workDir, `norm-${String(seg.index).padStart(2, '0')}.mp4`);
+        await normalizeClip(src, seg.duration, normalized);
+        await overlayLowerThird(normalized, { title: seg.title, caption: seg.caption, outPath: out });
+      }
+    }
+    segVideos.push(out);
+  }
+
+  // 2. Concat the segment videos (identical params -> stream copy).
+  const videoTrack = path.join(workDir, 'video.mp4');
+  await concatCopy(segVideos, path.join(workDir, 'video-list.txt'), videoTrack);
+
+  // 3. Build the continuous voiceover track (pad each segment to its duration).
+  const paddedVo = [];
+  for (const seg of timeline) {
+    const vo = audios[seg.index];
+    const out = path.join(workDir, `voa-${String(seg.index).padStart(2, '0')}.m4a`);
+    await padAudioToDuration(vo.path, seg.duration, out);
+    paddedVo.push(out);
+  }
+  const voiceTrack = path.join(workDir, 'voice.m4a');
+  await concatAudioFilter(paddedVo, voiceTrack);
+
+  // 4. Mix in the ducked music bed if a song was uploaded.
+  let audioTrack = voiceTrack;
+  if (song && (await validateSong(song))) {
+    log?.('Ducking suno.com song under the voiceover');
+    audioTrack = path.join(workDir, 'final-audio.m4a');
+    await mixMusicUnderVoice(voiceTrack, song, audioTrack);
+  } else if (song) {
+    log?.('Uploaded song unreadable — continuing with voiceover only');
+  }
+
+  // 5. Mux video + audio.
+  await runFfmpeg([
+    '-i', videoTrack,
+    '-i', audioTrack,
+    '-map', '0:v:0', '-map', '1:a:0',
+    '-c:v', 'copy', '-c:a', 'aac', '-b:a', '192k',
+    '-shortest', '-movflags', '+faststart',
+    outPath,
+  ]);
+  log?.(`Final video written: ${outPath}`);
+  return outPath;
+}
+
+function sourceForSegment(seg, featureClips, uploadedClips) {
+  if (seg.kind === 'feature' && seg.featureIndex != null) return featureClips[seg.featureIndex]?.path;
+  if (seg.kind === 'clip' && seg.clipIndex != null) return uploadedClips[seg.clipIndex];
+  return null;
+}
+
+// Scale + letterbox a source to 1080p/30fps, looping short clips and trimming to
+// an exact duration. Output is silent so audio can be assembled independently.
+async function normalizeClip(src, duration, outPath) {
+  const vf = `scale=${W}:${H}:force_original_aspect_ratio=decrease,pad=${W}:${H}:(ow-iw)/2:(oh-ih)/2:color=${PAD_COLOR},fps=${FPS},format=yuv420p`;
+  await runFfmpeg([
+    '-stream_loop', '-1', '-i', src,
+    '-t', String(duration),
+    '-vf', vf,
+    '-an',
+    '-c:v', 'libx264', '-preset', 'veryfast', '-pix_fmt', 'yuv420p',
+    outPath,
+  ]);
+}
+
+async function concatCopy(files, listFile, outPath) {
+  await fs.writeFile(listFile, files.map((f) => `file '${f}'`).join('\n'), 'utf8');
+  await runFfmpeg(['-f', 'concat', '-safe', '0', '-i', listFile, '-c', 'copy', outPath]);
+}
+
+// Pad (or trim) an audio file to an exact duration; normalize to stereo/44.1k AAC.
+async function padAudioToDuration(audioPath, duration, outPath) {
+  await runFfmpeg([
+    '-i', audioPath,
+    '-af', 'apad',
+    '-t', String(duration),
+    '-ar', '44100', '-ac', '2',
+    '-c:a', 'aac', '-b:a', '192k',
+    outPath,
+  ]);
+}
+
+// Concatenate audio segments with the concat filter (robust across containers).
+async function concatAudioFilter(files, outPath) {
+  const inputs = files.flatMap((f) => ['-i', f]);
+  const filter = files.map((_, i) => `[${i}:a]`).join('') + `concat=n=${files.length}:v=0:a=1[a]`;
+  await runFfmpeg([
+    ...inputs,
+    '-filter_complex', filter,
+    '-map', '[a]',
+    '-ar', '44100', '-ac', '2',
+    '-c:a', 'aac', '-b:a', '192k',
+    outPath,
+  ]);
+}