Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,9 @@ WORKDIR /app
COPY . .
RUN pnpm install

# Playwright Chromium for the pipeline-brain recorder (MKDEMO_PIPELINE_BRAIN=1).
# Harmless when the brain is disabled; required when it's on.
RUN pnpm exec playwright install --with-deps chromium

EXPOSE 3000
CMD ["node", "apps/web/server.js"]
13 changes: 13 additions & 0 deletions apps/web/lib/jobs.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { randomUUID } from 'node:crypto';
import { EventEmitter } from 'node:events';
import { runScriptStage, runAssetsStage, runRenderStage } from './pipeline.js';
import { runPipeline as runBrainPipeline } from '@makedemo/core';

const jobs = new Map();

Expand All @@ -16,6 +17,10 @@ export function createJob(input) {
maxSteps: Math.min(Math.max(input.maxSteps ?? 6, 1), 12),
voice: input.voice || null,
credentials: input.credentials || null,
// Pipeline-brain inputs (used only when MKDEMO_PIPELINE_BRAIN=1):
maxFeatures: Math.min(Math.max(input.maxFeatures ?? 5, 1), 12),
clips: Array.isArray(input.clips) ? input.clips : [],
song: input.song || null,
status: 'queued',
stage: null,
steps: [],
Expand Down Expand Up @@ -85,6 +90,14 @@ async function runPipeline(job) {
job.status = 'running';
e('status', { status: 'running' });

// Opt-in pipeline brain: crawl -> Claude features -> per-feature recording ->
// Claude VO script + Suno prompt -> ElevenLabs -> motion-graphics assembly.
// Default-off so the live scroll-tour pipeline is unchanged until flipped.
if (process.env.MKDEMO_PIPELINE_BRAIN === '1') {
await runBrainPipeline(job, e); // emits its own 'done'
return;
}

await runScriptStage(job, e);
await runAssetsStage(job, e);
await runRenderStage(job, e);
Expand Down
123 changes: 123 additions & 0 deletions docs/PIPELINE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
# makedemo pipeline brain (Claude feature-detection + media pipeline)

This document describes the `packages/core/src/pipeline/` module: a qaaas-style flow that turns
a URL into a finished, scored-for-vibes demo video — animated motion graphics,
recorded feature clips, user-uploaded B-roll, a Claude-written voiceover, and a
ducked surreal/metal background track from a suno.com song.

It is designed as a **drop-in for the web upload UI** (built in parallel on the
`feat/web-app` branch): it speaks the same `job` / `emit(type, data)` contract.

## Why this exists

The original makedemo planned interactions on a **single page** with OpenAI and
narrated each click. This brain borrows the qaaas QA flow instead:

> crawl the whole site → let Claude pick the real features → act on each feature
> → let Claude write the script → synthesize → assemble.

…and extends it for marketing video: real screen-recorded clips, uploaded
clips, motion graphics, one cohesive voiceover, and music.

## Pipeline stages

| Stage | Module | What it does |
|---|---|---|
| 1. discover | `crawl.js` + `feature-detect.js` | Same-origin BFS crawl (optionally logged in), then Claude picks the demo-worthy features (name, pitch, start URL, concrete steps). Structured output via zod. Heuristic fallback with no key. |
| 2. record | `feature-recorder.js` | Playwright `recordVideo` per feature, performing the feature's steps, → one `.webm` clip each. |
| 3. script | `script-writer.js` | Claude writes ONE cohesive timeline (intro → features/clips → outro) with on-screen titles, captions, spoken narration, **and a ready-to-paste suno.com surreal/metal music prompt**. |
| 4. voiceover | `voiceover.js` | ElevenLabs TTS per segment (one consistent voice), silent beds for B-roll. |
| 5. assemble | `timeline.js` + `graphics.js` + `music.js` + `assembly.js` | Build a timeline from voiceover durations; render animated cards + lower-thirds; concat segment videos; build the continuous voiceover; **duck the uploaded song under it**; mux → `demo.mp4`. |

Orchestrated by `index.js#runPipeline(job, emit)`.

## The job + event contract (drop-in for the web shell)

```js
// job (superset of the existing web job)
{
id, url,
credentials: { user, password } | null,
maxFeatures: number, // default 5
voice: string | null, // ElevenLabs voice id
clips: string[], // paths to user-uploaded video clips
song: string | null, // path to an uploaded suno.com song clip
}

// emit(type, data) — same vocabulary as web/lib/jobs.js
// 'stage' { stage, status, step?, total? } stage ∈ discover|record|script|voiceover|assemble
// 'log' { level, msg }
// 'script' { features? , title?, segments?, sunoPrompt? }
// 'video' { video }
// 'done' { video, features, sunoPrompt }
// 'error' { message }
```

The web shell can swap its `apps/web/lib/pipeline.js` import for:

```js
import { runPipeline, OUTPUT_ROOT } from '@makedemo/core/pipeline';
// in runPipeline(job) inside jobs.js:
await runPipeline(job, (type, data) => emit(job, type, data));
```

Outputs land in `output/<jobId>/`: `demo.mp4`, `transcript.txt`, `suno-prompt.txt`,
per-feature `feature-NN.webm`, per-segment voiceover, and a `work/` scratch dir.

## Music: the suno.com flow

1. The script writer emits a **surreal/metal Suno prompt** (saved to
`suno-prompt.txt` and emitted on the `script` event) — the user pastes it into
suno.com and downloads a clip.
2. The uploaded clip is passed as `job.song`. `music.js` loops it to cover the
whole video and **sidechain-compresses it against the voiceover** so the bed
ducks under narration and swells in the gaps.
3. No song → voiceover-only audio (still a complete video).

## Motion graphics

`graphics.js` has two backends:

- **ffmpeg (default):** animated `mandelbrot` background (surreal, zero-asset),
fading kinetic title/subtitle, and slide-in lower-thirds burned onto clips.
- **Remotion (opt-in, `MKDEMO_REMOTION=1`):** full React motion graphics via
`@remotion/renderer` + a `graphics/` composition. Falls back to ffmpeg if the
optional deps/project aren't present.

## Smart path vs fallback

Every external dependency degrades gracefully, matching makedemo's existing
style:

| Missing | Behavior |
|---|---|
| `ANTHROPIC_API_KEY` | Heuristic feature list + deterministic script (no Claude). |
| `ELEVENLABS_API_KEY` | Silent timed voiceover (timing preserved). |
| `job.song` | Voiceover-only audio. |
| `@remotion/*` | ffmpeg motion-graphics backend. |

## Running locally

```bash
pnpm install
pnpm exec playwright install chromium # browsers for crawl + recording
node scripts/run-pipeline.js --url https://example.com \
--clips ./a.mp4,./b.mp4 --song ./suno.mp3 --max-features 5
```

Unit tests for the pure pieces (timeline math, music filter, URL globs):

```bash
pnpm run test:pipeline
```

## Status / what's wired vs. pending

- **Wired & unit-tested:** timeline math, ducking-filter construction, crawl
URL globbing.
- **Wired (needs keys/browsers/ffmpeg to run end-to-end):** crawl, Claude
feature detection + script, Playwright recording, ElevenLabs voiceover,
ffmpeg assembly with ducked music.
- **Scaffolded:** Remotion backend (ffmpeg backend is the default and fully
functional); a Claude action-loop for richer in-clip interactions (current
recorder uses a light step interpreter) is the next upgrade.
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"cli": "node src/index.js",
"setup": "node scripts/setup.js",
"test": "NODE_OPTIONS='--loader=./node_modules/mocha/lib/nodejs/esm-utils.js' mocha test/**/*.test.js --recursive",
"test:pipeline": "NODE_OPTIONS='--loader=./node_modules/mocha/lib/nodejs/esm-utils.js' mocha test/pipeline/**/*.test.js --recursive",
"test:watch": "NODE_OPTIONS='--loader=./node_modules/mocha/lib/nodejs/esm-utils.js' mocha test/**/*.test.js --recursive --watch",
"test:coverage": "NODE_OPTIONS='--loader=./node_modules/mocha/lib/nodejs/esm-utils.js' c8 mocha test/**/*.test.js --recursive",
"lint": "eslint src/ test/",
Expand Down
3 changes: 3 additions & 0 deletions packages/core/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,6 @@ export {
VOICES,
} from './src/audio/generator.js';
export { createDemo } from './src/index.js';
// Pipeline brain: crawl -> Claude feature detection -> per-feature recording ->
// Claude VO script + Suno prompt -> ElevenLabs -> motion-graphics assembly.
export { runPipeline, OUTPUT_ROOT as PIPELINE_OUTPUT_ROOT } from './src/pipeline/index.js';
11 changes: 9 additions & 2 deletions packages/core/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"main": "index.js",
"exports": {
".": "./index.js",
"./pipeline": "./src/pipeline/index.js",
"./browser": "./src/browser/manager.js",
"./ai": "./src/ai/decision-maker.js",
"./audio": "./src/audio/generator.js",
Expand All @@ -16,14 +17,20 @@
"node": ">=20.0.0"
},
"dependencies": {
"@anthropic-ai/sdk": "^0.69.0",
"@anthropic-ai/sdk": "^0.106.0",
"dotenv": "^16.4.1",
"elevenlabs": "^0.8.2",
"fluent-ffmpeg": "^2.1.2",
"fs-extra": "^11.2.0",
"node-fetch": "^3.3.2",
"openai": "^4.28.4",
"playwright": "^1.61.0",
"puppeteer": "^21.11.0",
"winston": "^3.11.0"
"winston": "^3.11.0",
"zod": "^4.0.0"
},
"optionalDependencies": {
"@remotion/bundler": "^4.0.0",
"@remotion/renderer": "^4.0.0"
}
}
146 changes: 146 additions & 0 deletions packages/core/src/pipeline/assembly.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
import path from 'node:path';
import fs from 'node:fs/promises';
import { runFfmpeg } from './ffmpeg.js';
import { renderCard, overlayLowerThird } from './graphics.js';
import { mixMusicUnderVoice, validateSong } from './music.js';

// Output canvas width in pixels; normalizeClip scales and pads every source clip to W x H.
const W = 1920;
// Output canvas height in pixels.
const H = 1080;
// Frame rate that normalizeClip forces on every clip via the `fps` filter.
const FPS = 30;
// Fill colour (ffmpeg 0xRRGGBB syntax) for the letterbox bars added by the `pad` filter.
const PAD_COLOR = '0x0d0c0f';

/**
 * Produce the final MP4 for a built timeline.
 *
 * The video and audio tracks are prepared separately and only joined at the end:
 *   - VIDEO: one silent clip per timeline segment. Intro/outro segments become
 *     rendered cards; feature/clip segments are normalized and get a lower-third
 *     overlay; a segment whose source cannot be found falls back to a card so
 *     the timeline keeps its length. The clips are then concatenated in order.
 *   - AUDIO: each segment's voiceover is padded (or trimmed) to the segment's
 *     duration and the results are concatenated into one voice track. When a
 *     song is supplied and passes validation, it is mixed under that track.
 *
 * @param {object} opts
 * @param {Array} opts.timeline built timeline (see timeline.js)
 * @param {Array<{path,duration}>} opts.audios per-segment voiceover, indexed by `seg.index`
 * @param {Array<{path}>} opts.featureClips recorded feature clips (mp4/webm on disk)
 * @param {string[]} opts.uploadedClips user-uploaded clip paths
 * @param {string|null} opts.song uploaded suno.com song path
 * @param {string} opts.workDir scratch directory (created if missing)
 * @param {string} opts.outPath destination of the muxed MP4
 * @param {(m:string)=>void} [opts.log] optional progress logger
 * @returns {Promise<string>} `opts.outPath`, once the file has been written
 */
export async function assembleVideo(opts) {
  const { timeline, audios, featureClips, uploadedClips, song, workDir, outPath, log } = opts;
  await fs.mkdir(workDir, { recursive: true });

  // Small helpers for the scratch-file naming scheme: <prefix>-<NN>.<ext> inside workDir.
  const inWorkDir = (name) => path.join(workDir, name);
  const twoDigit = (seg) => String(seg.index).padStart(2, '0');

  // 1. One normalized, silent video clip per segment.
  const segVideos = [];
  for (const seg of timeline) {
    const segOut = inWorkDir(`seg-${twoDigit(seg)}.mp4`);
    log?.(`Rendering segment ${seg.index + 1}/${timeline.length} (${seg.kind})`);

    const isTitleCard = seg.kind === 'intro' || seg.kind === 'outro';
    const src = isTitleCard ? null : sourceForSegment(seg, featureClips, uploadedClips);

    if (isTitleCard) {
      await renderCard({ title: seg.title, subtitle: seg.caption, duration: seg.duration, outPath: segOut });
    } else if (!src) {
      // No source on disk for this segment: substitute a titled card so the
      // timeline keeps the same number and length of segments.
      await renderCard({ title: seg.title || '', subtitle: seg.caption || '', duration: seg.duration, outPath: segOut });
    } else {
      const normalized = inWorkDir(`norm-${twoDigit(seg)}.mp4`);
      await normalizeClip(src, seg.duration, normalized);
      await overlayLowerThird(normalized, { title: seg.title, caption: seg.caption, outPath: segOut });
    }

    segVideos.push(segOut);
  }

  // 2. Join the segment videos (identical params -> stream copy).
  const videoTrack = inWorkDir('video.mp4');
  await concatCopy(segVideos, inWorkDir('video-list.txt'), videoTrack);

  // 3. Continuous voiceover: each segment's audio stretched/trimmed to its slot.
  const paddedVo = [];
  for (const seg of timeline) {
    const voiceover = audios[seg.index];
    const paddedOut = inWorkDir(`voa-${twoDigit(seg)}.m4a`);
    await padAudioToDuration(voiceover.path, seg.duration, paddedOut);
    paddedVo.push(paddedOut);
  }
  const voiceTrack = inWorkDir('voice.m4a');
  await concatAudioFilter(paddedVo, voiceTrack);

  // 4. Optional music bed. An unreadable song is reported and skipped, not fatal.
  let audioTrack = voiceTrack;
  if (song) {
    const songIsUsable = await validateSong(song);
    if (songIsUsable) {
      log?.('Ducking suno.com song under the voiceover');
      audioTrack = inWorkDir('final-audio.m4a');
      await mixMusicUnderVoice(voiceTrack, song, audioTrack);
    } else {
      log?.('Uploaded song unreadable — continuing with voiceover only');
    }
  }

  // 5. Mux: copy the video stream, encode audio as AAC, stop at the shorter track.
  const muxArgs = [
    '-i', videoTrack,
    '-i', audioTrack,
    '-map', '0:v:0', '-map', '1:a:0',
    '-c:v', 'copy', '-c:a', 'aac', '-b:a', '192k',
    '-shortest', '-movflags', '+faststart',
    outPath,
  ];
  await runFfmpeg(muxArgs);
  log?.(`Final video written: ${outPath}`);
  return outPath;
}

// Resolve the on-disk source video for a timeline segment.
// 'feature' segments look up the recorded clip's `path` by `featureIndex`;
// 'clip' segments look up the uploaded clip path by `clipIndex`.
// Any other kind, or a segment without the relevant index, yields null. An
// index that points past the end of its list yields undefined.
function sourceForSegment(seg, featureClips, uploadedClips) {
  switch (seg.kind) {
    case 'feature': {
      if (seg.featureIndex == null) break;
      const recorded = featureClips[seg.featureIndex];
      return recorded?.path;
    }
    case 'clip': {
      if (seg.clipIndex == null) break;
      return uploadedClips[seg.clipIndex];
    }
    default:
      break;
  }
  return null;
}

// Re-encode a source video as a silent W x H clip at FPS frames per second.
// The source is scaled to fit (aspect ratio kept), centred on a PAD_COLOR
// canvas, looped indefinitely on input and cut at `duration` seconds on output,
// so short sources repeat and long ones are trimmed. Audio is dropped (-an)
// because the audio track is assembled independently.
async function normalizeClip(src, duration, outPath) {
  const filterChain = [
    `scale=${W}:${H}:force_original_aspect_ratio=decrease`,
    `pad=${W}:${H}:(ow-iw)/2:(oh-ih)/2:color=${PAD_COLOR}`,
    `fps=${FPS}`,
    'format=yuv420p',
  ].join(',');

  const inputArgs = ['-stream_loop', '-1', '-i', src];
  const outputArgs = [
    '-t', String(duration),
    '-vf', filterChain,
    '-an',
    '-c:v', 'libx264', '-preset', 'veryfast', '-pix_fmt', 'yuv420p',
  ];

  await runFfmpeg([...inputArgs, ...outputArgs, outPath]);
}

// Join video files with ffmpeg's concat demuxer using stream copy (no re-encode).
// Writes one `file '<path>'` directive per input to `listFile`, then runs ffmpeg on it.
//
// Two details keep the list valid for any input path:
//   - Paths are made absolute, because the concat demuxer resolves relative
//     entries against the list file's directory rather than the process cwd.
//     A relative workDir would otherwise yield doubled, non-existent paths.
//   - A single quote inside a path is written as '\'' (close quote, escaped
//     quote, reopen) so it cannot terminate the quoted directive early.
async function concatCopy(files, listFile, outPath) {
  const directives = files.map((file) => {
    const absolute = path.resolve(file);
    return `file '${absolute.replaceAll("'", "'\\''")}'`;
  });
  await fs.writeFile(listFile, directives.join('\n'), 'utf8');
  await runFfmpeg(['-f', 'concat', '-safe', '0', '-i', listFile, '-c', 'copy', outPath]);
}

// Force an audio file to an exact length and a common format.
// `apad` appends silence to the input and `-t` cuts the output at `duration`
// seconds, so shorter inputs are padded and longer ones trimmed. The result is
// always stereo, 44.1 kHz, 192 kbit/s AAC.
async function padAudioToDuration(audioPath, duration, outPath) {
  const lengthArgs = ['-af', 'apad', '-t', String(duration)];
  const formatArgs = ['-ar', '44100', '-ac', '2', '-c:a', 'aac', '-b:a', '192k'];
  await runFfmpeg(['-i', audioPath, ...lengthArgs, ...formatArgs, outPath]);
}

// Join audio files end-to-end using ffmpeg's `concat` *filter* (not the demuxer).
// Every file becomes its own `-i` input; the filter graph chains their audio
// streams ([0:a][1:a]...) into a single output pad `[a]`, which is encoded as
// stereo 44.1 kHz AAC at 192 kbit/s.
async function concatAudioFilter(files, outPath) {
  const inputArgs = [];
  const streamLabels = [];
  files.forEach((file, idx) => {
    inputArgs.push('-i', file);
    streamLabels.push(`[${idx}:a]`);
  });
  const filterGraph = `${streamLabels.join('')}concat=n=${files.length}:v=0:a=1[a]`;

  const encodeArgs = ['-ar', '44100', '-ac', '2', '-c:a', 'aac', '-b:a', '192k'];
  await runFfmpeg([...inputArgs, '-filter_complex', filterGraph, '-map', '[a]', ...encodeArgs, outPath]);
}
Loading