Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 33 additions & 1 deletion packages/cli/src/capture/assetCataloger.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@ export interface CatalogedAsset {
sectionClasses?: string;
/** Whether the image is above the fold (visible without scrolling) */
aboveFold?: boolean;
/** Element sits inside <header>, <nav>, or [role="banner"] — logo signal */
inBanner?: boolean;
/** Element sits inside <a> with site-root href ("/", "#", origin-only) — brand-home link */
inHomeLink?: boolean;
/** alt/aria-label/title contains the brand segment of document.title */
matchesTitleBrand?: boolean;
}

/**
Expand Down Expand Up @@ -62,6 +68,26 @@ export async function catalogAssets(page: Page): Promise<CatalogedAsset[]> {
var rect = el.getBoundingClientRect();
ctx.aboveFold = rect.top < window.innerHeight;
} catch(e) {}
// Structural logo-candidate signals: class-substring alone caught 0/32 SVGs on heygen.com.
ctx.inBanner = el.closest('header, nav, [role="banner"]') !== null;
var homeAnchor = el.closest('a[href]');
if (homeAnchor) {
var aHref = homeAnchor.getAttribute('href') || '';
ctx.inHomeLink = aHref === '/' || aHref === '#' || aHref === './' ||
/^https?:\\/\\/[^/]+\\/?$/.test(aHref);
}
// Brand can be first ("HeyGen - Ideas"), last ("Ideas - HeyGen"), or colon-separated ("Vercel: Build").
var titleParts = (document.title || '').split(/[-|—:]/);
if (desc) {
for (var ti = 0; ti < titleParts.length; ti++) {
var part = titleParts[ti].trim();
if (part.length > 1 && part.length < 30 &&
desc.toLowerCase().indexOf(part.toLowerCase()) !== -1) {
ctx.matchesTitleBrand = true;
break;
}
}
}
return ctx;
}

Expand Down Expand Up @@ -92,12 +118,15 @@ export async function catalogAssets(page: Page): Promise<CatalogedAsset[]> {
if (notes && !entry.notes) {
entry.notes = notes;
}
// Merge rich context (first one wins)
// Text fields: first-occurrence wins. Boolean signals: any positive sample wins.
if (richCtx) {
if (richCtx.description && !entry.description) entry.description = richCtx.description;
if (richCtx.nearestHeading && !entry.nearestHeading) entry.nearestHeading = richCtx.nearestHeading;
if (richCtx.sectionClasses && !entry.sectionClasses) entry.sectionClasses = richCtx.sectionClasses;
if (richCtx.aboveFold !== undefined && entry.aboveFold === undefined) entry.aboveFold = richCtx.aboveFold;
if (richCtx.inBanner) entry.inBanner = true;
if (richCtx.inHomeLink) entry.inHomeLink = true;
if (richCtx.matchesTitleBrand) entry.matchesTitleBrand = true;
}
}

Expand Down Expand Up @@ -324,6 +353,9 @@ function deduplicateSrcsetVariants(assets: CatalogedAsset[]): CatalogedAsset[] {
if (a.notes && !existing.notes) {
existing.notes = a.notes;
}
if (a.inBanner) existing.inBanner = true;
if (a.inHomeLink) existing.inHomeLink = true;
if (a.matchesTitleBrand) existing.matchesTitleBrand = true;
// Keep the URL with highest w= value (largest image)
const existingW = getWidthParam(existing.url);
const newW = getWidthParam(a.url);
Expand Down
31 changes: 25 additions & 6 deletions packages/cli/src/capture/assetDownloader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,16 @@

import { writeFileSync, mkdirSync } from "node:fs";
import { join, extname } from "node:path";
import { createHash } from "node:crypto";
import type { DesignTokens, DownloadedAsset } from "./types.js";
import type { CatalogedAsset } from "./assetCataloger.js";

/**
 * Builds a content-addressed filename slug for an SVG.
 *
 * The slug is derived from a hash of the SVG bytes so the name cannot drift
 * from the content; label-derived names mis-assigned brands.
 *
 * @param svgSource - SVG markup (string) or raw file bytes (Buffer) to hash.
 * @param isLogo - When true the slug is prefixed `logo-`, otherwise `svg-`.
 * @returns The prefix followed by the first 8 hex characters of the SHA-1 digest.
 */
function svgContentHashSlug(svgSource: string | Buffer, isLogo: boolean): string {
  const hasher = createHash("sha1");
  hasher.update(svgSource);
  // Only the leading 8 hex characters of the digest are kept for the filename.
  const shortDigest = hasher.digest("hex").slice(0, 8);
  if (isLogo) {
    return `logo-${shortDigest}`;
  }
  return `svg-${shortDigest}`;
}

export async function downloadAssets(
tokens: DesignTokens,
outputDir: string,
Expand All @@ -22,15 +29,12 @@ export async function downloadAssets(
const assets: DownloadedAsset[] = [];
const downloadedUrls = new Set<string>();

// 1. ALL inline SVGs — save as files (logos get priority naming)
mkdirSync(join(outputDir, "assets", "svgs"), { recursive: true });
const usedSvgNames = new Set<string>();
for (let i = 0; i < tokens.svgs.length && i < 30; i++) {
const svg = tokens.svgs[i]!;
if (!svg.outerHTML || svg.outerHTML.length < 50) continue;
const label = svg.label?.replace(/[^a-zA-Z0-9-_ ]/g, "").trim();
let slug = label ? slugify(label) : svg.isLogo ? `logo-${i}` : `icon-${i}`;
// Deduplicate — two SVGs with same aria-label get suffixed
const slug = svgContentHashSlug(svg.outerHTML, !!svg.isLogo);
let finalSlug = slug;
let suffix = 2;
while (usedSvgNames.has(finalSlug)) {
Expand Down Expand Up @@ -135,8 +139,23 @@ export async function downloadAssets(
if (result.status !== "fulfilled" || !result.value) continue;
const { url, isPoster, parsedUrl, ext, buffer, catalog } = result.value;
try {
// Generate human-readable name from catalog context
const slug = deriveAssetName(parsedUrl, catalog, isPoster, imgIdx, usedNames);
let slug: string;
if (ext === ".svg") {
const c = catalog;
const brandRe = /logo|brand|wordmark/i;
const isLogo = !!(
c?.inBanner ||
c?.inHomeLink ||
c?.matchesTitleBrand ||
c?.contexts?.some((s) => brandRe.test(s)) ||
(c?.description && brandRe.test(c.description)) ||
(c?.nearestHeading && brandRe.test(c.nearestHeading)) ||
(c?.sectionClasses && brandRe.test(c.sectionClasses))
);
slug = svgContentHashSlug(buffer, isLogo);
} else {
slug = deriveAssetName(parsedUrl, catalog, isPoster, imgIdx, usedNames);
}
const name = `${slug}${ext}`;
usedNames.add(slug);
const localPath = `assets/${name}`;
Expand Down
69 changes: 52 additions & 17 deletions packages/cli/src/capture/contentExtractor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import type { Page } from "puppeteer-core";
import { existsSync, readdirSync, statSync, readFileSync } from "node:fs";
import { join } from "node:path";
import sharp from "sharp";
import type { CatalogedAsset } from "./assetCataloger.js";
import type { DesignTokens } from "./types.js";

Expand Down Expand Up @@ -232,7 +233,7 @@ export async function captionImagesWithGemini(
}
progress("design", `${Object.keys(geminiCaptions).length} images captioned with Gemini`);

// Caption SVGs by sending source code as text (vision API rejects image/svg+xml).
// Rasterize SVGs to PNG before captioning — Vision hallucinates wordmarks when reading SVG path text.
const svgFiles: Array<{ file: string; relPath: string }> = [];
const assetsDir = join(outputDir, "assets");
for (const f of readdirSync(assetsDir)) {
Expand All @@ -246,30 +247,59 @@ export async function captionImagesWithGemini(
}

if (svgFiles.length > 0) {
progress("design", `Captioning ${svgFiles.length} SVGs via code analysis...`);
progress("design", `Rasterizing + captioning ${svgFiles.length} SVGs via vision API...`);
const SVG_BATCH = 20;
const MAX_SVG_CHARS = 10_000;
const SVG_RENDER_SIZE = 256; // px — enough resolution for Gemini to read wordmarks, small enough to keep payload sub-MB
let svgsSkipped = 0;
for (let i = 0; i < svgFiles.length; i += SVG_BATCH) {
const batch = svgFiles.slice(i, i + SVG_BATCH);
const results = await Promise.allSettled(
batch.map(async ({ relPath }) => {
const filePath = join(assetsDir, relPath);
let svgText = readFileSync(filePath, "utf-8");
if (svgText.length > MAX_SVG_CHARS) {
svgText = svgText.slice(0, MAX_SVG_CHARS) + "\n<!-- truncated -->";
let pngBase64: string;
try {
// Flatten against a contrasting background — white-on-white SVGs render invisible to Vision.
const svgSource = readFileSync(filePath, "utf-8");
const lightFillHits = (
svgSource.match(/fill\s*=\s*["'](#fff(fff)?|white|#[ef][ef][ef]|#[ef]{6})["']/gi) ||
[]
).length;
const darkFillHits = (
svgSource.match(/fill\s*=\s*["'](#000(000)?|black|#[0-3]{6}|#[0-3]{3})["']/gi) || []
).length;
const bg =
lightFillHits > darkFillHits
? { r: 32, g: 32, b: 32 } // dark slate behind light glyphs
: { r: 255, g: 255, b: 255 }; // white behind dark glyphs (default)
const pngBuffer = await sharp(filePath)
.resize({
width: SVG_RENDER_SIZE,
height: SVG_RENDER_SIZE,
fit: "inside",
withoutEnlargement: false,
})
.flatten({ background: bg })
.png()
.toBuffer();
pngBase64 = pngBuffer.toString("base64");
} catch {
// exotic SVG features may break sharp; skip caption rather than block
svgsSkipped++;
return { file: relPath, caption: "" };
}
const response = await ai.models.generateContent({
model,
contents: [
{
role: "user",
parts: [
{ inlineData: { mimeType: "image/png", data: pngBase64 } },
{
text:
"This SVG code is from a website. Describe what it renders in ONE short sentence " +
"for a video storyboard. Focus on: what shape/icon/illustration it is, its colors. " +
"Be factual.\n\n" +
svgText,
"Describe this SVG asset rendered from a website in ONE short sentence for a video storyboard. " +
"Focus on: what shape/icon/illustration/wordmark it is, its colors, any text it contains. " +
"If you see a wordmark, READ THE LETTERS LITERALLY — do not guess a brand from context. " +
"Be factual.",
},
],
},
Expand All @@ -293,6 +323,12 @@ export async function captionImagesWithGemini(
);
}
progress("design", `${Object.keys(geminiCaptions).length} total assets captioned`);
if (svgsSkipped > 0) {
progress(
"design",
`skipped rasterizing ${svgsSkipped} SVG(s) — fell back to label-derived`,
);
}
}
} catch (err) {
warnings.push(`Gemini captioning failed: ${err}`);
Expand Down Expand Up @@ -358,11 +394,6 @@ export function generateAssetDescriptions(
const svgsPath = join(assetsPath, "svgs");
for (const file of readdirSync(svgsPath)) {
if (!file.endsWith(".svg")) continue;
const geminiCaption = geminiCaptions[`svgs/${file}`];
if (geminiCaption) {
svgLines.push(`svgs/${file} — ${geminiCaption}`);
continue;
}
const svgMatch = tokens.svgs.find(
(s) =>
s.label &&
Expand All @@ -373,9 +404,13 @@ export function generateAssetDescriptions(
.slice(0, 15),
),
);
const geminiCaption = geminiCaptions[`svgs/${file}`];
if (geminiCaption) {
svgLines.push(`svgs/${file} — ${geminiCaption}`);
continue;
}
const label = svgMatch?.label || file.replace(".svg", "").replace(/-/g, " ");
const isLogo = svgMatch?.isLogo || file.includes("logo");
svgLines.push(`svgs/${file} — ${isLogo ? "logo: " : "icon: "}${label}`);
svgLines.push(`svgs/${file} — ${label}`);
}
} catch {
/* no svgs dir */
Expand Down
13 changes: 9 additions & 4 deletions packages/cli/src/capture/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -579,14 +579,19 @@ export async function captureWebsite(
const lines = generateAssetDescriptions(outputDir, tokens, catalogedAssets, geminiCaptions);

if (lines.length > 0) {
const hasGeminiKey = !!(process.env.GEMINI_API_KEY || process.env.GOOGLE_API_KEY);
const header = hasGeminiKey
? "# Asset Descriptions\n\nOne line per file. Read this instead of opening every image individually.\n\nTo find a specific brand or icon, **grep this file for the brand name in the description text** (e.g. `grep -i 'autodesk' asset-descriptions.md`). The Gemini Vision captions identify what's actually in each file — that's the agent's selector.\n\nThe `logo-<hash>.svg` filename prefix is a cheap structural hint (DOM said this SVG was inside a `<header>`, home-link `<a>`, or had an aria-label matching the page brand). It is NOT a content claim — many `logo-*` files are nav icons or decorative shapes. Trust the captions, not the filename prefix.\n\n"
: "# Asset Descriptions\n\n⚠️ GEMINI_API_KEY not set — descriptions below are catalog-derived (alt text, headings, section context, filename) instead of Vision-generated. To get richer Vision descriptions on the next capture, set GEMINI_API_KEY (or GOOGLE_API_KEY) and re-run.\n\nThe `logo-<hash>.svg` filename prefix is a structural hint (DOM said this SVG was inside a `<header>`, home-link `<a>`, or had an aria-label matching the page brand). To pick the actual brand logo without Vision, open the `logo-*` candidates in a previewer or rasterize them with `sharp` before referencing — composing a fake logo ships off-brand in the final video.\n\n";
writeFileSync(
join(outputDir, "extracted", "asset-descriptions.md"),
"# Asset Descriptions\n\nOne line per file. Read this instead of opening every image individually.\n\n" +
lines.map((l) => "- " + l).join("\n") +
"\n",
header + lines.map((l) => "- " + l).join("\n") + "\n",
"utf-8",
);
progress("design", `${lines.length} asset descriptions written`);
progress(
"design",
`${lines.length} asset descriptions written${hasGeminiKey ? "" : " (no Gemini key — catalog-fallback mode)"}`,
);
}
} catch {
/* non-critical */
Expand Down
29 changes: 28 additions & 1 deletion packages/cli/src/capture/tokenExtractor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -353,14 +353,41 @@ const EXTRACT_SCRIPT = `(() => {
// Keep SVGs that have a label OR are at least 16px wide OR are inside a logo/brand context
var inLogoContext = svg.closest('[class*="logo"], [class*="brand"], [class*="partner"], [class*="customer"], [class*="marquee"]') !== null;
if (!label && !inLogoContext && (!w || parseInt(w) < 16)) return null;
var isLogo = (label && label.toLowerCase().indexOf("logo") !== -1) ||
svg.closest('[class*="logo"], [class*="brand"], [class*="home"], [class*="marquee"], [class*="partner"], [class*="customer"]') !== null;
if (!isLogo) {
var bannerEl = svg.closest('header, nav, [role="banner"]');
if (bannerEl) {
var firstSvg = bannerEl.querySelector('svg');
if (firstSvg === svg) isLogo = true;
}
}
if (!isLogo) {
var anchor = svg.closest('a[href]');
if (anchor) {
var href = anchor.getAttribute('href') || '';
if (href === '/' || href === '#' || href === './' ||
/^https?:\\/\\/[^/]+\\/?$/.test(href)) {
isLogo = true;
}
}
}
if (!isLogo) {
var ariaLabel = svg.getAttribute('aria-label') || svg.getAttribute('title') || '';
var titleBrand = (document.title || '').split(/[-|—]/)[0].trim();
if (titleBrand.length > 1 && titleBrand.length < 30 &&
ariaLabel.toLowerCase().indexOf(titleBrand.toLowerCase()) !== -1) {
isLogo = true;
}
}
var rect = svg.getBoundingClientRect();
return {
label: label || undefined,
viewBox: svg.getAttribute("viewBox") || undefined,
width: Math.round(rect.width),
height: Math.round(rect.height),
outerHTML: svg.outerHTML.slice(0, 10000),
isLogo: (label && label.toLowerCase().indexOf("logo") !== -1) || svg.closest('[class*="logo"], [class*="brand"], [class*="home"], [class*="marquee"], [class*="partner"], [class*="customer"]') !== null
isLogo: isLogo
};
}).filter(Boolean).slice(0, 50);

Expand Down
11 changes: 10 additions & 1 deletion packages/cli/src/commands/capture.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,20 @@ export const examples: Example[] = [
["Capture a website", "hyperframes capture https://stripe.com"],
["Capture to a specific directory", "hyperframes capture https://linear.app -o linear-video"],
["JSON output for AI agents", "hyperframes capture https://example.com --json"],
[
"Pull a video from the captured manifest by index",
"hyperframes capture video ./linear-video --index 0",
],
];

export default defineCommand({
meta: {
name: "capture",
description: "Capture a website as editable HyperFrames components",
},
subCommands: {
video: () => import("./capture/video.js").then((m) => m.default),
},
args: {
url: {
type: "positional",
Expand Down Expand Up @@ -46,7 +53,9 @@ export default defineCommand({
async run({ args }) {
const url = args.url as string;

// Validate URL
// citty fires parent's run AFTER routing to a subcommand; skip when args.url is a subcommand name.
if (url === "video") return;

try {
new URL(url);
} catch {
Expand Down
Loading
Loading