Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions generated/benchmarks/INCREMENTAL-BENCHMARKS.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ Import resolution: native batch vs JS fallback throughput.
|---------|--------|------:|-----------:|------:|-------:|------------------:|-------------:|
| 3.10.0 | native | 745 | 2.0s ↓34% | 15ms ↑50% | 66ms ↑22% | 5ms ↓18% | 7ms ↓38% |
| 3.10.0 | wasm | 745 | 8.4s ↓40% | 13ms ↓90% | 51ms ↓18% | 5ms ↓18% | 7ms ↓38% |
| 3.9.6 | native | 744 | 3.0s | 10ms | 54ms | 7ms ~ | 11ms ↑7% |
| 3.9.6 | wasm | 744 | 14.0s | 131ms | 62ms | 7ms ~ | 11ms ↑7% |
| 3.9.6 | native | 744 | 3.0s ↑39% | 10ms ↑11% | 54ms ↓87% | 7ms ~ | 11ms ↑7% |
| 3.9.6 | wasm | 744 | 14.0s ↑86% | 131ms ↑589% | 62ms ~ | 7ms ~ | 11ms ↑7% |
| 3.9.4 | native | 668 | 2.1s ↓4% | 9ms ~ | 406ms ↑7% | 6ms ↑7% | 9ms ↓15% |
| 3.9.4 | wasm | 668 | 7.6s ~ | 19ms ↑6% | 61ms ↓90% | 6ms ↑7% | 9ms ↓15% |
| 3.9.3 | native | 667 | 2.2s ↓76% | 9ms ↑13% | 380ms ↓32% | 6ms ↓5% | 11ms ↑6% |
Expand All @@ -22,10 +22,10 @@ Import resolution: native batch vs JS fallback throughput.
| 3.9.0 | wasm | 567 | 6.8s ↓3% | 12ms ↓20% | 541ms ↓10% | 5ms ↓18% | 11ms ~ |
| 3.8.1 | native | 565 | 6.6s ↑468% | 8ms ↑14% | 41ms ↑24% | 6ms ↑51% | 11ms ↓14% |
| 3.8.1 | wasm | 565 | 7.0s ↑493% | 15ms ↑88% | 603ms ↑1727% | 6ms ↑51% | 11ms ↓14% |
| 3.8.0 | native | 564 | 1.2s | 7ms | 33ms | 4ms ↑2% | 12ms ↓19% |
| 3.8.0 | native | 564 | 1.2s ↓50% | 7ms ↓46% | 33ms ↓90% | 4ms ↑2% | 12ms ↓19% |
| 3.8.0 | wasm | 564 | 1.2s ↓82% | 8ms ↓58% | 33ms ↓94% | 4ms ↑2% | 12ms ↓19% |
| 3.7.0 | wasm | 532 | 6.4s ↑5% | 19ms ↑46% | 558ms ↑2% | 4ms ↑3% | 15ms ↑31% |
| 3.6.0 | wasm | 514 | 6.1s | 13ms | 545ms | 4ms ↓3% | 12ms ↑9% |
| 3.6.0 | wasm | 514 | 6.1s ↑20% | 13ms ~ | 545ms ↑7% | 4ms ↓3% | 12ms ↑9% |
| 3.4.1 | native | 473 | 2.4s ↑3% | 13ms ↑8% | 331ms ↓26% | 4ms ~ | 13ms ↑8% |
| 3.4.1 | wasm | 473 | 5.1s ~ | 13ms ↑8% | 511ms ↓17% | 4ms ~ | 13ms ↑8% |
| 3.4.0 | native | 473 | 2.3s ↓4% | 12ms ↑9% | 448ms ↑29% | 4ms ↓58% | 12ms ↓54% |
Expand Down
105 changes: 67 additions & 38 deletions scripts/update-incremental-report.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@ if (arg) {
const entry = JSON.parse(jsonText);

// ── Paths ────────────────────────────────────────────────────────────────
const reportPath = path.join(root, 'generated', 'benchmarks', 'INCREMENTAL-BENCHMARKS.md');
const reportPath =
process.env.CODEGRAPH_INCREMENTAL_REPORT_PATH ??
path.join(root, 'generated', 'benchmarks', 'INCREMENTAL-BENCHMARKS.md');

// ── Load existing history ────────────────────────────────────────────────
let history = [];
Expand All @@ -53,16 +55,24 @@ if (!isDev) {
}
history.unshift(entry);

function findPrevRelease(hist, fromIdx) {
// Walk back through release history to find the most recent non-null value
// for a specific metric. Lets trend annotations skip past releases whose
// workers crashed and stored null, instead of hiding regressions behind an
// empty cell.
function findPrevMetric(hist, fromIdx, getter) {
for (let i = fromIdx + 1; i < hist.length; i++) {
if (hist[i].version !== 'dev') return hist[i];
if (hist[i].version === 'dev') continue;
const v = getter(hist[i]);
if (v != null) return v;
}
return null;
}

// ── Helpers ──────────────────────────────────────────────────────────────
function trend(current, previous, lowerIsBetter = true) {
if (current == null || previous == null) return '';
// Guard against null/undefined and a zero baseline — dividing by zero would
// emit `↑Infinity%`. Matches the `previous === 0` guard in checkRegression.
if (current == null || previous == null || previous === 0) return '';
const pct = ((current - previous) / previous) * 100;
if (Math.abs(pct) < 2) return ' ~';
if (lowerIsBetter) {
Expand All @@ -76,19 +86,21 @@ function formatMs(ms) {
return `${Math.round(ms)}ms`;
}

function engineRow(h, prev, engineKey) {
function engineRow(hist, i, engineKey, prevResolveNative, prevResolveJs) {
const h = hist[i];
const e = h[engineKey];
const p = prev?.[engineKey] || null;
if (!e) return null;

const fullT = trend(e.fullBuildMs, p?.fullBuildMs);
const noopT = trend(e.noopRebuildMs, p?.noopRebuildMs);
const oneT = trend(e.oneFileRebuildMs, p?.oneFileRebuildMs);
const fullT = trend(e.fullBuildMs, findPrevMetric(hist, i, (r) => r[engineKey]?.fullBuildMs));
const noopT = trend(e.noopRebuildMs, findPrevMetric(hist, i, (r) => r[engineKey]?.noopRebuildMs));
const oneT = trend(
e.oneFileRebuildMs,
findPrevMetric(hist, i, (r) => r[engineKey]?.oneFileRebuildMs),
);

const r = h.resolve;
const pr = prev?.resolve || null;
const natT = r.nativeBatchMs != null ? trend(r.nativeBatchMs, pr?.nativeBatchMs) : '';
const jsT = trend(r.jsFallbackMs, pr?.jsFallbackMs);
const natT = r.nativeBatchMs != null ? trend(r.nativeBatchMs, prevResolveNative) : '';
const jsT = trend(r.jsFallbackMs, prevResolveJs);

const noopCell = e.noopRebuildMs != null ? `${formatMs(e.noopRebuildMs)}${noopT}` : 'n/a';
const oneFileCell = e.oneFileRebuildMs != null ? `${formatMs(e.oneFileRebuildMs)}${oneT}` : 'n/a';
Expand All @@ -115,11 +127,13 @@ md +=
'|---------|--------|------:|-----------:|------:|-------:|------------------:|-------------:|\n';

for (let i = 0; i < history.length; i++) {
const h = history[i];
const prev = findPrevRelease(history, i);

const nativeRow = engineRow(h, prev, 'native');
const wasmRow = engineRow(h, prev, 'wasm');
// Resolve metrics are release-level (not engine-specific), so pre-compute
// their fallback baselines once per release instead of having engineRow walk
// history twice (once per engine type).
const prevResolveNative = findPrevMetric(history, i, (x) => x.resolve?.nativeBatchMs);
const prevResolveJs = findPrevMetric(history, i, (x) => x.resolve?.jsFallbackMs);
const nativeRow = engineRow(history, i, 'native', prevResolveNative, prevResolveJs);
const wasmRow = engineRow(history, i, 'wasm', prevResolveNative, prevResolveJs);
if (nativeRow) md += nativeRow + '\n';
if (wasmRow) md += wasmRow + '\n';
}
Comment on lines 129 to 139
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 The natT/jsT resolve trends are computed inside engineRow, which is called once for each engine type per release. Since h.resolve is release-level (not engine-specific), findPrevMetric for nativeBatchMs and jsFallbackMs is invoked twice per release — returning identical results both times. This is harmless but wasteful for long history arrays. Consider hoisting the resolve trend calls to the call site, alongside the nativeRow/wasmRow pair, so they are computed once and passed in.

Suggested change
for (let i = 0; i < history.length; i++) {
const h = history[i];
const prev = findPrevRelease(history, i);
const nativeRow = engineRow(h, prev, 'native');
const wasmRow = engineRow(h, prev, 'wasm');
const nativeRow = engineRow(history, i, 'native');
const wasmRow = engineRow(history, i, 'wasm');
if (nativeRow) md += nativeRow + '\n';
if (wasmRow) md += wasmRow + '\n';
}
for (let i = 0; i < history.length; i++) {
// Resolve metrics are release-level; pre-compute their trends once per
// release so engineRow doesn't redundantly walk history twice.
const prevNative = findPrevMetric(history, i, (x) => x.resolve?.nativeBatchMs);
const prevJs = findPrevMetric(history, i, (x) => x.resolve?.jsFallbackMs);
const nativeRow = engineRow(history, i, 'native', prevNative, prevJs);
const wasmRow = engineRow(history, i, 'wasm', prevNative, prevJs);
if (nativeRow) md += nativeRow + '\n';
if (wasmRow) md += wasmRow + '\n';
}

Fix in Claude Code

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in 628d37d — hoisted findPrevMetric for the release-level resolve metrics (nativeBatchMs, jsFallbackMs) to the call site and pass them into engineRow, so they are computed once per release instead of twice.

Expand Down Expand Up @@ -163,7 +177,6 @@ console.error(`Updated ${path.relative(root, reportPath)}`);

// ── Regression detection ─────────────────────────────────────────────────
const REGRESSION_THRESHOLD = 0.15; // 15% regression triggers a warning
const prev = findPrevRelease(history, 0);

function checkRegression(label, current, previous) {
if (previous == null || previous === 0) return;
Expand All @@ -178,26 +191,42 @@ function checkRegression(label, current, previous) {
}
}

if (prev) {
for (const engineKey of ['native', 'wasm']) {
const e = latest[engineKey];
const p = prev[engineKey];
if (!e || !p) continue;
const tag = `[${engineKey}]`;
checkRegression(`${tag} Full build`, e.fullBuildMs, p.fullBuildMs);
if (e.noopRebuildMs != null && p.noopRebuildMs != null) {
checkRegression(`${tag} No-op rebuild`, e.noopRebuildMs, p.noopRebuildMs);
}
if (e.oneFileRebuildMs != null && p.oneFileRebuildMs != null) {
checkRegression(`${tag} 1-file rebuild`, e.oneFileRebuildMs, p.oneFileRebuildMs);
}
for (const engineKey of ['native', 'wasm']) {
const e = latest[engineKey];
if (!e) continue;
const tag = `[${engineKey}]`;
checkRegression(
`${tag} Full build`,
e.fullBuildMs,
findPrevMetric(history, 0, (r) => r[engineKey]?.fullBuildMs),
);
if (e.noopRebuildMs != null) {
checkRegression(
`${tag} No-op rebuild`,
e.noopRebuildMs,
findPrevMetric(history, 0, (r) => r[engineKey]?.noopRebuildMs),
);
}
const re = latest.resolve;
const rp = prev.resolve;
if (re && rp) {
checkRegression(`[resolve] JS fallback`, re.jsFallbackMs, rp.jsFallbackMs);
if (re.nativeBatchMs != null && rp.nativeBatchMs != null) {
checkRegression(`[resolve] Native batch`, re.nativeBatchMs, rp.nativeBatchMs);
}
if (e.oneFileRebuildMs != null) {
checkRegression(
`${tag} 1-file rebuild`,
e.oneFileRebuildMs,
findPrevMetric(history, 0, (r) => r[engineKey]?.oneFileRebuildMs),
);
}
}
const re = latest.resolve;
if (re) {
checkRegression(
`[resolve] JS fallback`,
re.jsFallbackMs,
findPrevMetric(history, 0, (r) => r.resolve?.jsFallbackMs),
);
if (re.nativeBatchMs != null) {
checkRegression(
`[resolve] Native batch`,
re.nativeBatchMs,
findPrevMetric(history, 0, (r) => r.resolve?.nativeBatchMs),
);
}
}
143 changes: 143 additions & 0 deletions tests/unit/incremental-report-trend-fallback.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
import { execFileSync } from 'node:child_process';
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';

const __dirname = path.dirname(fileURLToPath(import.meta.url));
const repoRoot = path.resolve(__dirname, '..', '..');
const scriptPath = path.join(repoRoot, 'scripts', 'update-incremental-report.ts');

// --experimental-strip-types works in Node 22.6+ through current 24.x; the
// renamed --strip-types was added then removed again across 24.x minor
// versions, so prefer the experimental name for compatibility.
const stripFlag = '--experimental-strip-types';

let tmpDir: string;
let reportPath: string;
let entryPath: string;

beforeEach(() => {
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-trend-fallback-'));
reportPath = path.join(tmpDir, 'INCREMENTAL-BENCHMARKS.md');
entryPath = path.join(tmpDir, 'entry.json');
});

afterEach(() => {
if (tmpDir) fs.rmSync(tmpDir, { recursive: true, force: true });
});

function runScript() {
execFileSync('node', [stripFlag, scriptPath, entryPath], {
env: { ...process.env, CODEGRAPH_INCREMENTAL_REPORT_PATH: reportPath },
stdio: 'pipe',
});
}

function row(content: string, version: string, engine: string): string {
const re = new RegExp(`^\\| ${version.replace(/\./g, '\\.')} \\| ${engine} \\|.*$`, 'm');
const m = content.match(re);
if (!m) throw new Error(`row not found for ${version}/${engine}`);
return m[0];
}

describe('update-incremental-report trend fallback', () => {
it('falls back to nearest non-null prior release when previous metrics are null', () => {
// Seed: 9.9.4 has full data. 9.9.5 is the null release (workers crashed).
const initial = `# Codegraph Incremental Build Benchmarks

<!-- INCREMENTAL_BENCHMARK_DATA
[
{
"version": "9.9.5",
"date": "2026-05-01",
"files": 100,
"wasm": null,
"native": null,
"resolve": { "imports": 100, "nativeBatchMs": 5, "jsFallbackMs": 10, "perImportNativeMs": 0, "perImportJsMs": 0 }
},
{
"version": "9.9.4",
"date": "2026-04-18",
"files": 100,
"wasm": { "fullBuildMs": 1000, "noopRebuildMs": 10, "oneFileRebuildMs": 50 },
"native": { "fullBuildMs": 500, "noopRebuildMs": 5, "oneFileRebuildMs": 25 },
"resolve": { "imports": 100, "nativeBatchMs": 5, "jsFallbackMs": 10, "perImportNativeMs": 0, "perImportJsMs": 0 }
}
]
-->
`;
fs.writeFileSync(reportPath, initial);
// New release 9.9.6 doubles wasm full-build and 13x's no-op rebuild — should
// compare against 9.9.4 (skipping null 9.9.5) and show large regressions.
fs.writeFileSync(
entryPath,
JSON.stringify({
version: '9.9.6',
date: '2026-04-30',
files: 100,
wasm: { fullBuildMs: 2000, noopRebuildMs: 130, oneFileRebuildMs: 60 },
native: { fullBuildMs: 600, noopRebuildMs: 8, oneFileRebuildMs: 30 },
resolve: {
imports: 100,
nativeBatchMs: 5,
jsFallbackMs: 10,
perImportNativeMs: 0,
perImportJsMs: 0,
},
}),
);

runScript();
const out = fs.readFileSync(reportPath, 'utf8');

const wasmRow = row(out, '9.9.6', 'wasm');
// Full build: 1000 → 2000 = +100%
expect(wasmRow).toContain('↑100%');
// No-op: 10 → 130 = +1200%
expect(wasmRow).toContain('↑1200%');
// 1-file: 50 → 60 = +20%
expect(wasmRow).toContain('↑20%');

// And the null 9.9.5 row still renders without a trend (no prior data
// for it to compare against — it itself has no metrics).
expect(out).not.toMatch(/^\| 9\.9\.5 \| wasm \|/m);
expect(out).not.toMatch(/^\| 9\.9\.5 \| native \|/m);
});

it('leaves trend empty when no prior release has the metric at all', () => {
const initial = `# Codegraph Incremental Build Benchmarks

<!-- INCREMENTAL_BENCHMARK_DATA
[]
-->
`;
fs.writeFileSync(reportPath, initial);
fs.writeFileSync(
entryPath,
JSON.stringify({
version: '1.0.0',
date: '2026-01-01',
files: 50,
wasm: { fullBuildMs: 1000, noopRebuildMs: 10, oneFileRebuildMs: 50 },
native: { fullBuildMs: 500, noopRebuildMs: 5, oneFileRebuildMs: 25 },
resolve: {
imports: 50,
nativeBatchMs: 1,
jsFallbackMs: 2,
perImportNativeMs: 0,
perImportJsMs: 0,
},
}),
);

runScript();
const out = fs.readFileSync(reportPath, 'utf8');
const wasmRow = row(out, '1.0.0', 'wasm');
// No prior history => no arrow annotations on any cell
expect(wasmRow).not.toContain('↑');
expect(wasmRow).not.toContain('↓');
expect(wasmRow).not.toContain('~');
});
});
Loading