From 3f948f07f726b1b217e06e0aef2e5d44aff466c2 Mon Sep 17 00:00:00 2001
From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com>
Date: Fri, 3 Jul 2026 10:52:22 +0800
Subject: [PATCH 1/2] feat: ingest and display kv_transfer_lib on benchmark
results
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The benchmarking repo now derives the KV-cache transfer library
(Mooncake, NIXL, MoRI-IO, UCX) at result-processing time and emits an
optional `kv_transfer_lib` string on disaggregated result rows. Until
now that detail was invisible downstream — e.g. dynamo-sglang covers
both Mooncake (DSV4) and NIXL (GLM-5 STP) recipes under one framework
key.
Forward-only ingestion:
- Migration 008: adds a nullable `kv_transfer_lib` column to benchmark_results
(result-level metadata, not part of the config natural key, so config
identity and trend-line continuity are unchanged) and recreates the
latest_benchmarks materialized view to expose it.
- ETL: benchmark-mapper surfaces the field as a metrics sibling
(lowercased, defensively narrowed); benchmark-ingest writes it.
- Queries/API: all benchmark SELECTs and the json-provider return it.
- UI: tooltips show a "KV Transfer" line in the official, overlay
(unofficial run), and GPU-comparison views. NULL/absent renders
nothing — unknown is never guessed.
Historical rows stay NULL (the runner didn't emit the field); the
tooltip simply omits the line for them. AMD history is derivable from
the framework column if a backfill is ever wanted.
---
.../src/app/api/unofficial-run/route.test.ts | 10 +++
.../app/src/app/api/unofficial-run/route.ts | 1 +
.../app/src/components/inference/types.ts | 4 ++
.../inference/utils/tooltip-utils.test.ts | 70 +++++++++++++++++++
.../inference/utils/tooltipUtils.ts | 24 +++++++
packages/app/src/lib/api.ts | 7 ++
.../app/src/lib/benchmark-transform.test.ts | 20 ++++++
packages/app/src/lib/benchmark-transform.ts | 1 +
.../008_benchmark_results_kv_transfer_lib.sql | 52 ++++++++++++++
packages/db/src/etl/benchmark-ingest.ts | 10 ++-
packages/db/src/etl/benchmark-mapper.test.ts | 37 ++++++++++
packages/db/src/etl/benchmark-mapper.ts | 18 +++++
packages/db/src/json-provider.ts | 5 ++
packages/db/src/queries/benchmarks.ts | 11 +++
14 files changed, 267 insertions(+), 3 deletions(-)
create mode 100644 packages/db/migrations/008_benchmark_results_kv_transfer_lib.sql
diff --git a/packages/app/src/app/api/unofficial-run/route.test.ts b/packages/app/src/app/api/unofficial-run/route.test.ts
index be71324b..ef2bbb28 100644
--- a/packages/app/src/app/api/unofficial-run/route.test.ts
+++ b/packages/app/src/app/api/unofficial-run/route.test.ts
@@ -106,6 +106,16 @@ describe('normalizeArtifactRows', () => {
expect(rows[0].hardware).toBe('mi355x');
});
+ it('carries kv_transfer_lib so overlays show the KV transfer library', () => {
+ const rows = normalizeArtifactRows([rawRow({ kv_transfer_lib: 'mooncake' })], '2026-03-01');
+ expect(rows[0].kv_transfer_lib).toBe('mooncake');
+ });
+
+ it('nulls kv_transfer_lib for artifacts predating the field', () => {
+ const rows = normalizeArtifactRows([rawRow()], '2026-03-01');
+ expect(rows[0].kv_transfer_lib).toBeNull();
+ });
+
it('resolves model from infmax_model_prefix', () => {
const rows = normalizeArtifactRows(
[rawRow({ infmax_model_prefix: 'gptoss', model: 'openai/gpt-oss-120b' })],
diff --git a/packages/app/src/app/api/unofficial-run/route.ts b/packages/app/src/app/api/unofficial-run/route.ts
index 072c99f1..cfd76e0d 100644
--- a/packages/app/src/app/api/unofficial-run/route.ts
+++ b/packages/app/src/app/api/unofficial-run/route.ts
@@ -58,6 +58,7 @@ export function normalizeArtifactRows(
// Surface the same per-worker payload the DB path emits so unofficial
// overlays carry the multinode measured-power breakdown too.
workers: params.workers,
+ kv_transfer_lib: params.kvTransferLib ?? null,
date,
run_url: runUrl,
});
diff --git a/packages/app/src/components/inference/types.ts b/packages/app/src/components/inference/types.ts
index ecf2fe33..bb1c9c8d 100644
--- a/packages/app/src/components/inference/types.ts
+++ b/packages/app/src/components/inference/types.ts
@@ -141,6 +141,10 @@ export interface AggDataEntry {
// (a prefill, decode, agg, or frontend role). Optional because pre-multinode
// and pre-aggregate_power.py runs don't emit it.
workers?: WorkerPower[];
+ // KV-cache transfer library for disagg runs ('mooncake', 'nixl', 'mori',
+ // 'ucx'). Null/undefined = unknown (pre-field history, non-disagg runs) —
+ // render nothing rather than assume a default.
+ kv_transfer_lib?: string | null;
disagg: boolean;
num_prefill_gpu: number;
num_decode_gpu: number;
diff --git a/packages/app/src/components/inference/utils/tooltip-utils.test.ts b/packages/app/src/components/inference/utils/tooltip-utils.test.ts
index 5a5bd7e9..a37f1ea7 100644
--- a/packages/app/src/components/inference/utils/tooltip-utils.test.ts
+++ b/packages/app/src/components/inference/utils/tooltip-utils.test.ts
@@ -366,3 +366,73 @@ describe('generateGPUGraphTooltipContent', () => {
expect(html).toContain('vllm-v0.6.0
abc123');
});
});
+
+// ===========================================================================
+// KV transfer library line (official + overlay + GPU-graph tooltips)
+// ===========================================================================
+describe('kv_transfer_lib tooltip line', () => {
+ function overlayConfig(overrides: Partial = {}): OverlayTooltipConfig {
+ return {
+ ...tooltipConfig(),
+ overlayData: {
+ label: 'feature-branch',
+ hardwareConfig: mockHardwareConfig,
+ data: [],
+ runUrl: 'https://example.com',
+ } as any,
+ ...overrides,
+ };
+ }
+
+ it('shows the mapped display label in the official tooltip', () => {
+ const html = generateTooltipContent(
+ tooltipConfig({ data: pt({ kv_transfer_lib: 'mooncake' }) }),
+ );
+ expect(html).toContain('KV Transfer');
+ expect(html).toContain('Mooncake');
+ });
+
+ it('maps known libraries to display casing', () => {
+ for (const [raw, label] of [
+ ['nixl', 'NIXL'],
+ ['mori', 'MoRI-IO'],
+ ['ucx', 'UCX'],
+ ] as const) {
+ const html = generateTooltipContent(tooltipConfig({ data: pt({ kv_transfer_lib: raw }) }));
+ expect(html).toContain(label);
+ }
+ });
+
+ it('uppercases unmapped values instead of hiding them', () => {
+ const html = generateTooltipContent(
+ tooltipConfig({ data: pt({ kv_transfer_lib: 'somefuturelib' }) }),
+ );
+ expect(html).toContain('SOMEFUTURELIB');
+ });
+
+ it('renders nothing when the field is absent (unknown history)', () => {
+ const html = generateTooltipContent(tooltipConfig());
+ expect(html).not.toContain('KV Transfer');
+ });
+
+ it('shows the line in overlay (unofficial run) tooltips', () => {
+ const html = generateOverlayTooltipContent(
+ overlayConfig({ data: pt({ kv_transfer_lib: 'mooncake' }) }),
+ );
+ expect(html).toContain('KV Transfer');
+ expect(html).toContain('Mooncake');
+ });
+
+ it('omits the line in overlay tooltips when absent', () => {
+ const html = generateOverlayTooltipContent(overlayConfig());
+ expect(html).not.toContain('KV Transfer');
+ });
+
+ it('shows the line in GPU-graph (date comparison) tooltips', () => {
+ const html = generateGPUGraphTooltipContent(
+ tooltipConfig({ data: pt({ kv_transfer_lib: 'nixl' }) }),
+ );
+ expect(html).toContain('KV Transfer');
+ expect(html).toContain('NIXL');
+ });
+});
diff --git a/packages/app/src/components/inference/utils/tooltipUtils.ts b/packages/app/src/components/inference/utils/tooltipUtils.ts
index 9143f40f..8bcff3bd 100644
--- a/packages/app/src/components/inference/utils/tooltipUtils.ts
+++ b/packages/app/src/components/inference/utils/tooltipUtils.ts
@@ -88,6 +88,27 @@ const runLinkHTML = (runUrl?: string) =>
const tooltipLine = (label: string, value: string | number) =>
`${label}: ${value}
`;
+/** Display labels for kv_transfer_lib values; unmapped values are uppercased. */
+const KV_TRANSFER_LIB_LABELS: Record<string, string> = {
+  mooncake: 'Mooncake',
+  nixl: 'NIXL',
+  mori: 'MoRI-IO',
+  ucx: 'UCX',
+};
+
+/**
+ * KV-cache transfer library line for disagg runs. Empty when the field is
+ * absent (pre-field history, non-disagg runs, unresolvable recipes) — unknown
+ * must render nothing, never a guessed default.
+ */
+const kvTransferTooltipLine = (d: InferenceData): string => {
+  const lib = d.kv_transfer_lib;
+  if (!lib) return '';
+  // Own-key lookup only: 'constructor' etc. must not resolve via Object.prototype.
+  const known = Object.prototype.hasOwnProperty.call(KV_TRANSFER_LIB_LABELS, lib);
+  return tooltipLine('KV Transfer', (known && KV_TRANSFER_LIB_LABELS[lib]) || lib.toUpperCase());
+};
+
const shortenSha = (image: string) =>
image.replaceAll(/(?sha256:[a-f0-9]{7})[a-f0-9]+/giu, '$…');
@@ -180,6 +201,7 @@ export const generateTooltipContent = (config: TooltipConfig): string => {
}
${tooltipLine('Total GPUs', d.tp)}
${generateParallelismHTML(d)}
+ ${kvTransferTooltipLine(d)}
Concurrency: ${d.conc}
@@ -236,6 +258,7 @@ export const generateOverlayTooltipContent = (config: OverlayTooltipConfig): str
${tooltipLine('Total GPUs', d.tp)}
${generateParallelismHTML(d)}
+ ${kvTransferTooltipLine(d)}
Concurrency: ${d.conc}
@@ -295,6 +318,7 @@ export const generateGPUGraphTooltipContent = (config: TooltipConfig): string =>
}
${tooltipLine('Total GPUs', d.tp)}
${generateParallelismHTML(d)}
+ ${kvTransferTooltipLine(d)}
Concurrency: ${d.conc}
diff --git a/packages/app/src/lib/api.ts b/packages/app/src/lib/api.ts
index 0dac5883..42ec2515 100644
--- a/packages/app/src/lib/api.ts
+++ b/packages/app/src/lib/api.ts
@@ -39,6 +39,13 @@ export interface BenchmarkRow {
* aggregate_power.py.
*/
workers?: WorkerPower[];
+ /**
+ * KV-cache transfer library for disaggregated runs ('mooncake', 'nixl',
+ * 'mori', 'ucx'), emitted by the runner since mid-2026. Null/undefined means
+ * unknown (pre-field history, non-disagg runs, unresolvable recipes) — the
+ * UI must render nothing rather than assume a default.
+ */
+ kv_transfer_lib?: string | null;
date: string;
run_url: string | null;
}
diff --git a/packages/app/src/lib/benchmark-transform.test.ts b/packages/app/src/lib/benchmark-transform.test.ts
index 8f27cc8f..4f53f9cf 100644
--- a/packages/app/src/lib/benchmark-transform.test.ts
+++ b/packages/app/src/lib/benchmark-transform.test.ts
@@ -793,3 +793,23 @@ describe('transformBenchmarkRows — dp_attention narrowing', () => {
expect(point.decode_dp_attention).toBe(true);
});
});
+
+describe('kv_transfer_lib passthrough', () => {
+ it('carries kv_transfer_lib from the row to the chart point', () => {
+ const rows = [makeRow({ kv_transfer_lib: 'mooncake' })];
+ const { chartData } = transformBenchmarkRows(rows);
+ expect(chartData.flat()[0].kv_transfer_lib).toBe('mooncake');
+ });
+
+ it('normalizes null to undefined (unknown)', () => {
+ const rows = [makeRow({ kv_transfer_lib: null })];
+ const { chartData } = transformBenchmarkRows(rows);
+ expect(chartData.flat()[0].kv_transfer_lib).toBeUndefined();
+ });
+
+ it('is undefined when the row predates the field', () => {
+ const rows = [makeRow()];
+ const { chartData } = transformBenchmarkRows(rows);
+ expect(chartData.flat()[0].kv_transfer_lib).toBeUndefined();
+ });
+});
diff --git a/packages/app/src/lib/benchmark-transform.ts b/packages/app/src/lib/benchmark-transform.ts
index ac806b79..a1daacfe 100644
--- a/packages/app/src/lib/benchmark-transform.ts
+++ b/packages/app/src/lib/benchmark-transform.ts
@@ -72,6 +72,7 @@ export function rowToAggDataEntry(row: BenchmarkRow): AggDataEntry {
// scalar `metrics` dict (see api.ts). Narrow defensively so a malformed
// payload can't poison downstream consumers.
workers: Array.isArray(row.workers) ? row.workers : undefined,
+ kv_transfer_lib: row.kv_transfer_lib ?? undefined,
disagg: row.disagg,
num_prefill_gpu: row.num_prefill_gpu,
num_decode_gpu: row.num_decode_gpu,
diff --git a/packages/db/migrations/008_benchmark_results_kv_transfer_lib.sql b/packages/db/migrations/008_benchmark_results_kv_transfer_lib.sql
new file mode 100644
index 00000000..e7052ad8
--- /dev/null
+++ b/packages/db/migrations/008_benchmark_results_kv_transfer_lib.sql
@@ -0,0 +1,52 @@
+-- ============================================================
+-- BENCHMARK_RESULTS.KV_TRANSFER_LIB — KV-cache transfer library
+-- ============================================================
+--
+-- Disaggregated runs move KV cache from prefill to decode workers through a
+-- transfer library (mooncake, nixl, mori, ucx). The benchmarking repo now
+-- derives it at result-processing time (InferenceX utils/kv_transfer_lib.py)
+-- and emits an optional `kv_transfer_lib` string on each result row.
+--
+-- Stored on benchmark_results, NOT on configs: the library is result-level
+-- metadata, deliberately excluded from the config natural key so config
+-- identity — and therefore historical trend-line continuity — is unchanged.
+-- NULL means unknown: every row ingested before the runner emitted the field,
+-- non-disagg runs (no KV transfer), and runs whose recipe could not be
+-- resolved. Consumers must render nothing for NULL rather than assume a
+-- default.
+
+alter table benchmark_results add column kv_transfer_lib text;
+
+alter table benchmark_results
+ add constraint benchmark_results_kv_transfer_lib_lowercase
+ check (kv_transfer_lib is null or kv_transfer_lib = lower(kv_transfer_lib));
+
+-- latest_benchmarks materializes `select br.*` at creation time, so it must be
+-- rebuilt to expose the new column. Definition is identical to migration 007.
+
+drop materialized view if exists latest_benchmarks;
+
+create materialized view latest_benchmarks as
+with winners as (
+ select distinct on (br.config_id, br.benchmark_type, br.isl, br.osl)
+ br.config_id, br.benchmark_type, br.isl, br.osl,
+ br.workflow_run_id as winning_run_id
+ from benchmark_results br
+ join latest_workflow_runs wr on wr.id = br.workflow_run_id
+ where br.error is null
+ order by br.config_id, br.benchmark_type, br.isl, br.osl,
+ br.date desc, wr.run_started_at desc nulls last, br.workflow_run_id desc
+)
+select br.*
+from benchmark_results br
+join winners w
+ on w.config_id = br.config_id
+ and w.benchmark_type = br.benchmark_type
+ and w.isl is not distinct from br.isl
+ and w.osl is not distinct from br.osl
+ and w.winning_run_id = br.workflow_run_id
+where br.error is null;
+
+create unique index latest_benchmarks_pk
+ on latest_benchmarks (config_id, conc, isl, osl, benchmark_type);
+create index latest_benchmarks_model_idx on latest_benchmarks (config_id);
diff --git a/packages/db/src/etl/benchmark-ingest.ts b/packages/db/src/etl/benchmark-ingest.ts
index a5493629..f0e30545 100644
--- a/packages/db/src/etl/benchmark-ingest.ts
+++ b/packages/db/src/etl/benchmark-ingest.ts
@@ -46,11 +46,13 @@ export async function bulkIngestBenchmarkRows(
const workersJsons = deduped.map((r) =>
r.workers === undefined ? null : JSON.stringify(r.workers),
);
+ // kv_transfer_lib is optional — SQL NULL for rows that didn't emit it.
+ const kvTransferLibs = deduped.map((r) => r.kvTransferLib ?? null);
const result = await sql<{ inserted: boolean; id: number }[]>`
insert into benchmark_results (
workflow_run_id, config_id, benchmark_type, date,
- isl, osl, conc, image, metrics, workers
+ isl, osl, conc, image, metrics, workers, kv_transfer_lib
)
select
${workflowRunId},
@@ -62,12 +64,14 @@ export async function bulkIngestBenchmarkRows(
unnest(${sql.array(concs)}::int[]),
unnest(${sql.array(images)}),
unnest(${sql.array(metricsJsons)}::jsonb[]),
- unnest(${sql.array(workersJsons)}::jsonb[])
+ unnest(${sql.array(workersJsons)}::jsonb[]),
+ unnest(${sql.array(kvTransferLibs)}::text[])
on conflict (workflow_run_id, config_id, benchmark_type, isl, osl, conc)
do update set
metrics = excluded.metrics,
image = excluded.image,
- workers = excluded.workers
+ workers = excluded.workers,
+ kv_transfer_lib = excluded.kv_transfer_lib
returning (xmax = 0) as inserted, id
`;
diff --git a/packages/db/src/etl/benchmark-mapper.test.ts b/packages/db/src/etl/benchmark-mapper.test.ts
index 65fb3e39..f46567c9 100644
--- a/packages/db/src/etl/benchmark-mapper.test.ts
+++ b/packages/db/src/etl/benchmark-mapper.test.ts
@@ -570,3 +570,40 @@ describe('extractWorkers', () => {
expect(extractWorkers([null, 'bad', 0, undefined])).toBeUndefined();
});
});
+
+describe('kv_transfer_lib', () => {
+ it('captures kv_transfer_lib as a config sibling, not a metric', () => {
+ const tracker = createSkipTracker();
+ const result = mapBenchmarkRow(makeV2Row({ kv_transfer_lib: 'mooncake' }), tracker);
+
+ expect(result!.kvTransferLib).toBe('mooncake');
+ expect(result!.metrics).not.toHaveProperty('kv_transfer_lib');
+ });
+
+ it('normalizes to lowercase and trims', () => {
+ const tracker = createSkipTracker();
+ const result = mapBenchmarkRow(makeV2Row({ kv_transfer_lib: ' NIXL ' }), tracker);
+
+ expect(result!.kvTransferLib).toBe('nixl');
+ });
+
+ it('is undefined when absent', () => {
+ const tracker = createSkipTracker();
+ const result = mapBenchmarkRow(makeV2Row(), tracker);
+
+ expect(result!.kvTransferLib).toBeUndefined();
+ });
+
+ it('is undefined for empty or non-string values', () => {
+ const tracker = createSkipTracker();
+ expect(
+ mapBenchmarkRow(makeV2Row({ kv_transfer_lib: '' }), tracker)!.kvTransferLib,
+ ).toBeUndefined();
+ expect(
+ mapBenchmarkRow(makeV2Row({ kv_transfer_lib: 42 }), tracker)!.kvTransferLib,
+ ).toBeUndefined();
+ expect(
+ mapBenchmarkRow(makeV2Row({ kv_transfer_lib: null }), tracker)!.kvTransferLib,
+ ).toBeUndefined();
+ });
+});
diff --git a/packages/db/src/etl/benchmark-mapper.ts b/packages/db/src/etl/benchmark-mapper.ts
index b25baf60..f89f46d3 100644
--- a/packages/db/src/etl/benchmark-mapper.ts
+++ b/packages/db/src/etl/benchmark-mapper.ts
@@ -57,6 +57,9 @@ const NON_METRIC_KEYS = new Set([
'decode_num_workers',
'num_prefill_gpu',
'num_decode_gpu',
+ // KV-cache transfer library (string, e.g. 'mooncake'). Surfaced as a
+ // sibling of the metrics JSONB by mapBenchmarkRow, like `workers`.
+ 'kv_transfer_lib',
// per-worker measured-power array (not a numeric scalar). Surfaced as a
// sibling of the metrics JSONB by mapBenchmarkRow so the metrics column
// stays Record for the index signature on BenchmarkRow.
@@ -105,6 +108,13 @@ export interface BenchmarkParams {
* predating the multinode patch.
*/
workers?: WorkerPower[];
+ /**
+ * KV-cache transfer library used by a disaggregated run ('mooncake',
+ * 'nixl', 'mori', 'ucx'), derived by the runner's process_result.py.
+ * Undefined for non-disagg runs, runs predating the field, and runs whose
+ * recipe could not be resolved — consumers must treat that as unknown.
+ */
+ kvTransferLib?: string;
}
/**
@@ -222,6 +232,13 @@ export function mapBenchmarkRow(
// narrowing — anything other than a non-empty array of objects is dropped.
const workers = extractWorkers(row.workers);
+ // KV transfer library: non-empty string, normalized to lowercase.
+ // Anything else (absent, empty, non-string) is treated as unknown.
+ const kvTransferLib =
+ typeof row.kv_transfer_lib === 'string' && row.kv_transfer_lib.trim() !== ''
+ ? row.kv_transfer_lib.trim().toLowerCase()
+ : undefined;
+
return {
config: {
hardware: gpuKey,
@@ -248,6 +265,7 @@ export function mapBenchmarkRow(
image,
metrics,
workers,
+ kvTransferLib,
};
}
diff --git a/packages/db/src/json-provider.ts b/packages/db/src/json-provider.ts
index dfb03e98..8e2251ed 100644
--- a/packages/db/src/json-provider.ts
+++ b/packages/db/src/json-provider.ts
@@ -76,6 +76,8 @@ interface RawBenchmarkResult {
metrics: Record;
/** Added in migration 006; older dumps omit this field — surfaced as undefined. */
workers?: BenchmarkWorkerRow[] | null;
+ /** Added in migration 008; older dumps omit this field — surfaced as null. */
+ kv_transfer_lib?: string | null;
error: string | null;
server_log_id: number | null;
}
@@ -307,6 +309,9 @@ function toBenchmarkRow(
// simply lack the field — defensively narrow to an array or undefined so
// downstream consumers can rely on the property being well-typed.
workers: Array.isArray(br.workers) ? br.workers : undefined,
+ // kv_transfer_lib: optional column added in migration 008. Older dumps
+ // lack the field — normalize to null (unknown).
+ kv_transfer_lib: typeof br.kv_transfer_lib === 'string' ? br.kv_transfer_lib : null,
date: toDateString(br.date),
run_url: buildRunUrl(wr),
};
diff --git a/packages/db/src/queries/benchmarks.ts b/packages/db/src/queries/benchmarks.ts
index d99a1da1..afbcfd5b 100644
--- a/packages/db/src/queries/benchmarks.ts
+++ b/packages/db/src/queries/benchmarks.ts
@@ -40,6 +40,13 @@ export interface BenchmarkRow {
* aggregate_power.py's multinode patch — surfaced as undefined here.
*/
workers?: BenchmarkWorkerRow[];
+ /**
+ * KV-cache transfer library for disaggregated runs ('mooncake', 'nixl',
+ * 'mori', 'ucx'), emitted by the runner since mid-2026. Null means unknown
+ * (pre-field history, non-disagg runs, unresolvable recipes) — consumers
+ * must render nothing rather than assume a default.
+ */
+ kv_transfer_lib?: string | null;
date: string;
run_url: string | null;
}
@@ -138,6 +145,7 @@ export async function getLatestBenchmarks(
br.image,
br.metrics,
br.workers,
+ br.kv_transfer_lib,
br.date::text,
CASE WHEN wr.html_url IS NOT NULL THEN wr.html_url || '/attempts/' || wr.run_attempt ELSE NULL END AS run_url
FROM benchmark_results br
@@ -181,6 +189,7 @@ export async function getLatestBenchmarks(
lb.image,
lb.metrics,
lb.workers,
+ lb.kv_transfer_lib,
lb.date::text,
CASE WHEN wr.html_url IS NOT NULL THEN wr.html_url || '/attempts/' || wr.run_attempt ELSE NULL END AS run_url
FROM latest_benchmarks lb
@@ -230,6 +239,7 @@ export async function getBenchmarksForRun(
br.image,
br.metrics,
br.workers,
+ br.kv_transfer_lib,
br.date::text,
CASE WHEN wr.html_url IS NOT NULL THEN wr.html_url || '/attempts/' || wr.run_attempt ELSE NULL END AS run_url
FROM benchmark_results br
@@ -279,6 +289,7 @@ export async function getAllBenchmarksForHistory(
br.conc,
br.metrics - '{std_ttft,std_tpot,std_e2el,std_intvty,std_itl,mean_ttft,mean_tpot,mean_e2el,mean_intvty,mean_itl}'::text[] as metrics,
br.workers,
+ br.kv_transfer_lib,
br.date::text,
CASE WHEN wr.html_url IS NOT NULL THEN wr.html_url || '/attempts/' || wr.run_attempt ELSE NULL END AS run_url
FROM configs c
From 22a039c4fbc6eafd514ad8cfda9a785fbcefa9b4 Mon Sep 17 00:00:00 2001
From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com>
Date: Fri, 3 Jul 2026 11:20:17 +0800
Subject: [PATCH 2/2] fix(db): tolerate missing kv_transfer_lib column in
benchmark read queries
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Migrations are applied manually (pnpm admin:db:migrate), separately from
the Vercel deploy, so the PR preview runs against a DB that doesn't have
migration 008 yet. A bare reference to a non-existent column is rejected
when the query is parsed ("column \"kv_transfer_lib\" does not exist"),
500ing every benchmarks request and blanking the dashboard — the same
failure mode as the migration-006 workers rollout.
Read the column via to_jsonb(row) ->> 'kv_transfer_lib' in all four read
paths (getLatestBenchmarks base-table + view branches,
getBenchmarksForRun, getAllBenchmarksForHistory). JSON key lookup is a
runtime op that returns NULL for an absent key instead of failing at
parse time; behavior is identical once the column exists.
Adds a regression test asserting the read queries never reference the
column directly. The ingest INSERT always names the column, so it still
requires migration 008 — run the migration before any ingest with this code.
---
packages/db/src/queries/benchmarks.test.ts | 72 ++++++++++++++++++++++
packages/db/src/queries/benchmarks.ts | 15 +++--
2 files changed, 83 insertions(+), 4 deletions(-)
create mode 100644 packages/db/src/queries/benchmarks.test.ts
diff --git a/packages/db/src/queries/benchmarks.test.ts b/packages/db/src/queries/benchmarks.test.ts
new file mode 100644
index 00000000..866ff764
--- /dev/null
+++ b/packages/db/src/queries/benchmarks.test.ts
@@ -0,0 +1,72 @@
+import { describe, expect, it } from 'vitest';
+
+import type { DbClient } from '../connection.js';
+import {
+ getAllBenchmarksForHistory,
+ getBenchmarksForRun,
+ getLatestBenchmarks,
+} from './benchmarks.js';
+
+/**
+ * Builds a fake {@link DbClient} tagged-template function for SQL-text tests.
+ * Each call stores the template's static segments joined with ` ? ` (so every
+ * interpolated value shows up as `?`) and resolves to an empty array, which
+ * lets tests inspect the generated SQL without a live database.
+ *
+ * @returns `sql`, the recording stand-in to pass to query functions, and
+ *   `sqlText`, which returns all recorded statements joined by newlines —
+ *   enough to substring-match how `kv_transfer_lib` is selected.
+ */
+function makeRecordingSql(): { sql: DbClient; sqlText: () => string } {
+  const recorded: string[] = [];
+  const recorder = (segments: TemplateStringsArray, ..._interpolated: unknown[]) => {
+    recorded.push(segments.join(' ? '));
+    return Promise.resolve([]);
+  };
+  return { sql: recorder as DbClient, sqlText: () => recorded.join('\n') };
+}
+
+/**
+ * Regression guard for the migration-008 rollout (same failure mode as the
+ * migration-006 `workers` rollout, PR #405): migrations are applied manually
+ * (pnpm admin:db:migrate), separately from the Vercel deploy, so read queries
+ * must surface `kv_transfer_lib` via `to_jsonb(row) ->> 'kv_transfer_lib'`,
+ * NOT a bare `br.kv_transfer_lib` / `lb.kv_transfer_lib`. A bare column
+ * reference fails to plan ("column does not exist") on a pre-migration DB,
+ * which 500s every cache-miss request to /api/v1/benchmarks and blanks the
+ * dashboard. The to_jsonb form returns null for the absent column and behaves
+ * identically once the column exists.
+ */
+describe('benchmark read queries — kv_transfer_lib column tolerance', () => {
+ it('getLatestBenchmarks (no-date / materialized-view branch) does not reference lb.kv_transfer_lib directly', async () => {
+ const { sql, sqlText } = makeRecordingSql();
+ await getLatestBenchmarks(sql, 'dsr1');
+ const text = sqlText();
+ expect(text).toContain("to_jsonb(lb) ->> 'kv_transfer_lib'");
+ expect(text).not.toMatch(/\blb\.kv_transfer_lib\b/u);
+ });
+
+ it('getLatestBenchmarks (date-filtered / base-table branch) does not reference br.kv_transfer_lib directly', async () => {
+ const { sql, sqlText } = makeRecordingSql();
+ await getLatestBenchmarks(sql, 'dsr1', '2026-01-01');
+ const text = sqlText();
+ expect(text).toContain("to_jsonb(br) ->> 'kv_transfer_lib'");
+ expect(text).not.toMatch(/\bbr\.kv_transfer_lib\b/u);
+ });
+
+ it('getBenchmarksForRun does not reference br.kv_transfer_lib directly', async () => {
+ const { sql, sqlText } = makeRecordingSql();
+ await getBenchmarksForRun(sql, 'dsr1', 123456);
+ const text = sqlText();
+ expect(text).toContain("to_jsonb(br) ->> 'kv_transfer_lib'");
+ expect(text).not.toMatch(/\bbr\.kv_transfer_lib\b/u);
+ });
+
+ it('getAllBenchmarksForHistory does not reference br.kv_transfer_lib directly', async () => {
+ const { sql, sqlText } = makeRecordingSql();
+ await getAllBenchmarksForHistory(sql, 'dsr1', 1024, 1024);
+ const text = sqlText();
+ expect(text).toContain("to_jsonb(br) ->> 'kv_transfer_lib'");
+ expect(text).not.toMatch(/\bbr\.kv_transfer_lib\b/u);
+ });
+});
diff --git a/packages/db/src/queries/benchmarks.ts b/packages/db/src/queries/benchmarks.ts
index afbcfd5b..2a4134a4 100644
--- a/packages/db/src/queries/benchmarks.ts
+++ b/packages/db/src/queries/benchmarks.ts
@@ -145,7 +145,11 @@ export async function getLatestBenchmarks(
br.image,
br.metrics,
br.workers,
- br.kv_transfer_lib,
+ -- Read via to_jsonb(row)->>'kv_transfer_lib' rather than a bare column ref so the
+ -- query still plans (and returns null) until migration 008 is applied. A bare
+ -- reference to a non-existent column fails at parse time and 500s the whole
+ -- endpoint (same failure mode as the migration-006 workers rollout).
+ to_jsonb(br) ->> 'kv_transfer_lib' AS kv_transfer_lib,
br.date::text,
CASE WHEN wr.html_url IS NOT NULL THEN wr.html_url || '/attempts/' || wr.run_attempt ELSE NULL END AS run_url
FROM benchmark_results br
@@ -189,7 +193,8 @@ export async function getLatestBenchmarks(
lb.image,
lb.metrics,
lb.workers,
- lb.kv_transfer_lib,
+ -- to_jsonb guard: tolerate a pre-migration-008 view (see date branch above).
+ to_jsonb(lb) ->> 'kv_transfer_lib' AS kv_transfer_lib,
lb.date::text,
CASE WHEN wr.html_url IS NOT NULL THEN wr.html_url || '/attempts/' || wr.run_attempt ELSE NULL END AS run_url
FROM latest_benchmarks lb
@@ -239,7 +244,8 @@ export async function getBenchmarksForRun(
br.image,
br.metrics,
br.workers,
- br.kv_transfer_lib,
+ -- to_jsonb guard: tolerate a pre-migration-008 base table (see getLatestBenchmarks).
+ to_jsonb(br) ->> 'kv_transfer_lib' AS kv_transfer_lib,
br.date::text,
CASE WHEN wr.html_url IS NOT NULL THEN wr.html_url || '/attempts/' || wr.run_attempt ELSE NULL END AS run_url
FROM benchmark_results br
@@ -289,7 +295,8 @@ export async function getAllBenchmarksForHistory(
br.conc,
br.metrics - '{std_ttft,std_tpot,std_e2el,std_intvty,std_itl,mean_ttft,mean_tpot,mean_e2el,mean_intvty,mean_itl}'::text[] as metrics,
br.workers,
- br.kv_transfer_lib,
+ -- to_jsonb guard: tolerate a pre-migration-008 base table (see getLatestBenchmarks).
+ to_jsonb(br) ->> 'kv_transfer_lib' AS kv_transfer_lib,
br.date::text,
CASE WHEN wr.html_url IS NOT NULL THEN wr.html_url || '/attempts/' || wr.run_attempt ELSE NULL END AS run_url
FROM configs c