Skip to content

Commit 1e625b8

Browse files
os-zhuangCopilot
andcommitted
feat(objectql): hash-compat dry-run probe (ADR-0008 PR-10d.1)
Pure-function `runDryRun()` + CLI that audits a snapshot of sys_metadata rows for compatibility with SysMetadataRepository.put.

Checks:
- valid JSON in the metadata column
- body is a plain object (not array/primitive)
- hashSpec(body) is stable across a canonical serialize → parse round-trip
- no duplicate active (type, name, organization_id) overlay keys

Exits 0 if compatible, 1 if not. Read-only — touches no database.

14 unit tests covering happy paths, all 5 error codes, and boundaries (empty, deep nesting, unicode). Full objectql suite: 322/322 green.

Discovery: sys_metadata rows today do NOT carry _hash or parent_version columns — the probe shifts focus from hash-equality to hash-stability, and PR-10d.2 (schema migration) becomes a hard prerequisite.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 04a29c7 commit 1e625b8

3 files changed

Lines changed: 487 additions & 0 deletions

File tree

.changeset/pr10d1-dryrun-probe.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
---
2+
"@objectstack/objectql": patch
3+
---
4+
5+
feat(objectql): hash-compat dry-run probe for the legacy → repository
6+
write-path migration (ADR-0008 PR-10d.1). Pure-function `runDryRun()` plus
7+
a CLI (`scripts/dry-run-hash-compat.ts`) that audits a snapshot of
8+
`sys_metadata` for invalid JSON, non-object bodies, unstable hashes across
9+
canonical round-trip, and duplicate overlay keys. Exits non-zero when
10+
incompatibilities are found. 14 unit tests covering happy paths, error
11+
classifications (`invalid_json`, `non_object_body`, `unstable_hash`,
12+
`missing_metadata`, `duplicate_overlay_key`), and boundary conditions
13+
(empty snapshot, deep nesting, unicode).
Lines changed: 238 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,238 @@
1+
// Copyright (c) 2025 ObjectStack. Licensed under the Apache-2.0 license.
2+
3+
/**
4+
* PR-10d.1 — Hash-compat dry-run probe.
5+
*
6+
* Read-only probe that audits a snapshot of `sys_metadata` rows and asks:
7+
*
8+
* 1. Is every row's `metadata` column valid JSON?
9+
* 2. Is the parsed body an object (the shape SysMetadataRepository expects)?
10+
* 3. Does `hashSpec(body)` produce a stable hash across a serialize → parse
11+
* round-trip? (If not, flipping the write path will produce different
12+
* hashes on subsequent reads — silent corruption.)
13+
* 4. Are there any rows whose `(type, name, organization_id)` tuple is
14+
* duplicated within the snapshot? (Would violate the overlay invariant
15+
* once `SysMetadataRepository.put` enforces it.)
16+
*
17+
* Usage:
18+
*
19+
* pnpm tsx packages/objectql/scripts/dry-run-hash-compat.ts <snapshot.json>
20+
*
21+
* Where `<snapshot.json>` is an array of rows in the shape of `MetadataRecord`,
22+
* obtained via `SELECT * FROM sys_metadata` (any driver) and exported as JSON.
23+
*
24+
* The script writes a structured report to stdout and exits 0 if the snapshot
25+
* is compatible, 1 otherwise. No database is touched.
26+
*
27+
* Pair this with the test in `dry-run-hash-compat.test.ts` which exercises
28+
* the probe against synthetic fixtures covering legacy edge cases.
29+
*/
30+
31+
import { hashSpec } from '@objectstack/metadata-core';
32+
33+
/**
 * One row of a `sys_metadata` snapshot as consumed by the probe.
 *
 * Every named field is optional because the snapshot is external input; the
 * probe reports missing or malformed values as findings rather than assuming
 * they are present. Any additional columns in the export are tolerated via
 * the index signature.
 */
export interface LegacyMetadataRow {
  /** Row identifier; echoed in findings so the offending row can be located. */
  id?: string;
  /** Metadata type; part of the overlay key. */
  type?: string;
  /** Metadata name; part of the overlay key. */
  name?: string;
  /** Owning organization; `null`/absent rows share a single overlay scope. */
  organization_id?: string | null;
  /** Serialized JSON body. `null`/absent is reported as `missing_metadata`. */
  metadata?: string | null;
  /** Row state; only rows whose state is `'active'` take part in duplicate detection. */
  state?: string;
  /** Row version; not inspected by the probe. */
  version?: number | null;
  /** Any other exported column. */
  [k: string]: unknown;
}
43+
44+
/**
 * A single problem detected on one snapshot row.
 */
export interface RowFinding {
  /** Identifying fields copied from the offending row. */
  row: { id?: string; type?: string; name?: string; organization_id?: string | null };
  /** Findings with severity `'error'` make the report incompatible. */
  severity: 'error' | 'warning';
  /** Machine-readable classification of the problem. */
  code:
    | 'invalid_json'
    | 'non_object_body'
    | 'unstable_hash'
    | 'missing_metadata'
    | 'duplicate_overlay_key';
  /** Human-readable explanation, printed beneath the finding in the report. */
  detail: string;
}
55+
56+
/**
 * Aggregate result of a dry-run over a snapshot.
 */
export interface DryRunReport {
  /** Number of rows in the input snapshot. */
  totalRows: number;
  /** Number of rows that passed every check. */
  okRows: number;
  /** One entry per failing row, in snapshot order. */
  findings: RowFinding[];
  /** Count of passing rows per `type`. */
  typeDistribution: Record<string, number>;
  /** Distinct overlay keys that were seen more than once among active rows. */
  duplicateKeys: string[];
  /** `true` when no finding has severity `'error'`. */
  compatible: boolean;
}
64+
65+
/**
66+
* Run the probe over an in-memory snapshot. Pure function — does no I/O.
67+
*/
68+
/**
 * Run the probe over an in-memory snapshot. Pure function — does no I/O.
 *
 * Each row goes through the checks below in order and stops at the first one
 * it fails, so a row contributes at most one finding:
 *
 *   1. the snapshot entry itself is an object and `metadata` is present,
 *   2. `metadata` parses as JSON,
 *   3. the parsed body is a plain object (not `null`, an array or a primitive),
 *   4. `hashSpec(body)` equals `hashSpec` of the body after a
 *      `JSON.stringify` → `JSON.parse` round-trip,
 *   5. for rows with `state === 'active'` and a non-empty `type` and `name`,
 *      the `(type, name, organization_id)` key has not been seen on an earlier
 *      passing row.
 *
 * Only rows that pass every check are counted in `okRows` and in
 * `typeDistribution`.
 *
 * @param rows Snapshot rows, typically the parsed output of
 *   `SELECT * FROM sys_metadata`.
 * @returns The aggregated report; `compatible` is `true` when no finding has
 *   severity `'error'`.
 */
export function runDryRun(rows: LegacyMetadataRow[]): DryRunReport {
  const findings: RowFinding[] = [];
  // Counts live in a Map rather than a plain object: `type` is untrusted data,
  // and a value such as "constructor" or "toString" would otherwise read an
  // inherited Object.prototype member and corrupt the count.
  const typeCounts = new Map<string, number>();
  const seen = new Map<string, LegacyMetadataRow>();
  const duplicateKeys = new Set<string>();
  let okRows = 0;

  // Thrown values are not guaranteed to be Error instances.
  const messageOf = (e: unknown): string => (e instanceof Error ? e.message : String(e));

  for (const row of rows) {
    // 0. The snapshot is external JSON, so an entry may not be an object at
    //    all. Report it instead of crashing on the property reads below.
    const entry: unknown = row;
    if (entry === null || typeof entry !== 'object') {
      findings.push({
        row: { organization_id: null },
        severity: 'error',
        code: 'missing_metadata',
        detail: `snapshot entry is ${entry === null ? 'null' : typeof entry}, not a row object`,
      });
      continue;
    }

    const tag = {
      id: row.id,
      type: row.type,
      name: row.name,
      organization_id: row.organization_id ?? null,
    };

    // 1. Missing metadata column.
    if (row.metadata == null) {
      findings.push({
        row: tag,
        severity: 'error',
        code: 'missing_metadata',
        detail: 'metadata column is null/undefined',
      });
      continue;
    }

    // 2. Invalid JSON.
    let body: unknown;
    try {
      body = JSON.parse(row.metadata);
    } catch (e) {
      findings.push({
        row: tag,
        severity: 'error',
        code: 'invalid_json',
        detail: `JSON.parse failed: ${messageOf(e)}`,
      });
      continue;
    }

    // 3. Body must be a plain object — SysMetadataRepository.put rejects
    //    arrays/primitives.
    if (body === null || typeof body !== 'object' || Array.isArray(body)) {
      findings.push({
        row: tag,
        severity: 'error',
        code: 'non_object_body',
        detail: `metadata body is ${Array.isArray(body) ? 'array' : typeof body}, not a plain object`,
      });
      continue;
    }

    // 4. Hash stability across serialize → parse round-trip. The repository
    //    will canonicalize on every put, so we must verify that
    //      hashSpec(JSON.parse(JSON.stringify(body))) === hashSpec(body)
    //    for every legacy row.
    let h1: string;
    let h2: string;
    try {
      h1 = hashSpec(body as Record<string, unknown>);
      const roundTrip = JSON.parse(JSON.stringify(body));
      h2 = hashSpec(roundTrip);
    } catch (e) {
      findings.push({
        row: tag,
        severity: 'error',
        code: 'unstable_hash',
        detail: `hashSpec threw: ${messageOf(e)}`,
      });
      continue;
    }
    if (h1 !== h2) {
      findings.push({
        row: tag,
        severity: 'error',
        code: 'unstable_hash',
        detail: `hash differs across round-trip: ${h1} vs ${h2}`,
      });
      continue;
    }

    // 5. Duplicate (type, name, organization_id) — would break the unique
    //    overlay invariant. Only count active rows.
    if (row.state === 'active' && row.type && row.name) {
      const key = `${row.type}|${row.name}|${row.organization_id ?? '__env__'}`;
      const prior = seen.get(key);
      if (prior) {
        duplicateKeys.add(key);
        findings.push({
          row: tag,
          severity: 'error',
          code: 'duplicate_overlay_key',
          detail: `duplicate active overlay key ${key} (conflicts with row id=${prior.id})`,
        });
        continue;
      }
      seen.set(key, row);
    }

    // 6. Distribution.
    if (row.type) {
      typeCounts.set(row.type, (typeCounts.get(row.type) ?? 0) + 1);
    }
    okRows += 1;
  }

  return {
    totalRows: rows.length,
    okRows,
    findings,
    // Object.fromEntries defines own data properties, so even a "__proto__"
    // type ends up as an ordinary key on the returned record.
    typeDistribution: Object.fromEntries(typeCounts),
    duplicateKeys: Array.from(duplicateKeys),
    compatible: findings.every((f) => f.severity !== 'error'),
  };
}
183+
184+
/**
185+
* Pretty-print a report. Returns a multi-line string suitable for stdout.
186+
*/
187+
/**
 * Pretty-print a report. Returns a multi-line string suitable for stdout.
 *
 * Layout: a summary header, the type distribution sorted by descending count,
 * then — only when non-empty — the findings (two lines each) and the list of
 * duplicate overlay keys. Sections are separated by blank lines.
 *
 * @param report The report produced by `runDryRun`.
 * @returns The formatted text, lines joined with `\n`.
 */
export function formatReport(report: DryRunReport): string {
  const lines: string[] = [];
  lines.push('# Hash-compat dry-run report');
  lines.push('');
  lines.push(`Total rows: ${report.totalRows}`);
  lines.push(`OK rows: ${report.okRows}`);
  lines.push(`Findings: ${report.findings.length}`);
  lines.push(`Compatible: ${report.compatible ? 'YES ✅' : 'NO ❌'}`);
  lines.push('');
  lines.push('## Type distribution');
  // Object.entries returns a fresh array, so sorting it does not mutate the report.
  const types = Object.entries(report.typeDistribution).sort((a, b) => b[1] - a[1]);
  if (types.length === 0) lines.push(' (none)');
  for (const [t, n] of types) lines.push(` ${t}: ${n}`);
  lines.push('');
  if (report.findings.length > 0) {
    lines.push('## Findings');
    for (const f of report.findings) {
      lines.push(
        ` [${f.severity}] ${f.code} — id=${f.row.id ?? '?'} type=${f.row.type ?? '?'} name=${f.row.name ?? '?'} org=${f.row.organization_id ?? 'null'}`,
      );
      lines.push(` ${f.detail}`);
    }
    lines.push('');
  }
  if (report.duplicateKeys.length > 0) {
    lines.push('## Duplicate overlay keys');
    for (const k of report.duplicateKeys) lines.push(` ${k}`);
    lines.push('');
  }
  return lines.join('\n');
}
218+
219+
// CLI entrypoint — only runs when invoked directly.
220+
// CLI entrypoint — only runs when invoked directly.
//
// Exit codes:
//   0 — snapshot is compatible
//   1 — snapshot has incompatibilities
//   2 — usage error, unreadable/malformed snapshot, or unexpected failure
if (typeof process !== 'undefined' && process.argv[1] && /dry-run-hash-compat\.ts$/.test(process.argv[1])) {
  const path = process.argv[2];
  if (!path) {
    console.error('Usage: pnpm tsx packages/objectql/scripts/dry-run-hash-compat.ts <snapshot.json>');
    process.exit(2);
  }
  void (async () => {
    try {
      const fs = await import('node:fs/promises');
      const raw = await fs.readFile(path, 'utf8');
      // Keep the parsed value as `unknown` until it has been checked.
      const parsed: unknown = JSON.parse(raw);
      if (!Array.isArray(parsed)) {
        console.error(`Snapshot at ${path} is not a JSON array.`);
        process.exitCode = 2;
        return;
      }
      const rows: LegacyMetadataRow[] = parsed;
      const report = runDryRun(rows);
      console.log(formatReport(report));
      // Set exitCode instead of calling process.exit() so pending stdout
      // writes are flushed before the process ends (matters when piped).
      process.exitCode = report.compatible ? 0 : 1;
    } catch (e) {
      // Without this handler a missing file or malformed JSON would surface as
      // an unhandled rejection and exit with 1 — indistinguishable from the
      // "incompatible snapshot" result.
      console.error(`Dry-run failed for ${path}: ${e instanceof Error ? e.message : String(e)}`);
      process.exitCode = 2;
    }
  })();
}

0 commit comments

Comments
 (0)