From 180b391c1deca1cd63707de4a89c16f54b974f33 Mon Sep 17 00:00:00 2001 From: Derek Misler Date: Tue, 23 Jun 2026 20:05:37 +0000 Subject: [PATCH 1/5] fix(score-risk): give plain application files a baseline score of 1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, let score = 0 meant any file with no special signals (security path, large change, many hunks, error handling) stayed at score 0. The auto-filter-diff Phase 1 then excluded ALL score-0 files, leaving nothing for the reviewer to look at when a PR contained only ordinary application code. Fix: change the starting score to 1 for any file that survives the early-exit rules (1 = exclude-paths prefix, 2 = generated-file marker). Rule 6 (test/doc/config reset) still resets to 0 before rule 7, so tests, docs, JSON configs, and generated files continue to be auto-excluded. Only intentional low-risk files score 0; normal application code now scores ≥1 and is kept for review. Score changes with new baseline: plain app file (no signals) 0 → 1 security path only 2 → 3 large change only 2 → 3 many hunks only 1 → 2 error handling only 1 → 2 security + large 4 → 5 security + large + hunks 5 → 6 security + large + hunks + err 6 → 7 test/doc/config (rule 6 reset) 0 → 0 (unchanged) exclude-path prefix (rule 1) 0 → 0 (unchanged) generated marker (rule 2) 0 → 0 (unchanged) Updated all test expectations accordingly and added a dedicated 'baseline score 1 for plain application files' suite with six new test cases covering src/app.ts, server.ts, types.ts, TeamDetail.tsx, entityEnrichment.ts, and internal/handler/http.go. --- src/score-risk/__tests__/score-risk.test.ts | 152 +++++++++++++------- src/score-risk/score-risk.ts | 15 +- 2 files changed, 111 insertions(+), 56 deletions(-) diff --git a/src/score-risk/__tests__/score-risk.test.ts b/src/score-risk/__tests__/score-risk.test.ts index a532e27..74f7c47 100644 --- a/src/score-risk/__tests__/score-risk.test.ts +++ b/src/score-risk/__tests__/score-risk.test.ts @@ -7,6 +7,10 @@ * Each scoring rule is tested in isolation first (single-variable control), * then in combination. Edge cases and both zero-score exit paths (exclude-paths * prefix match and generated-file marker) are verified independently. + * + * Baseline score is 1 for any file that survives rules 1, 2, and 6 — ensuring + * plain application files are never silently auto-excluded alongside + * intentionally-low-risk files (tests, docs, generated code, JSON configs). */ import { describe, expect, it } from 'vitest'; import { parseExcludePrefixes, scoreFiles } from '../score-risk.js'; @@ -80,7 +84,7 @@ describe('scoreFiles — rule 1: exclude-paths prefix → score 0', () => { }); it('security-path file in excluded prefix still scores 0 (prefix wins)', () => { - // "session_service.pb.go" would score 2 via the security-path rule, + // "session_service.pb.go" would score via the security-path rule, // but the exclude-paths rule fires first and short-circuits to 0. const diff = makeLargeDiff('backend/gen/session_service.pb.go', 200); const scores = scoreFiles(diff, ['backend/gen/']); @@ -91,7 +95,7 @@ describe('scoreFiles — rule 1: exclude-paths prefix → score 0', () => { const diff = makeDiff('backend/gen/foo.pb.go') + makeDiff('src/auth/handler.go'); const scores = scoreFiles(diff, ['backend/gen/']); expect(scores['backend/gen/foo.pb.go']).toBe(0); - expect(scores['src/auth/handler.go']).toBe(2); // security path + expect(scores['src/auth/handler.go']).toBe(3); // baseline 1 + security path +2 }); it('multiple prefixes — file matching any is scored 0', () => { @@ -102,7 +106,7 @@ describe('scoreFiles — rule 1: exclude-paths prefix → score 0', () => { const scores = scoreFiles(diff, ['backend/gen/', 'frontend/src/gen/']); expect(scores['backend/gen/foo.pb.go']).toBe(0); expect(scores['frontend/src/gen/api.ts']).toBe(0); - expect(scores['src/real.go']).toBe(0); // no rules fire for a plain file + expect(scores['src/real.go']).toBe(1); // baseline 1: plain file, no signals }); }); @@ -152,8 +156,8 @@ describe('scoreFiles — rule 2: generated-file marker → score 0', () => { '+// Code generated (too late to trigger marker check)', ]); const scores = scoreFiles(diff, []); - // Security path +2; no other rules fired. - expect(scores['src/auth/handler.go']).toBe(2); + // Baseline 1 + security path +2 = 3; marker too late to fire. + expect(scores['src/auth/handler.go']).toBe(3); }); it('marker on context line (not a "+" line) does NOT trigger → scored normally', () => { @@ -163,8 +167,8 @@ describe('scoreFiles — rule 2: generated-file marker → score 0', () => { '+real change', ]); const scores = scoreFiles(diff, []); - // Security path matches "auth" → +2; no marker on added line. - expect(scores['src/auth/gen.go']).toBe(2); + // Baseline 1 + security path matches "auth" → +2 = 3; no marker on added line. + expect(scores['src/auth/gen.go']).toBe(3); }); }); @@ -183,67 +187,67 @@ describe('scoreFiles — rule 3: security-sensitive path → +2', () => { ] as const; for (const kw of KEYWORDS) { - it(`keyword "${kw}" in path → score 2`, () => { + it(`keyword "${kw}" in path → score 3 (baseline 1 + security +2)`, () => { const diff = makeDiff(`src/${kw}/handler.go`); const scores = scoreFiles(diff, []); - expect(scores[`src/${kw}/handler.go`]).toBe(2); + expect(scores[`src/${kw}/handler.go`]).toBe(3); }); } - it('keyword is case-insensitive (AUTH → 2)', () => { + it('keyword is case-insensitive (AUTH → 3)', () => { const diff = makeDiff('src/AUTH/handler.go'); const scores = scoreFiles(diff, []); - expect(scores['src/AUTH/handler.go']).toBe(2); + expect(scores['src/AUTH/handler.go']).toBe(3); }); - it('no keyword in path → 0', () => { + it('no keyword in path → baseline score 1', () => { const diff = makeDiff('src/utils/helper.go'); const scores = scoreFiles(diff, []); - expect(scores['src/utils/helper.go']).toBe(0); + expect(scores['src/utils/helper.go']).toBe(1); }); }); // ── Rule 4: large change (>100 added lines) → +2 ───────────────────────────── describe('scoreFiles — rule 4: large change (>100 added lines) → +2', () => { - it('101 added lines → score 2', () => { + it('101 added lines → score 3 (baseline 1 + large +2)', () => { const diff = makeLargeDiff('src/big.go', 101); const scores = scoreFiles(diff, []); - expect(scores['src/big.go']).toBe(2); + expect(scores['src/big.go']).toBe(3); }); - it('100 added lines (boundary) → score 0 (not triggered)', () => { + it('100 added lines (boundary) → score 1 (baseline only; rule not triggered)', () => { const diff = makeLargeDiff('src/big.go', 100); const scores = scoreFiles(diff, []); - expect(scores['src/big.go']).toBe(0); + expect(scores['src/big.go']).toBe(1); }); - it('1 added line → score 0', () => { + it('1 added line → score 1 (baseline only)', () => { const diff = makeDiff('src/small.go', ['+one line']); const scores = scoreFiles(diff, []); - expect(scores['src/small.go']).toBe(0); + expect(scores['src/small.go']).toBe(1); }); }); // ── Rule 5: many hunks (>3) → +1 ───────────────────────────────────────────── describe('scoreFiles — rule 5: many hunks (>3 hunk headers) → +1', () => { - it('4 hunks → score 1', () => { + it('4 hunks → score 2 (baseline 1 + hunks +1)', () => { const diff = makeHunksDiff('src/hunky.go', 4); const scores = scoreFiles(diff, []); - expect(scores['src/hunky.go']).toBe(1); + expect(scores['src/hunky.go']).toBe(2); }); - it('3 hunks (boundary) → score 0 (not triggered)', () => { + it('3 hunks (boundary) → score 1 (baseline only; rule not triggered)', () => { const diff = makeHunksDiff('src/hunky.go', 3); const scores = scoreFiles(diff, []); - expect(scores['src/hunky.go']).toBe(0); + expect(scores['src/hunky.go']).toBe(1); }); - it('1 hunk → score 0', () => { + it('1 hunk → score 1 (baseline only)', () => { const diff = makeDiff('src/simple.go', ['+change']); const scores = scoreFiles(diff, []); - expect(scores['src/simple.go']).toBe(0); + expect(scores['src/simple.go']).toBe(1); }); }); @@ -251,7 +255,6 @@ describe('scoreFiles — rule 5: many hunks (>3 hunk headers) → +1', () => { describe('scoreFiles — rule 6: Rust/Ruby test/bench/spec suffixes → reset score to 0', () => { it('Rust file in tests/ directory scores 0 (directory component match)', () => { - // Security keyword "virtiofs" not in path, but tests/ directory resets to 0 const diff = makeDiff('crates/vm/tests/integration/virtiofs.rs'); const scores = scoreFiles(diff, []); expect(scores['crates/vm/tests/integration/virtiofs.rs']).toBe(0); @@ -273,7 +276,7 @@ describe('scoreFiles — rule 6: Rust/Ruby test/bench/spec suffixes → reset sc // "auth" in path would normally add +2 (rule 3), but tests/ directory resets to 0 const diff = makeDiff('src/auth/tests/handler_test.go'); const scores = scoreFiles(diff, []); - // Rule 7 (error handling) can still apply; rule 3 (+2) is reset by rule 6. + // Rule 7 (error handling) can still apply; rule 3 (+2) + baseline (1) are reset by rule 6. // No error-handling keywords in the default diff content, so score stays 0. expect(scores['src/auth/tests/handler_test.go']).toBe(0); }); @@ -344,7 +347,7 @@ describe('scoreFiles — rule 6: test/doc/config file → reset score to 0', () '', ].join('\n'); const scores = scoreFiles(diff, []); - // Rule 6 resets to 0; rule 7 (error handling) doesn't fire here. + // Rule 6 resets to 0 (overrides baseline + rules 3-5); rule 7 (error handling) doesn't fire here. expect(scores[p]).toBe(0); }); } @@ -363,21 +366,21 @@ describe('scoreFiles — rule 7: error-handling patterns → +1', () => { const KEYWORDS = ['catch', 'rescue', 'except', 'recover', 'error', 'panic'] as const; for (const kw of KEYWORDS) { - it(`keyword "${kw}" in an added line → +1`, () => { + it(`keyword "${kw}" in an added line → baseline 1 + error +1 = 2`, () => { const diff = makeDiff('src/service.go', [`+handle ${kw} here`]); const scores = scoreFiles(diff, []); - expect(scores['src/service.go']).toBe(1); + expect(scores['src/service.go']).toBe(2); }); } - it('keyword in a context line (not added) → no +1', () => { + it('keyword in a context line (not added) → no +1 (baseline 1 only)', () => { // Context line (space-prefixed) doesn't count. const diff = makeDiff('src/service.go', [' existing catch block', '+new line only']); const scores = scoreFiles(diff, []); - expect(scores['src/service.go']).toBe(0); + expect(scores['src/service.go']).toBe(1); }); - it('multiple error-handling lines still only add +1 total', () => { + it('multiple error-handling lines still only add +1 total (baseline 1 + error +1 = 2)', () => { const diff = makeDiff('src/service.go', [ '+catch(err) {', '+ recover();', @@ -385,20 +388,20 @@ describe('scoreFiles — rule 7: error-handling patterns → +1', () => { '}', ]); const scores = scoreFiles(diff, []); - expect(scores['src/service.go']).toBe(1); + expect(scores['src/service.go']).toBe(2); }); }); // ── Combined scoring ────────────────────────────────────────────────────────── describe('scoreFiles — combined rules', () => { - it('security path + large change = 4', () => { + it('security path + large change = 5 (baseline 1 + security +2 + large +2)', () => { const diff = makeLargeDiff('src/auth/handler.go', 150); const scores = scoreFiles(diff, []); - expect(scores['src/auth/handler.go']).toBe(4); // +2 security + +2 large + expect(scores['src/auth/handler.go']).toBe(5); }); - it('security path + large change + many hunks = 5', () => { + it('security path + large change + many hunks = 6 (baseline 1 + security +2 + large +2 + hunks +1)', () => { const lines = Array.from({ length: 110 }, (_, i) => `+line${i}`); const hunkHeaders = Array.from( { length: 4 }, @@ -414,10 +417,10 @@ describe('scoreFiles — combined rules', () => { '', ].join('\n'); const scores = scoreFiles(diff, []); - expect(scores['src/session/service.go']).toBe(5); // +2 security +2 large +1 hunks + expect(scores['src/session/service.go']).toBe(6); }); - it('security path + large change + many hunks + error handling = 6 (max realistic)', () => { + it('security path + large change + many hunks + error handling = 7 (max realistic)', () => { const lines = Array.from({ length: 110 }, (_, i) => `+line${i}`); const hunkHeaders = Array.from( { length: 4 }, @@ -434,18 +437,18 @@ describe('scoreFiles — combined rules', () => { '', ].join('\n'); const scores = scoreFiles(diff, []); - expect(scores['src/session/service.go']).toBe(6); + expect(scores['src/session/service.go']).toBe(7); }); it('multiple files in one diff are scored independently', () => { const diff = - makeDiff('src/auth/handler.go') + // security → 2 - makeLargeDiff('src/utils/big.go', 200) + // large → 2 + makeDiff('src/auth/handler.go') + // baseline 1 + security +2 = 3 + makeLargeDiff('src/utils/big.go', 200) + // baseline 1 + large +2 = 3 makeDiff('backend/gen/foo.pb.go'); // excluded → 0 const scores = scoreFiles(diff, ['backend/gen/']); - expect(scores['src/auth/handler.go']).toBe(2); - expect(scores['src/utils/big.go']).toBe(2); + expect(scores['src/auth/handler.go']).toBe(3); + expect(scores['src/utils/big.go']).toBe(3); expect(scores['backend/gen/foo.pb.go']).toBe(0); }); }); @@ -453,18 +456,60 @@ describe('scoreFiles — combined rules', () => { // ── extractFilePath regression: greedy-regex vs indexOf(' b/') ───────────────── describe('scoreFiles — extractFilePath: indexOf regression for paths containing b/', () => { - it('path with security keyword before a b/ directory component scores 2, not 0', () => { + it('path with security keyword before a b/ directory component scores 3, not 1', () => { // Old greedy regex: replace(/.*b\//, '') on // "diff --git a/src/auth/b/helper.ts b/src/auth/b/helper.ts" // matches everything up to the last 'b/' giving 'helper.ts'. - // 'helper.ts' has no security keyword → score 0. + // 'helper.ts' has no security keyword → baseline score 1 only. // // New indexOf(' b/'): strips "diff --git a/" prefix and splits at the // first ' b/' separator giving 'src/auth/b/helper.ts'. - // 'src/auth/b/helper.ts' matches SECURITY_PATH_RE ('auth') → score 2. + // 'src/auth/b/helper.ts' matches SECURITY_PATH_RE ('auth') → baseline 1 + +2 = 3. const diff = makeDiff('src/auth/b/helper.ts', ['+changed']); const scores = scoreFiles(diff, []); - expect(scores['src/auth/b/helper.ts']).toBe(2); + expect(scores['src/auth/b/helper.ts']).toBe(3); + }); +}); + +// ── Baseline score for plain application files ──────────────────────────────── + +describe('scoreFiles — baseline score 1 for plain application files', () => { + it('plain app file (src/app.ts) with a small change gets score 1, not 0', () => { + // This is the key regression test: ordinary application code must never + // be auto-excluded alongside intentionally-low-risk files. + const diff = makeDiff('src/app.ts', ['+const x = 1;']); + const scores = scoreFiles(diff, []); + expect(scores['src/app.ts']).toBe(1); + }); + + it('plain server file (src/server.ts) scores 1', () => { + const diff = makeDiff('src/server.ts', ['+app.listen(3000);']); + const scores = scoreFiles(diff, []); + expect(scores['src/server.ts']).toBe(1); + }); + + it('plain types file (src/types.ts) scores 1', () => { + const diff = makeDiff('src/types.ts', ['+export type Foo = string;']); + const scores = scoreFiles(diff, []); + expect(scores['src/types.ts']).toBe(1); + }); + + it('React component (src/TeamDetail.tsx) scores 1', () => { + const diff = makeDiff('src/TeamDetail.tsx', ['+export default function TeamDetail() {}']); + const scores = scoreFiles(diff, []); + expect(scores['src/TeamDetail.tsx']).toBe(1); + }); + + it('service file (src/entityEnrichment.ts) scores 1', () => { + const diff = makeDiff('src/entityEnrichment.ts', ['+export function enrich() {}']); + const scores = scoreFiles(diff, []); + expect(scores['src/entityEnrichment.ts']).toBe(1); + }); + + it('plain Go file (internal/handler/http.go) scores 1', () => { + const diff = makeDiff('internal/handler/http.go', ['+func handleRequest() {}']); + const scores = scoreFiles(diff, []); + expect(scores['internal/handler/http.go']).toBe(1); }); }); @@ -475,16 +520,17 @@ describe('scoreFiles — edge cases', () => { expect(scoreFiles('', [])).toEqual({}); }); - it('empty exclude prefixes → all files scored normally', () => { + it('empty exclude prefixes → all files scored normally (plain files get baseline 1)', () => { const diff = makeDiff('backend/gen/foo.pb.go'); const scores = scoreFiles(diff, []); - // No security keyword, no large change, no hunks → 0 - expect(scores['backend/gen/foo.pb.go']).toBe(0); + // No security keyword, no large change, no hunks, not a test/doc/config file + // → baseline score 1 + expect(scores['backend/gen/foo.pb.go']).toBe(1); }); - it('single plain file with no matching rules → score 0', () => { + it('single plain file with no matching rules → baseline score 1', () => { const diff = makeDiff('src/utils/helper.go', ['+minor tweak']); const scores = scoreFiles(diff, []); - expect(scores['src/utils/helper.go']).toBe(0); + expect(scores['src/utils/helper.go']).toBe(1); }); }); diff --git a/src/score-risk/score-risk.ts b/src/score-risk/score-risk.ts index f0ba741..43f058f 100644 --- a/src/score-risk/score-risk.ts +++ b/src/score-risk/score-risk.ts @@ -15,12 +15,17 @@ * (auth|security|crypto|session|secret|token|password|credential, case-insensitive) * 4. Large change (>100 added lines)→ +2 * 5. Many hunks (>3 hunk headers) → +1 - * 6. Test/doc/config file → score = 0 (resets 3-5 to zero) + * 6. Test/doc/config file → score = 0 (resets baseline + 3-5 to zero) * 7. Error-handling patterns → +1 * (catch|rescue|except|recover|error|panic in any added line, case-sensitive) * + * Files not caught by rules 1, 2, or 6 start with a baseline score of 1 + * so that ordinary application code is never auto-excluded alongside + * intentionally-low-risk files (tests, docs, generated code, JSON configs). + * * Rule 6 resets the running total to 0, then rule 7 can still add 1. - * This faithfully reproduces the existing bash behaviour. + * This faithfully reproduces the existing bash behaviour for test/doc/config + * files while ensuring plain application files always reach the reviewer. */ // --------------------------------------------------------------------------- @@ -198,7 +203,11 @@ export function scoreFiles(diffContent: string, excludePrefixes: string[]): Risk continue; } - let score = 0; + // Baseline: plain application files that don't match any positive rule + // (rules 3-5) or the reset rule (6) still need review — give them score 1 + // so they are not auto-excluded alongside intentionally-low-risk files. + // Rules 1 and 2 already exited early above with score 0; rule 6 resets to 0. + let score = 1; // Rule 3: security-sensitive path → +2. if (SECURITY_PATH_RE.test(path)) score += 2; From 0d93edffa6ff3479a1408fd0faa111a052613f09 Mon Sep 17 00:00:00 2001 From: Derek Misler Date: Tue, 23 Jun 2026 20:10:54 +0000 Subject: [PATCH 2/5] docs(score-risk tests): clarify three minor comments per review feedback - Rule 1 comment: state the numeric score (3) that the security-path file would receive without the exclude-prefix short-circuit - Rule 6 loop comment: note that rule 7 can still add +1 after the reset (even though no error-handling keywords fire in this particular diff fixture) - Edge-case comment: explain that makeDiff() emits no 'Code generated' added line, so rule 2 does not fire and the baseline-1 score stands --- src/score-risk/__tests__/score-risk.test.ts | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/score-risk/__tests__/score-risk.test.ts b/src/score-risk/__tests__/score-risk.test.ts index 74f7c47..fcacd74 100644 --- a/src/score-risk/__tests__/score-risk.test.ts +++ b/src/score-risk/__tests__/score-risk.test.ts @@ -84,8 +84,9 @@ describe('scoreFiles — rule 1: exclude-paths prefix → score 0', () => { }); it('security-path file in excluded prefix still scores 0 (prefix wins)', () => { - // "session_service.pb.go" would score via the security-path rule, - // but the exclude-paths rule fires first and short-circuits to 0. + // Without an exclude prefix, "session_service.pb.go" would score 3 + // (baseline 1 + security-path +2), but the exclude-paths rule fires + // first and short-circuits to 0. const diff = makeLargeDiff('backend/gen/session_service.pb.go', 200); const scores = scoreFiles(diff, ['backend/gen/']); expect(scores['backend/gen/session_service.pb.go']).toBe(0); @@ -347,7 +348,9 @@ describe('scoreFiles — rule 6: test/doc/config file → reset score to 0', () '', ].join('\n'); const scores = scoreFiles(diff, []); - // Rule 6 resets to 0 (overrides baseline + rules 3-5); rule 7 (error handling) doesn't fire here. + // Rule 6 resets to 0 (overrides baseline + rules 3-5); rule 7 + // (error-handling keywords) can still add +1 after the reset, but + // no error-handling keywords appear in this diff, so the score stays 0. expect(scores[p]).toBe(0); }); } @@ -523,8 +526,9 @@ describe('scoreFiles — edge cases', () => { it('empty exclude prefixes → all files scored normally (plain files get baseline 1)', () => { const diff = makeDiff('backend/gen/foo.pb.go'); const scores = scoreFiles(diff, []); - // No security keyword, no large change, no hunks, not a test/doc/config file - // → baseline score 1 + // No security keyword, no large change, no hunks, not a test/doc/config file. + // makeDiff() does not emit a "Code generated" or "DO NOT EDIT" added line, + // so rule 2 (generated-file marker) does not fire — the file gets baseline 1. expect(scores['backend/gen/foo.pb.go']).toBe(1); }); From a5bd00cbf1f909ba19837b89b508378ff09153e9 Mon Sep 17 00:00:00 2001 From: Derek Misler Date: Tue, 23 Jun 2026 20:26:39 +0000 Subject: [PATCH 3/5] fix(score-risk): extend low-risk file pattern to cover lock files, assets, dotfiles MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The baseline-score-1 change made any file NOT matched by the old TEST_FILE_RE survive auto-exclusion with score 1. Lock files, binary assets, dotfiles, CSS, LICENSE, and .env.example were not matched and would incorrectly reach the reviewer. Changes: - Rename TEST_FILE_RE → LOW_RISK_FILE_RE to reflect the broader scope - Add LOW_RISK_FILE_EXTRA_RE (RegExp constructor so the source can be split over labelled lines) covering: Lock files: yarn.lock, package-lock.json, Cargo.lock, go.sum, Gemfile.lock, pnpm-lock.yaml, composer.lock, poetry.lock Assets: .svg, .png, .jpg/.jpeg, .gif, .ico, .woff/.woff2, .eot, .ttf CSS: .css Dotfiles: .gitignore, .gitattributes, .editorconfig, .prettierrc[.*], .eslintignore, .dockerignore Misc: LICENSE[.*], .env.example - Add helper isLowRiskFile() that checks both regexes - Update rule 6 comment to say 'low-risk file' instead of 'test/doc/config' Tests (463 total, +36 vs previous): - New describe block for each new category (lock files, assets, CSS, dotfiles, legal/example) — one test per path, plus: - lock file with security keyword still scores 0 (rule 6 beats rule 3) - dotfile with error keyword scores 1 (rule 7 still applies after reset) - Regression cases in the baseline-1 suite confirming yarn.lock, pnpm-lock.yaml, and .gitignore score 0 (not promoted to 1) --- src/score-risk/__tests__/score-risk.test.ts | 88 ++++++++++++++++++++- src/score-risk/score-risk.ts | 67 ++++++++++++++-- 2 files changed, 147 insertions(+), 8 deletions(-) diff --git a/src/score-risk/__tests__/score-risk.test.ts b/src/score-risk/__tests__/score-risk.test.ts index fcacd74..f9ebaf7 100644 --- a/src/score-risk/__tests__/score-risk.test.ts +++ b/src/score-risk/__tests__/score-risk.test.ts @@ -252,7 +252,7 @@ describe('scoreFiles — rule 5: many hunks (>3 hunk headers) → +1', () => { }); }); -// ── Rule 6: test/doc/config file → reset score to 0 ────────────────────────── +// ── Rule 6: low-risk file → reset score to 0 ─────────────────────────────────── describe('scoreFiles — rule 6: Rust/Ruby test/bench/spec suffixes → reset score to 0', () => { it('Rust file in tests/ directory scores 0 (directory component match)', () => { @@ -313,7 +313,7 @@ describe('scoreFiles — rule 6: Rust/Ruby test/bench/spec suffixes → reset sc }); }); -describe('scoreFiles — rule 6: test/doc/config file → reset score to 0', () => { +describe('scoreFiles — rule 6: existing test/doc/config patterns → reset score to 0', () => { const TEST_PATHS = [ 'src/auth_test.go', 'src/auth.test.ts', @@ -363,6 +363,70 @@ describe('scoreFiles — rule 6: test/doc/config file → reset score to 0', () }); }); +describe('scoreFiles — rule 6: lock files, assets, CSS, dotfiles → reset score to 0', () => { + const LOW_RISK_PATHS = [ + // Lock files + 'yarn.lock', + 'package-lock.json', + 'Cargo.lock', + 'go.sum', + 'Gemfile.lock', + 'pnpm-lock.yaml', + 'composer.lock', + 'poetry.lock', + // Binary / asset files + 'assets/logo.svg', + 'public/icon.png', + 'images/hero.jpg', + 'images/hero.jpeg', + 'favicon.ico', + 'fonts/myfont.woff', + 'fonts/myfont.woff2', + 'fonts/old.eot', + 'fonts/body.ttf', + // CSS + 'styles/main.css', + // Dotfiles + '.gitignore', + 'src/.gitignore', + '.gitattributes', + '.editorconfig', + '.prettierrc', + '.prettierrc.js', + '.prettierrc.json', + '.eslintignore', + '.dockerignore', + // Legal / example + 'LICENSE', + 'LICENSE.md', + '.env.example', + 'config/.env.example', + ] as const; + + for (const p of LOW_RISK_PATHS) { + it(`"${p}" scores 0 (low-risk file, rule 6 resets)`, () => { + const diff = makeDiff(p, ['+changed']); + const scores = scoreFiles(diff, []); + expect(scores[p]).toBe(0); + }); + } + + it('lock file with security keyword in path still scores 0 (rule 6 wins over rule 3)', () => { + // "auth" in path would add +2 via rule 3, but go.sum is a lock file + // so rule 6 resets to 0 regardless. + const diff = makeDiff('vendor/auth/go.sum', ['+h1:abc']); + const scores = scoreFiles(diff, []); + expect(scores['vendor/auth/go.sum']).toBe(0); + }); + + it('dotfile with error-handling keyword scores 1 (rule 7 still applies after rule 6 reset)', () => { + // Rule 6 resets to 0; rule 7 can still add +1 if the diff contains error keywords. + const diff = makeDiff('.eslintignore', ['+catch']); + const scores = scoreFiles(diff, []); + expect(scores['.eslintignore']).toBe(1); + }); +}); + // ── Rule 7: error-handling patterns → +1 ───────────────────────────────────── describe('scoreFiles — rule 7: error-handling patterns → +1', () => { @@ -514,6 +578,26 @@ describe('scoreFiles — baseline score 1 for plain application files', () => { const scores = scoreFiles(diff, []); expect(scores['internal/handler/http.go']).toBe(1); }); + + // Regression: these were at score 0 before the baseline change and must + // remain at 0 (rule 6 still resets them) — NOT promoted to 1. + it('yarn.lock scores 0 (lock file, not promoted by baseline)', () => { + const diff = makeDiff('yarn.lock', ['+dep@1.2.3']); + const scores = scoreFiles(diff, []); + expect(scores['yarn.lock']).toBe(0); + }); + + it('.gitignore scores 0 (dotfile, not promoted by baseline)', () => { + const diff = makeDiff('.gitignore', ['+node_modules/']); + const scores = scoreFiles(diff, []); + expect(scores['.gitignore']).toBe(0); + }); + + it('pnpm-lock.yaml scores 0 (lock file, not promoted by baseline)', () => { + const diff = makeDiff('pnpm-lock.yaml', ['+ version: 1.0.0']); + const scores = scoreFiles(diff, []); + expect(scores['pnpm-lock.yaml']).toBe(0); + }); }); // ── Edge cases ──────────────────────────────────────────────────────────────── diff --git a/src/score-risk/score-risk.ts b/src/score-risk/score-risk.ts index 43f058f..494ad87 100644 --- a/src/score-risk/score-risk.ts +++ b/src/score-risk/score-risk.ts @@ -15,7 +15,8 @@ * (auth|security|crypto|session|secret|token|password|credential, case-insensitive) * 4. Large change (>100 added lines)→ +2 * 5. Many hunks (>3 hunk headers) → +1 - * 6. Test/doc/config file → score = 0 (resets baseline + 3-5 to zero) + * 6. Low-risk file → score = 0 (resets baseline + 3-5 to zero) + * Tests, docs, config, lock files, binary assets, dotfiles, LICENSE, etc. * 7. Error-handling patterns → +1 * (catch|rescue|except|recover|error|panic in any added line, case-sensitive) * @@ -35,9 +36,55 @@ /** Matches security-sensitive keywords in a file path (case-insensitive). */ const SECURITY_PATH_RE = /auth|security|crypto|session|secret|token|password|credential/i; -/** Matches test, documentation, and config file extensions (case-insensitive). */ -const TEST_FILE_RE = - /_test\.go$|\.test\.[tj]sx?$|\.spec\.[tj]sx?$|test_.*\.py$|\.md$|\.ya?ml$|\.json$|\.toml$|_test\.rs$|_bench\.rs$|_spec\.rs$|_spec\.rb$|(^|\/)(tests?|benches|__tests__|specs?)\//i; +/** + * Matches low-risk file types that should be auto-excluded from review. + * Covers test files, documentation, config/manifest/data files, lock files, + * binary assets (images, fonts), CSS, dotfiles, and misc non-code files. + * + * Renamed from TEST_FILE_RE to reflect the broader scope introduced alongside + * the baseline-score-1 change: with a baseline of 1 any file not matched here + * would survive auto-exclusion, so this list must be comprehensive. + * + * Categories: + * - Test files: _test.go, .test.ts/js/tsx/jsx, .spec.ts/js/tsx/jsx, + * test_*.py, _test.rs, _bench.rs, _spec.rs, _spec.rb, + * tests/, benches/, __tests__/, specs/ directories + * - Documentation: .md + * - Config/data: .yml, .yaml, .json, .toml, .css + * - Lock files: yarn.lock, package-lock.json, Cargo.lock, go.sum, + * Gemfile.lock, pnpm-lock.yaml, composer.lock, poetry.lock + * - Assets: .svg, .png, .jpg, .jpeg, .gif, .ico, + * .woff, .woff2, .eot, .ttf + * - Dotfiles: .gitignore, .gitattributes, .editorconfig, + * .prettierrc[.*], .eslintignore, .dockerignore + * - Misc: LICENSE[.*], .env.example + */ +const LOW_RISK_FILE_RE = + // Test files + /_test\.go$|\.test\.[tj]sx?$|\.spec\.[tj]sx?$|test_.*\.py$|_test\.rs$|_bench\.rs$|_spec\.rs$|_spec\.rb$|(^|\/)(tests?|benches|__tests__|specs?)\//i; + +/** + * Extended low-risk pattern covering everything LOW_RISK_FILE_RE covers plus + * lock files, binary assets, CSS, dotfiles, and misc non-code files. + * Built as a RegExp constructor call so the source can be split across lines. + */ +const LOW_RISK_FILE_EXTRA_RE = new RegExp( + // Documentation + '\\.md$' + + // Config / manifests / data / CSS + '|\\.ya?ml$|\\.json$|\\.toml$|\\.css$' + + // Lock files + '|yarn\\.lock$|package-lock\\.json$|Cargo\\.lock$|go\\.sum$' + + '|Gemfile\\.lock$|pnpm-lock\\.yaml$|composer\\.lock$|poetry\\.lock$' + + // Binary / asset files + '|\\.svg$|\\.png$|\\.jpe?g$|\\.gif$|\\.ico$|\\.woff2?$|\\.eot$|\\.ttf$' + + // Dotfiles and project-root config files + '|(^|\\/)\\.gitignore$|(^|\\/)\\.gitattributes$|(^|\\/)\\.editorconfig$' + + '|(^|\\/)\\.prettierrc(\\.\\w+)?$|(^|\\/)\\.eslintignore$|(^|\\/)\\.dockerignore$' + + // Misc: legal / example files + '|(^|\\/)LICENSE(\\.\\w+)?$|(^|\\/)\\.env\\.example$', + 'i', +); /** Matches error-handling keywords in diff hunk lines (case-sensitive, matching bash awk). */ const ERROR_PATTERN_RE = /catch|rescue|except|recover|error|panic/; @@ -161,6 +208,14 @@ function countErrorHandlingLines(diffContent: string, filePath: string): number return count; } +/** + * Return true if `filePath` matches any low-risk pattern (rule 6). + * Checks both the literal regex and the constructor-built extended regex. + */ +function isLowRiskFile(filePath: string): boolean { + return LOW_RISK_FILE_RE.test(filePath) || LOW_RISK_FILE_EXTRA_RE.test(filePath); +} + // --------------------------------------------------------------------------- // Public API // --------------------------------------------------------------------------- @@ -218,8 +273,8 @@ export function scoreFiles(diffContent: string, excludePrefixes: string[]): Risk // Rule 5: many hunks (>3 hunk headers) → +1. if (hunks > 3) score += 1; - // Rule 6: test/doc/config file → reset score to 0. - if (TEST_FILE_RE.test(path)) score = 0; + // Rule 6: low-risk file (test/doc/config/lock/asset/dotfile) → reset score to 0. + if (isLowRiskFile(path)) score = 0; // Rule 7: error-handling patterns in added lines → +1. if (countErrorHandlingLines(diffContent, path) > 0) score += 1; From 1974ee3bb1c23c4b7368c669817851d5cc060533 Mon Sep 17 00:00:00 2001 From: Derek Misler Date: Tue, 23 Jun 2026 21:05:21 +0000 Subject: [PATCH 4/5] fix(auto-filter-diff): never exclude all files when all score 0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, Phase 1 could remove every file from the diff when all changed files scored 0 (e.g. a PR touching only lock files, CSS, or dotfiles). The downstream steps were then silently skipped, leaving the reviewer with no feedback at all. Fix: after Phase 1 filtering, if keptSections would be empty and the original diff had sections, reset to keeping all sections (skip Phase 1 for this diff). Phase 2 (progressive cap) still applies, so timeout protection is preserved. Behaviour: Mixed PR (some low-risk, some high-risk) → Phase 1 removes score-0 files ✓ All-low-risk PR (e.g. only lock files) → Phase 1 kept all instead ✓ Large all-low-risk PR → Phase 1 kept all, Phase 2 trims ✓ New AutoFilterResult field allFilesKept: true signals the fallback fired. index.ts logs: 'All files scored 0 — keeping all files for review'. Tests: 8 new cases covering single score-0 file, lock-file-only PR, all-score-0 + Phase 2 cap, round-trip fidelity, and allFilesKept=false for normal/empty/no-exclusion cases. Total: 470 (all passing). --- .../__tests__/auto-filter-diff.test.ts | 103 +++++++++++++++++- src/auto-filter-diff/auto-filter-diff.ts | 33 +++++- src/auto-filter-diff/index.ts | 6 + 3 files changed, 135 insertions(+), 7 deletions(-) diff --git a/src/auto-filter-diff/__tests__/auto-filter-diff.test.ts b/src/auto-filter-diff/__tests__/auto-filter-diff.test.ts index 9f4350d..e19f4a8 100644 --- a/src/auto-filter-diff/__tests__/auto-filter-diff.test.ts +++ b/src/auto-filter-diff/__tests__/auto-filter-diff.test.ts @@ -103,14 +103,30 @@ describe('autoFilterDiff — phase 1: auto-exclude score-0 files', () => { expect(result.remainingFiles).toBe(1); }); - it('all files excluded returns empty string', () => { + it('when ALL files are score-0, keeps every file instead of excluding them all (allFilesKept=true)', () => { + // The key fix: a PR where every changed file is low-risk should still + // be reviewed. Phase 1 detects the all-excluded condition and falls back + // to keeping all sections; autoExcludedFiles is empty, allFilesKept=true. const diff = makeDiff('src/a.go') + makeDiff('src/b.go'); const riskScores = { 'src/a.go': 0, 'src/b.go': 0 }; const result = autoFilterDiff(diff, riskScores, 0); - expect(result.filtered).toBe(''); - expect(result.remainingFiles).toBe(0); - expect(result.autoExcludedFiles).toHaveLength(2); + expect(result.allFilesKept).toBe(true); + expect(result.autoExcludedFiles).toHaveLength(0); + expect(result.remainingFiles).toBe(2); + expect(result.filtered).toContain('src/a.go'); + expect(result.filtered).toContain('src/b.go'); + }); + + it('when SOME files are score-0, normal exclusion applies (allFilesKept=false)', () => { + // Mixed PR: low-risk file is excluded, high-risk file is kept. + const diff = makeDiff('src/gen.pb.go') + makeDiff('src/auth/handler.go'); + const riskScores = { 'src/gen.pb.go': 0, 'src/auth/handler.go': 2 }; + const result = autoFilterDiff(diff, riskScores, 0); + + expect(result.allFilesKept).toBe(false); + expect(result.autoExcludedFiles).toEqual(['src/gen.pb.go']); + expect(result.remainingFiles).toBe(1); }); it('reports correct originalLines', () => { @@ -124,6 +140,84 @@ describe('autoFilterDiff — phase 1: auto-exclude score-0 files', () => { }); }); +// ═════════════════════════════════════════════════════════════════════════════ +// Phase 1 — never-exclude-all guarantee +// ═════════════════════════════════════════════════════════════════════════════ + +describe('autoFilterDiff — never exclude all files (all-score-0 fallback)', () => { + it('single score-0 file: kept (would otherwise exclude everything)', () => { + const diff = makeDiff('yarn.lock'); + const riskScores = { 'yarn.lock': 0 }; + const result = autoFilterDiff(diff, riskScores, 0); + + expect(result.allFilesKept).toBe(true); + expect(result.autoExcludedFiles).toHaveLength(0); + expect(result.remainingFiles).toBe(1); + expect(result.filtered).toContain('yarn.lock'); + }); + + it('many score-0 files (e.g. lock file PR): all kept', () => { + const diff = + makeDiff('yarn.lock') + + makeDiff('package-lock.json') + + makeDiff('Cargo.lock') + + makeDiff('go.sum'); + const riskScores = { 'yarn.lock': 0, 'package-lock.json': 0, 'Cargo.lock': 0, 'go.sum': 0 }; + const result = autoFilterDiff(diff, riskScores, 0); + + expect(result.allFilesKept).toBe(true); + expect(result.autoExcludedFiles).toHaveLength(0); + expect(result.remainingFiles).toBe(4); + expect(result.filtered).toContain('yarn.lock'); + expect(result.filtered).toContain('package-lock.json'); + }); + + it('all score-0 but Phase 2 cap still trims (timeout protection preserved)', () => { + // Four score-0 files whose total exceeds the cap: Phase 1 falls back to + // keeping all, then Phase 2 removes the lowest-score files until the cap fits. + const diff = + makeDiff('src/a.go', 80) + // score 0 → all-kept fallback + makeDiff('src/b.go', 80) + // score 0 + makeDiff('src/c.go', 80) + // score 0 + makeDiff('src/d.go', 80); // score 0 + const riskScores = { 'src/a.go': 0, 'src/b.go': 0, 'src/c.go': 0, 'src/d.go': 0 }; + // Combined ~344 lines; cap at 100 → Phase 2 removes some + const result = autoFilterDiff(diff, riskScores, 100); + + expect(result.allFilesKept).toBe(true); // Phase 1 fell back + expect(result.autoExcludedFiles).toHaveLength(0); + expect(result.remainingFiles).toBeGreaterThanOrEqual(1); // Phase 2 trimmed but kept ≥1 + expect(result.remainingLines).toBeLessThanOrEqual(100 + 90); // rough check + expect(result.progressivelyExcludedFiles.length).toBeGreaterThan(0); + }); + + it('allFilesKept is false for empty diff', () => { + const result = autoFilterDiff('', {}, 0); + expect(result.allFilesKept).toBe(false); + }); + + it('allFilesKept is false when no files are score-0', () => { + const diff = makeDiff('src/app.ts') + makeDiff('src/server.ts'); + const riskScores = { 'src/app.ts': 1, 'src/server.ts': 2 }; + const result = autoFilterDiff(diff, riskScores, 0); + + expect(result.allFilesKept).toBe(false); + expect(result.autoExcludedFiles).toHaveLength(0); + expect(result.remainingFiles).toBe(2); + }); + + it('filtered output is identical to original diff when all-kept fallback fires', () => { + // Round-trip: the fallback must return the original diff unchanged + // (Phase 2 disabled via cap=0). + const diff = makeDiff('styles/main.css') + makeDiff('.gitignore'); + const riskScores = { 'styles/main.css': 0, '.gitignore': 0 }; + const result = autoFilterDiff(diff, riskScores, 0); + + expect(result.allFilesKept).toBe(true); + expect(result.filtered).toBe(diff); + }); +}); + // ═════════════════════════════════════════════════════════════════════════════ // Phase 2 — progressive cap // ═════════════════════════════════════════════════════════════════════════════ @@ -248,6 +342,7 @@ describe('autoFilterDiff — edge cases', () => { expect(result.remainingFiles).toBe(0); expect(result.remainingLines).toBe(0); expect(result.originalLines).toBe(0); + expect(result.allFilesKept).toBe(false); }); it('diff with no matching risk scores is passed through unchanged', () => { diff --git a/src/auto-filter-diff/auto-filter-diff.ts b/src/auto-filter-diff/auto-filter-diff.ts index f3bf17e..4fe8962 100644 --- a/src/auto-filter-diff/auto-filter-diff.ts +++ b/src/auto-filter-diff/auto-filter-diff.ts @@ -9,6 +9,9 @@ * Phase 1 — Auto-exclude score-0 files: * Remove every file section whose path appears in `riskScores` with a value * of 0. Files NOT present in `riskScores` are kept (unknown = needs review). + * Exception: if Phase 1 would remove ALL files, keep all instead — a PR + * where every changed file is low-risk still deserves a review. + * Phase 2 still applies in that case, so timeout protection is preserved. * * Phase 2 — Progressive cap (only when maxDiffLines > 0): * If the remaining diff exceeds `maxDiffLines` after Phase 1, sort the kept @@ -35,6 +38,11 @@ export interface AutoFilterResult { remainingLines: number; /** Line count of the original diff (split by '\n'). */ originalLines: number; + /** + * True when Phase 1 would have excluded everything and was skipped. + * The review will cover all files; Phase 2 (progressive cap) still applies. + */ + allFilesKept: boolean; } // --------------------------------------------------------------------------- @@ -132,6 +140,7 @@ export function autoFilterDiff( remainingFiles: 0, remainingLines: 0, originalLines: 0, + allFilesKept: false, }; } @@ -139,15 +148,32 @@ export function autoFilterDiff( const { preamble, sections } = parseSections(diffContent); // ── Phase 1: auto-exclude score-0 files ──────────────────────────────────── - const autoExcludedFiles: string[] = []; - let keptSections = sections.filter((section) => { + const candidateExcluded: string[] = []; + const afterPhase1 = sections.filter((section) => { if (section.path in riskScores && riskScores[section.path] === 0) { - autoExcludedFiles.push(section.path); + candidateExcluded.push(section.path); return false; } return true; }); + // If Phase 1 would remove every file, keep all files instead. + // A PR where every changed file is low-risk still deserves a review — the + // reviewer should see it rather than silently skipping the whole PR. + // Phase 2 (progressive cap) still applies to bound review size. + let allFilesKept = false; + let autoExcludedFiles: string[]; + let keptSections: typeof sections; + + if (afterPhase1.length === 0 && sections.length > 0) { + allFilesKept = true; + autoExcludedFiles = []; // nothing actually excluded + keptSections = sections; // keep everything + } else { + autoExcludedFiles = candidateExcluded; + keptSections = afterPhase1; + } + // ── Phase 2: progressive cap ─────────────────────────────────────────────── const progressivelyExcludedFiles: string[] = []; @@ -188,5 +214,6 @@ export function autoFilterDiff( remainingFiles: keptSections.length, remainingLines, originalLines, + allFilesKept, }; } diff --git a/src/auto-filter-diff/index.ts b/src/auto-filter-diff/index.ts index 0507768..607f295 100644 --- a/src/auto-filter-diff/index.ts +++ b/src/auto-filter-diff/index.ts @@ -53,6 +53,12 @@ try { process.stderr.write(`⏭️ Auto-excluded (score 0): ${path}\n`); } + if (result.allFilesKept) { + process.stderr.write( + 'ℹ️ All files scored 0 — keeping all files for review (Phase 2 cap still applies)\n', + ); + } + for (const path of result.progressivelyExcludedFiles) { const score = riskScores[path] ?? 'unknown'; process.stderr.write(`⏭️ Progressive cap (score ${score}): ${path}\n`); From 5ed3aa7cc11e302e7c3d3cf6a0b90ac2ceda2797 Mon Sep 17 00:00:00 2001 From: Derek Misler Date: Tue, 23 Jun 2026 21:08:36 +0000 Subject: [PATCH 5/5] style: fix trailing spaces in auto-filter-diff comments (biome) --- src/auto-filter-diff/auto-filter-diff.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/auto-filter-diff/auto-filter-diff.ts b/src/auto-filter-diff/auto-filter-diff.ts index 4fe8962..c497904 100644 --- a/src/auto-filter-diff/auto-filter-diff.ts +++ b/src/auto-filter-diff/auto-filter-diff.ts @@ -167,8 +167,8 @@ export function autoFilterDiff( if (afterPhase1.length === 0 && sections.length > 0) { allFilesKept = true; - autoExcludedFiles = []; // nothing actually excluded - keptSections = sections; // keep everything + autoExcludedFiles = []; // nothing actually excluded + keptSections = sections; // keep everything } else { autoExcludedFiles = candidateExcluded; keptSections = afterPhase1;