diff --git a/src/auto-filter-diff/__tests__/auto-filter-diff.test.ts b/src/auto-filter-diff/__tests__/auto-filter-diff.test.ts index 9f4350d..e19f4a8 100644 --- a/src/auto-filter-diff/__tests__/auto-filter-diff.test.ts +++ b/src/auto-filter-diff/__tests__/auto-filter-diff.test.ts @@ -103,14 +103,30 @@ describe('autoFilterDiff — phase 1: auto-exclude score-0 files', () => { expect(result.remainingFiles).toBe(1); }); - it('all files excluded returns empty string', () => { + it('when ALL files are score-0, keeps every file instead of excluding them all (allFilesKept=true)', () => { + // The key fix: a PR where every changed file is low-risk should still + // be reviewed. Phase 1 detects the all-excluded condition and falls back + // to keeping all sections; autoExcludedFiles is empty, allFilesKept=true. const diff = makeDiff('src/a.go') + makeDiff('src/b.go'); const riskScores = { 'src/a.go': 0, 'src/b.go': 0 }; const result = autoFilterDiff(diff, riskScores, 0); - expect(result.filtered).toBe(''); - expect(result.remainingFiles).toBe(0); - expect(result.autoExcludedFiles).toHaveLength(2); + expect(result.allFilesKept).toBe(true); + expect(result.autoExcludedFiles).toHaveLength(0); + expect(result.remainingFiles).toBe(2); + expect(result.filtered).toContain('src/a.go'); + expect(result.filtered).toContain('src/b.go'); + }); + + it('when SOME files are score-0, normal exclusion applies (allFilesKept=false)', () => { + // Mixed PR: low-risk file is excluded, high-risk file is kept. + const diff = makeDiff('src/gen.pb.go') + makeDiff('src/auth/handler.go'); + const riskScores = { 'src/gen.pb.go': 0, 'src/auth/handler.go': 2 }; + const result = autoFilterDiff(diff, riskScores, 0); + + expect(result.allFilesKept).toBe(false); + expect(result.autoExcludedFiles).toEqual(['src/gen.pb.go']); + expect(result.remainingFiles).toBe(1); }); it('reports correct originalLines', () => { @@ -124,6 +140,84 @@ describe('autoFilterDiff — phase 1: auto-exclude score-0 files', () => { }); }); +// ═════════════════════════════════════════════════════════════════════════════ +// Phase 1 — never-exclude-all guarantee +// ═════════════════════════════════════════════════════════════════════════════ + +describe('autoFilterDiff — never exclude all files (all-score-0 fallback)', () => { + it('single score-0 file: kept (would otherwise exclude everything)', () => { + const diff = makeDiff('yarn.lock'); + const riskScores = { 'yarn.lock': 0 }; + const result = autoFilterDiff(diff, riskScores, 0); + + expect(result.allFilesKept).toBe(true); + expect(result.autoExcludedFiles).toHaveLength(0); + expect(result.remainingFiles).toBe(1); + expect(result.filtered).toContain('yarn.lock'); + }); + + it('many score-0 files (e.g. lock file PR): all kept', () => { + const diff = + makeDiff('yarn.lock') + + makeDiff('package-lock.json') + + makeDiff('Cargo.lock') + + makeDiff('go.sum'); + const riskScores = { 'yarn.lock': 0, 'package-lock.json': 0, 'Cargo.lock': 0, 'go.sum': 0 }; + const result = autoFilterDiff(diff, riskScores, 0); + + expect(result.allFilesKept).toBe(true); + expect(result.autoExcludedFiles).toHaveLength(0); + expect(result.remainingFiles).toBe(4); + expect(result.filtered).toContain('yarn.lock'); + expect(result.filtered).toContain('package-lock.json'); + }); + + it('all score-0 but Phase 2 cap still trims (timeout protection preserved)', () => { + // Four score-0 files whose total exceeds the cap: Phase 1 falls back to + // keeping all, then Phase 2 removes the lowest-score files until the cap fits. + const diff = + makeDiff('src/a.go', 80) + // score 0 → all-kept fallback + makeDiff('src/b.go', 80) + // score 0 + makeDiff('src/c.go', 80) + // score 0 + makeDiff('src/d.go', 80); // score 0 + const riskScores = { 'src/a.go': 0, 'src/b.go': 0, 'src/c.go': 0, 'src/d.go': 0 }; + // Combined ~344 lines; cap at 100 → Phase 2 removes some + const result = autoFilterDiff(diff, riskScores, 100); + + expect(result.allFilesKept).toBe(true); // Phase 1 fell back + expect(result.autoExcludedFiles).toHaveLength(0); + expect(result.remainingFiles).toBeGreaterThanOrEqual(1); // Phase 2 trimmed but kept ≥1 + expect(result.remainingLines).toBeLessThanOrEqual(100 + 90); // rough check + expect(result.progressivelyExcludedFiles.length).toBeGreaterThan(0); + }); + + it('allFilesKept is false for empty diff', () => { + const result = autoFilterDiff('', {}, 0); + expect(result.allFilesKept).toBe(false); + }); + + it('allFilesKept is false when no files are score-0', () => { + const diff = makeDiff('src/app.ts') + makeDiff('src/server.ts'); + const riskScores = { 'src/app.ts': 1, 'src/server.ts': 2 }; + const result = autoFilterDiff(diff, riskScores, 0); + + expect(result.allFilesKept).toBe(false); + expect(result.autoExcludedFiles).toHaveLength(0); + expect(result.remainingFiles).toBe(2); + }); + + it('filtered output is identical to original diff when all-kept fallback fires', () => { + // Round-trip: the fallback must return the original diff unchanged + // (Phase 2 disabled via cap=0). + const diff = makeDiff('styles/main.css') + makeDiff('.gitignore'); + const riskScores = { 'styles/main.css': 0, '.gitignore': 0 }; + const result = autoFilterDiff(diff, riskScores, 0); + + expect(result.allFilesKept).toBe(true); + expect(result.filtered).toBe(diff); + }); +}); + // ═════════════════════════════════════════════════════════════════════════════ // Phase 2 — progressive cap // ═════════════════════════════════════════════════════════════════════════════ @@ -248,6 +342,7 @@ describe('autoFilterDiff — edge cases', () => { expect(result.remainingFiles).toBe(0); expect(result.remainingLines).toBe(0); expect(result.originalLines).toBe(0); + expect(result.allFilesKept).toBe(false); }); it('diff with no matching risk scores is passed through unchanged', () => { diff --git a/src/auto-filter-diff/auto-filter-diff.ts b/src/auto-filter-diff/auto-filter-diff.ts index f3bf17e..c497904 100644 --- a/src/auto-filter-diff/auto-filter-diff.ts +++ b/src/auto-filter-diff/auto-filter-diff.ts @@ -9,6 +9,9 @@ * Phase 1 — Auto-exclude score-0 files: * Remove every file section whose path appears in `riskScores` with a value * of 0. Files NOT present in `riskScores` are kept (unknown = needs review). + * Exception: if Phase 1 would remove ALL files, keep all instead — a PR + * where every changed file is low-risk still deserves a review. + * Phase 2 still applies in that case, so timeout protection is preserved. * * Phase 2 — Progressive cap (only when maxDiffLines > 0): * If the remaining diff exceeds `maxDiffLines` after Phase 1, sort the kept @@ -35,6 +38,11 @@ export interface AutoFilterResult { remainingLines: number; /** Line count of the original diff (split by '\n'). */ originalLines: number; + /** + * True when Phase 1 would have excluded everything and was skipped. + * The review will cover all files; Phase 2 (progressive cap) still applies. + */ + allFilesKept: boolean; } // --------------------------------------------------------------------------- @@ -132,6 +140,7 @@ export function autoFilterDiff( remainingFiles: 0, remainingLines: 0, originalLines: 0, + allFilesKept: false, }; } @@ -139,15 +148,32 @@ export function autoFilterDiff( const { preamble, sections } = parseSections(diffContent); // ── Phase 1: auto-exclude score-0 files ──────────────────────────────────── - const autoExcludedFiles: string[] = []; - let keptSections = sections.filter((section) => { + const candidateExcluded: string[] = []; + const afterPhase1 = sections.filter((section) => { if (section.path in riskScores && riskScores[section.path] === 0) { - autoExcludedFiles.push(section.path); + candidateExcluded.push(section.path); return false; } return true; }); + // If Phase 1 would remove every file, keep all files instead. + // A PR where every changed file is low-risk still deserves a review — the + // reviewer should see it rather than silently skipping the whole PR. + // Phase 2 (progressive cap) still applies to bound review size. + let allFilesKept = false; + let autoExcludedFiles: string[]; + let keptSections: typeof sections; + + if (afterPhase1.length === 0 && sections.length > 0) { + allFilesKept = true; + autoExcludedFiles = []; // nothing actually excluded + keptSections = sections; // keep everything + } else { + autoExcludedFiles = candidateExcluded; + keptSections = afterPhase1; + } + // ── Phase 2: progressive cap ─────────────────────────────────────────────── const progressivelyExcludedFiles: string[] = []; @@ -188,5 +214,6 @@ export function autoFilterDiff( remainingFiles: keptSections.length, remainingLines, originalLines, + allFilesKept, }; } diff --git a/src/auto-filter-diff/index.ts b/src/auto-filter-diff/index.ts index 0507768..607f295 100644 --- a/src/auto-filter-diff/index.ts +++ b/src/auto-filter-diff/index.ts @@ -53,6 +53,12 @@ try { process.stderr.write(`⏭️ Auto-excluded (score 0): ${path}\n`); } + if (result.allFilesKept) { + process.stderr.write( + 'ℹ️ All files scored 0 — keeping all files for review (Phase 2 cap still applies)\n', + ); + } + for (const path of result.progressivelyExcludedFiles) { const score = riskScores[path] ?? 'unknown'; process.stderr.write(`⏭️ Progressive cap (score ${score}): ${path}\n`); diff --git a/src/score-risk/__tests__/score-risk.test.ts b/src/score-risk/__tests__/score-risk.test.ts index a532e27..f9ebaf7 100644 --- a/src/score-risk/__tests__/score-risk.test.ts +++ b/src/score-risk/__tests__/score-risk.test.ts @@ -7,6 +7,10 @@ * Each scoring rule is tested in isolation first (single-variable control), * then in combination. Edge cases and both zero-score exit paths (exclude-paths * prefix match and generated-file marker) are verified independently. + * + * Baseline score is 1 for any file that survives rules 1, 2, and 6 — ensuring + * plain application files are never silently auto-excluded alongside + * intentionally-low-risk files (tests, docs, generated code, JSON configs). */ import { describe, expect, it } from 'vitest'; import { parseExcludePrefixes, scoreFiles } from '../score-risk.js'; @@ -80,8 +84,9 @@ describe('scoreFiles — rule 1: exclude-paths prefix → score 0', () => { }); it('security-path file in excluded prefix still scores 0 (prefix wins)', () => { - // "session_service.pb.go" would score 2 via the security-path rule, - // but the exclude-paths rule fires first and short-circuits to 0. + // Without an exclude prefix, "session_service.pb.go" would score 3 + // (baseline 1 + security-path +2), but the exclude-paths rule fires + // first and short-circuits to 0. const diff = makeLargeDiff('backend/gen/session_service.pb.go', 200); const scores = scoreFiles(diff, ['backend/gen/']); expect(scores['backend/gen/session_service.pb.go']).toBe(0); @@ -91,7 +96,7 @@ describe('scoreFiles — rule 1: exclude-paths prefix → score 0', () => { const diff = makeDiff('backend/gen/foo.pb.go') + makeDiff('src/auth/handler.go'); const scores = scoreFiles(diff, ['backend/gen/']); expect(scores['backend/gen/foo.pb.go']).toBe(0); - expect(scores['src/auth/handler.go']).toBe(2); // security path + expect(scores['src/auth/handler.go']).toBe(3); // baseline 1 + security path +2 }); it('multiple prefixes — file matching any is scored 0', () => { @@ -102,7 +107,7 @@ describe('scoreFiles — rule 1: exclude-paths prefix → score 0', () => { const scores = scoreFiles(diff, ['backend/gen/', 'frontend/src/gen/']); expect(scores['backend/gen/foo.pb.go']).toBe(0); expect(scores['frontend/src/gen/api.ts']).toBe(0); - expect(scores['src/real.go']).toBe(0); // no rules fire for a plain file + expect(scores['src/real.go']).toBe(1); // baseline 1: plain file, no signals }); }); @@ -152,8 +157,8 @@ describe('scoreFiles — rule 2: generated-file marker → score 0', () => { '+// Code generated (too late to trigger marker check)', ]); const scores = scoreFiles(diff, []); - // Security path +2; no other rules fired. - expect(scores['src/auth/handler.go']).toBe(2); + // Baseline 1 + security path +2 = 3; marker too late to fire. + expect(scores['src/auth/handler.go']).toBe(3); }); it('marker on context line (not a "+" line) does NOT trigger → scored normally', () => { @@ -163,8 +168,8 @@ describe('scoreFiles — rule 2: generated-file marker → score 0', () => { '+real change', ]); const scores = scoreFiles(diff, []); - // Security path matches "auth" → +2; no marker on added line. - expect(scores['src/auth/gen.go']).toBe(2); + // Baseline 1 + security path matches "auth" → +2 = 3; no marker on added line. + expect(scores['src/auth/gen.go']).toBe(3); }); }); @@ -183,75 +188,74 @@ describe('scoreFiles — rule 3: security-sensitive path → +2', () => { ] as const; for (const kw of KEYWORDS) { - it(`keyword "${kw}" in path → score 2`, () => { + it(`keyword "${kw}" in path → score 3 (baseline 1 + security +2)`, () => { const diff = makeDiff(`src/${kw}/handler.go`); const scores = scoreFiles(diff, []); - expect(scores[`src/${kw}/handler.go`]).toBe(2); + expect(scores[`src/${kw}/handler.go`]).toBe(3); }); } - it('keyword is case-insensitive (AUTH → 2)', () => { + it('keyword is case-insensitive (AUTH → 3)', () => { const diff = makeDiff('src/AUTH/handler.go'); const scores = scoreFiles(diff, []); - expect(scores['src/AUTH/handler.go']).toBe(2); + expect(scores['src/AUTH/handler.go']).toBe(3); }); - it('no keyword in path → 0', () => { + it('no keyword in path → baseline score 1', () => { const diff = makeDiff('src/utils/helper.go'); const scores = scoreFiles(diff, []); - expect(scores['src/utils/helper.go']).toBe(0); + expect(scores['src/utils/helper.go']).toBe(1); }); }); // ── Rule 4: large change (>100 added lines) → +2 ───────────────────────────── describe('scoreFiles — rule 4: large change (>100 added lines) → +2', () => { - it('101 added lines → score 2', () => { + it('101 added lines → score 3 (baseline 1 + large +2)', () => { const diff = makeLargeDiff('src/big.go', 101); const scores = scoreFiles(diff, []); - expect(scores['src/big.go']).toBe(2); + expect(scores['src/big.go']).toBe(3); }); - it('100 added lines (boundary) → score 0 (not triggered)', () => { + it('100 added lines (boundary) → score 1 (baseline only; rule not triggered)', () => { const diff = makeLargeDiff('src/big.go', 100); const scores = scoreFiles(diff, []); - expect(scores['src/big.go']).toBe(0); + expect(scores['src/big.go']).toBe(1); }); - it('1 added line → score 0', () => { + it('1 added line → score 1 (baseline only)', () => { const diff = makeDiff('src/small.go', ['+one line']); const scores = scoreFiles(diff, []); - expect(scores['src/small.go']).toBe(0); + expect(scores['src/small.go']).toBe(1); }); }); // ── Rule 5: many hunks (>3) → +1 ───────────────────────────────────────────── describe('scoreFiles — rule 5: many hunks (>3 hunk headers) → +1', () => { - it('4 hunks → score 1', () => { + it('4 hunks → score 2 (baseline 1 + hunks +1)', () => { const diff = makeHunksDiff('src/hunky.go', 4); const scores = scoreFiles(diff, []); - expect(scores['src/hunky.go']).toBe(1); + expect(scores['src/hunky.go']).toBe(2); }); - it('3 hunks (boundary) → score 0 (not triggered)', () => { + it('3 hunks (boundary) → score 1 (baseline only; rule not triggered)', () => { const diff = makeHunksDiff('src/hunky.go', 3); const scores = scoreFiles(diff, []); - expect(scores['src/hunky.go']).toBe(0); + expect(scores['src/hunky.go']).toBe(1); }); - it('1 hunk → score 0', () => { + it('1 hunk → score 1 (baseline only)', () => { const diff = makeDiff('src/simple.go', ['+change']); const scores = scoreFiles(diff, []); - expect(scores['src/simple.go']).toBe(0); + expect(scores['src/simple.go']).toBe(1); }); }); -// ── Rule 6: test/doc/config file → reset score to 0 ────────────────────────── +// ── Rule 6: low-risk file → reset score to 0 ─────────────────────────────────── describe('scoreFiles — rule 6: Rust/Ruby test/bench/spec suffixes → reset score to 0', () => { it('Rust file in tests/ directory scores 0 (directory component match)', () => { - // Security keyword "virtiofs" not in path, but tests/ directory resets to 0 const diff = makeDiff('crates/vm/tests/integration/virtiofs.rs'); const scores = scoreFiles(diff, []); expect(scores['crates/vm/tests/integration/virtiofs.rs']).toBe(0); @@ -273,7 +277,7 @@ describe('scoreFiles — rule 6: Rust/Ruby test/bench/spec suffixes → reset sc // "auth" in path would normally add +2 (rule 3), but tests/ directory resets to 0 const diff = makeDiff('src/auth/tests/handler_test.go'); const scores = scoreFiles(diff, []); - // Rule 7 (error handling) can still apply; rule 3 (+2) is reset by rule 6. + // Rule 7 (error handling) can still apply; rule 3 (+2) + baseline (1) are reset by rule 6. // No error-handling keywords in the default diff content, so score stays 0. expect(scores['src/auth/tests/handler_test.go']).toBe(0); }); @@ -309,7 +313,7 @@ describe('scoreFiles — rule 6: Rust/Ruby test/bench/spec suffixes → reset sc }); }); -describe('scoreFiles — rule 6: test/doc/config file → reset score to 0', () => { +describe('scoreFiles — rule 6: existing test/doc/config patterns → reset score to 0', () => { const TEST_PATHS = [ 'src/auth_test.go', 'src/auth.test.ts', @@ -344,7 +348,9 @@ describe('scoreFiles — rule 6: test/doc/config file → reset score to 0', () '', ].join('\n'); const scores = scoreFiles(diff, []); - // Rule 6 resets to 0; rule 7 (error handling) doesn't fire here. + // Rule 6 resets to 0 (overrides baseline + rules 3-5); rule 7 + // (error-handling keywords) can still add +1 after the reset, but + // no error-handling keywords appear in this diff, so the score stays 0. expect(scores[p]).toBe(0); }); } @@ -357,27 +363,91 @@ describe('scoreFiles — rule 6: test/doc/config file → reset score to 0', () }); }); +describe('scoreFiles — rule 6: lock files, assets, CSS, dotfiles → reset score to 0', () => { + const LOW_RISK_PATHS = [ + // Lock files + 'yarn.lock', + 'package-lock.json', + 'Cargo.lock', + 'go.sum', + 'Gemfile.lock', + 'pnpm-lock.yaml', + 'composer.lock', + 'poetry.lock', + // Binary / asset files + 'assets/logo.svg', + 'public/icon.png', + 'images/hero.jpg', + 'images/hero.jpeg', + 'favicon.ico', + 'fonts/myfont.woff', + 'fonts/myfont.woff2', + 'fonts/old.eot', + 'fonts/body.ttf', + // CSS + 'styles/main.css', + // Dotfiles + '.gitignore', + 'src/.gitignore', + '.gitattributes', + '.editorconfig', + '.prettierrc', + '.prettierrc.js', + '.prettierrc.json', + '.eslintignore', + '.dockerignore', + // Legal / example + 'LICENSE', + 'LICENSE.md', + '.env.example', + 'config/.env.example', + ] as const; + + for (const p of LOW_RISK_PATHS) { + it(`"${p}" scores 0 (low-risk file, rule 6 resets)`, () => { + const diff = makeDiff(p, ['+changed']); + const scores = scoreFiles(diff, []); + expect(scores[p]).toBe(0); + }); + } + + it('lock file with security keyword in path still scores 0 (rule 6 wins over rule 3)', () => { + // "auth" in path would add +2 via rule 3, but go.sum is a lock file + // so rule 6 resets to 0 regardless. + const diff = makeDiff('vendor/auth/go.sum', ['+h1:abc']); + const scores = scoreFiles(diff, []); + expect(scores['vendor/auth/go.sum']).toBe(0); + }); + + it('dotfile with error-handling keyword scores 1 (rule 7 still applies after rule 6 reset)', () => { + // Rule 6 resets to 0; rule 7 can still add +1 if the diff contains error keywords. + const diff = makeDiff('.eslintignore', ['+catch']); + const scores = scoreFiles(diff, []); + expect(scores['.eslintignore']).toBe(1); + }); +}); + // ── Rule 7: error-handling patterns → +1 ───────────────────────────────────── describe('scoreFiles — rule 7: error-handling patterns → +1', () => { const KEYWORDS = ['catch', 'rescue', 'except', 'recover', 'error', 'panic'] as const; for (const kw of KEYWORDS) { - it(`keyword "${kw}" in an added line → +1`, () => { + it(`keyword "${kw}" in an added line → baseline 1 + error +1 = 2`, () => { const diff = makeDiff('src/service.go', [`+handle ${kw} here`]); const scores = scoreFiles(diff, []); - expect(scores['src/service.go']).toBe(1); + expect(scores['src/service.go']).toBe(2); }); } - it('keyword in a context line (not added) → no +1', () => { + it('keyword in a context line (not added) → no +1 (baseline 1 only)', () => { // Context line (space-prefixed) doesn't count. const diff = makeDiff('src/service.go', [' existing catch block', '+new line only']); const scores = scoreFiles(diff, []); - expect(scores['src/service.go']).toBe(0); + expect(scores['src/service.go']).toBe(1); }); - it('multiple error-handling lines still only add +1 total', () => { + it('multiple error-handling lines still only add +1 total (baseline 1 + error +1 = 2)', () => { const diff = makeDiff('src/service.go', [ '+catch(err) {', '+ recover();', @@ -385,20 +455,20 @@ describe('scoreFiles — rule 7: error-handling patterns → +1', () => { '}', ]); const scores = scoreFiles(diff, []); - expect(scores['src/service.go']).toBe(1); + expect(scores['src/service.go']).toBe(2); }); }); // ── Combined scoring ────────────────────────────────────────────────────────── describe('scoreFiles — combined rules', () => { - it('security path + large change = 4', () => { + it('security path + large change = 5 (baseline 1 + security +2 + large +2)', () => { const diff = makeLargeDiff('src/auth/handler.go', 150); const scores = scoreFiles(diff, []); - expect(scores['src/auth/handler.go']).toBe(4); // +2 security + +2 large + expect(scores['src/auth/handler.go']).toBe(5); }); - it('security path + large change + many hunks = 5', () => { + it('security path + large change + many hunks = 6 (baseline 1 + security +2 + large +2 + hunks +1)', () => { const lines = Array.from({ length: 110 }, (_, i) => `+line${i}`); const hunkHeaders = Array.from( { length: 4 }, @@ -414,10 +484,10 @@ describe('scoreFiles — combined rules', () => { '', ].join('\n'); const scores = scoreFiles(diff, []); - expect(scores['src/session/service.go']).toBe(5); // +2 security +2 large +1 hunks + expect(scores['src/session/service.go']).toBe(6); }); - it('security path + large change + many hunks + error handling = 6 (max realistic)', () => { + it('security path + large change + many hunks + error handling = 7 (max realistic)', () => { const lines = Array.from({ length: 110 }, (_, i) => `+line${i}`); const hunkHeaders = Array.from( { length: 4 }, @@ -434,18 +504,18 @@ describe('scoreFiles — combined rules', () => { '', ].join('\n'); const scores = scoreFiles(diff, []); - expect(scores['src/session/service.go']).toBe(6); + expect(scores['src/session/service.go']).toBe(7); }); it('multiple files in one diff are scored independently', () => { const diff = - makeDiff('src/auth/handler.go') + // security → 2 - makeLargeDiff('src/utils/big.go', 200) + // large → 2 + makeDiff('src/auth/handler.go') + // baseline 1 + security +2 = 3 + makeLargeDiff('src/utils/big.go', 200) + // baseline 1 + large +2 = 3 makeDiff('backend/gen/foo.pb.go'); // excluded → 0 const scores = scoreFiles(diff, ['backend/gen/']); - expect(scores['src/auth/handler.go']).toBe(2); - expect(scores['src/utils/big.go']).toBe(2); + expect(scores['src/auth/handler.go']).toBe(3); + expect(scores['src/utils/big.go']).toBe(3); expect(scores['backend/gen/foo.pb.go']).toBe(0); }); }); @@ -453,18 +523,80 @@ describe('scoreFiles — combined rules', () => { // ── extractFilePath regression: greedy-regex vs indexOf(' b/') ───────────────── describe('scoreFiles — extractFilePath: indexOf regression for paths containing b/', () => { - it('path with security keyword before a b/ directory component scores 2, not 0', () => { + it('path with security keyword before a b/ directory component scores 3, not 1', () => { // Old greedy regex: replace(/.*b\//, '') on // "diff --git a/src/auth/b/helper.ts b/src/auth/b/helper.ts" // matches everything up to the last 'b/' giving 'helper.ts'. - // 'helper.ts' has no security keyword → score 0. + // 'helper.ts' has no security keyword → baseline score 1 only. // // New indexOf(' b/'): strips "diff --git a/" prefix and splits at the // first ' b/' separator giving 'src/auth/b/helper.ts'. - // 'src/auth/b/helper.ts' matches SECURITY_PATH_RE ('auth') → score 2. + // 'src/auth/b/helper.ts' matches SECURITY_PATH_RE ('auth') → baseline 1 + +2 = 3. const diff = makeDiff('src/auth/b/helper.ts', ['+changed']); const scores = scoreFiles(diff, []); - expect(scores['src/auth/b/helper.ts']).toBe(2); + expect(scores['src/auth/b/helper.ts']).toBe(3); + }); +}); + +// ── Baseline score for plain application files ──────────────────────────────── + +describe('scoreFiles — baseline score 1 for plain application files', () => { + it('plain app file (src/app.ts) with a small change gets score 1, not 0', () => { + // This is the key regression test: ordinary application code must never + // be auto-excluded alongside intentionally-low-risk files. + const diff = makeDiff('src/app.ts', ['+const x = 1;']); + const scores = scoreFiles(diff, []); + expect(scores['src/app.ts']).toBe(1); + }); + + it('plain server file (src/server.ts) scores 1', () => { + const diff = makeDiff('src/server.ts', ['+app.listen(3000);']); + const scores = scoreFiles(diff, []); + expect(scores['src/server.ts']).toBe(1); + }); + + it('plain types file (src/types.ts) scores 1', () => { + const diff = makeDiff('src/types.ts', ['+export type Foo = string;']); + const scores = scoreFiles(diff, []); + expect(scores['src/types.ts']).toBe(1); + }); + + it('React component (src/TeamDetail.tsx) scores 1', () => { + const diff = makeDiff('src/TeamDetail.tsx', ['+export default function TeamDetail() {}']); + const scores = scoreFiles(diff, []); + expect(scores['src/TeamDetail.tsx']).toBe(1); + }); + + it('service file (src/entityEnrichment.ts) scores 1', () => { + const diff = makeDiff('src/entityEnrichment.ts', ['+export function enrich() {}']); + const scores = scoreFiles(diff, []); + expect(scores['src/entityEnrichment.ts']).toBe(1); + }); + + it('plain Go file (internal/handler/http.go) scores 1', () => { + const diff = makeDiff('internal/handler/http.go', ['+func handleRequest() {}']); + const scores = scoreFiles(diff, []); + expect(scores['internal/handler/http.go']).toBe(1); + }); + + // Regression: these were at score 0 before the baseline change and must + // remain at 0 (rule 6 still resets them) — NOT promoted to 1. + it('yarn.lock scores 0 (lock file, not promoted by baseline)', () => { + const diff = makeDiff('yarn.lock', ['+dep@1.2.3']); + const scores = scoreFiles(diff, []); + expect(scores['yarn.lock']).toBe(0); + }); + + it('.gitignore scores 0 (dotfile, not promoted by baseline)', () => { + const diff = makeDiff('.gitignore', ['+node_modules/']); + const scores = scoreFiles(diff, []); + expect(scores['.gitignore']).toBe(0); + }); + + it('pnpm-lock.yaml scores 0 (lock file, not promoted by baseline)', () => { + const diff = makeDiff('pnpm-lock.yaml', ['+ version: 1.0.0']); + const scores = scoreFiles(diff, []); + expect(scores['pnpm-lock.yaml']).toBe(0); }); }); @@ -475,16 +607,18 @@ describe('scoreFiles — edge cases', () => { expect(scoreFiles('', [])).toEqual({}); }); - it('empty exclude prefixes → all files scored normally', () => { + it('empty exclude prefixes → all files scored normally (plain files get baseline 1)', () => { const diff = makeDiff('backend/gen/foo.pb.go'); const scores = scoreFiles(diff, []); - // No security keyword, no large change, no hunks → 0 - expect(scores['backend/gen/foo.pb.go']).toBe(0); + // No security keyword, no large change, no hunks, not a test/doc/config file. + // makeDiff() does not emit a "Code generated" or "DO NOT EDIT" added line, + // so rule 2 (generated-file marker) does not fire — the file gets baseline 1. + expect(scores['backend/gen/foo.pb.go']).toBe(1); }); - it('single plain file with no matching rules → score 0', () => { + it('single plain file with no matching rules → baseline score 1', () => { const diff = makeDiff('src/utils/helper.go', ['+minor tweak']); const scores = scoreFiles(diff, []); - expect(scores['src/utils/helper.go']).toBe(0); + expect(scores['src/utils/helper.go']).toBe(1); }); }); diff --git a/src/score-risk/score-risk.ts b/src/score-risk/score-risk.ts index f0ba741..494ad87 100644 --- a/src/score-risk/score-risk.ts +++ b/src/score-risk/score-risk.ts @@ -15,12 +15,18 @@ * (auth|security|crypto|session|secret|token|password|credential, case-insensitive) * 4. Large change (>100 added lines)→ +2 * 5. Many hunks (>3 hunk headers) → +1 - * 6. Test/doc/config file → score = 0 (resets 3-5 to zero) + * 6. Low-risk file → score = 0 (resets baseline + 3-5 to zero) + * Tests, docs, config, lock files, binary assets, dotfiles, LICENSE, etc. * 7. Error-handling patterns → +1 * (catch|rescue|except|recover|error|panic in any added line, case-sensitive) * + * Files not caught by rules 1, 2, or 6 start with a baseline score of 1 + * so that ordinary application code is never auto-excluded alongside + * intentionally-low-risk files (tests, docs, generated code, JSON configs). + * * Rule 6 resets the running total to 0, then rule 7 can still add 1. - * This faithfully reproduces the existing bash behaviour. + * This faithfully reproduces the existing bash behaviour for test/doc/config + * files while ensuring plain application files always reach the reviewer. */ // --------------------------------------------------------------------------- @@ -30,9 +36,55 @@ /** Matches security-sensitive keywords in a file path (case-insensitive). */ const SECURITY_PATH_RE = /auth|security|crypto|session|secret|token|password|credential/i; -/** Matches test, documentation, and config file extensions (case-insensitive). */ -const TEST_FILE_RE = - /_test\.go$|\.test\.[tj]sx?$|\.spec\.[tj]sx?$|test_.*\.py$|\.md$|\.ya?ml$|\.json$|\.toml$|_test\.rs$|_bench\.rs$|_spec\.rs$|_spec\.rb$|(^|\/)(tests?|benches|__tests__|specs?)\//i; +/** + * Matches low-risk file types that should be auto-excluded from review. + * Covers test files, documentation, config/manifest/data files, lock files, + * binary assets (images, fonts), CSS, dotfiles, and misc non-code files. + * + * Renamed from TEST_FILE_RE to reflect the broader scope introduced alongside + * the baseline-score-1 change: with a baseline of 1 any file not matched here + * would survive auto-exclusion, so this list must be comprehensive. + * + * Categories: + * - Test files: _test.go, .test.ts/js/tsx/jsx, .spec.ts/js/tsx/jsx, + * test_*.py, _test.rs, _bench.rs, _spec.rs, _spec.rb, + * tests/, benches/, __tests__/, specs/ directories + * - Documentation: .md + * - Config/data: .yml, .yaml, .json, .toml, .css + * - Lock files: yarn.lock, package-lock.json, Cargo.lock, go.sum, + * Gemfile.lock, pnpm-lock.yaml, composer.lock, poetry.lock + * - Assets: .svg, .png, .jpg, .jpeg, .gif, .ico, + * .woff, .woff2, .eot, .ttf + * - Dotfiles: .gitignore, .gitattributes, .editorconfig, + * .prettierrc[.*], .eslintignore, .dockerignore + * - Misc: LICENSE[.*], .env.example + */ +const LOW_RISK_FILE_RE = + // Test files + /_test\.go$|\.test\.[tj]sx?$|\.spec\.[tj]sx?$|test_.*\.py$|_test\.rs$|_bench\.rs$|_spec\.rs$|_spec\.rb$|(^|\/)(tests?|benches|__tests__|specs?)\//i; + +/** + * Extended low-risk pattern covering everything LOW_RISK_FILE_RE covers plus + * lock files, binary assets, CSS, dotfiles, and misc non-code files. + * Built as a RegExp constructor call so the source can be split across lines. + */ +const LOW_RISK_FILE_EXTRA_RE = new RegExp( + // Documentation + '\\.md$' + + // Config / manifests / data / CSS + '|\\.ya?ml$|\\.json$|\\.toml$|\\.css$' + + // Lock files + '|yarn\\.lock$|package-lock\\.json$|Cargo\\.lock$|go\\.sum$' + + '|Gemfile\\.lock$|pnpm-lock\\.yaml$|composer\\.lock$|poetry\\.lock$' + + // Binary / asset files + '|\\.svg$|\\.png$|\\.jpe?g$|\\.gif$|\\.ico$|\\.woff2?$|\\.eot$|\\.ttf$' + + // Dotfiles and project-root config files + '|(^|\\/)\\.gitignore$|(^|\\/)\\.gitattributes$|(^|\\/)\\.editorconfig$' + + '|(^|\\/)\\.prettierrc(\\.\\w+)?$|(^|\\/)\\.eslintignore$|(^|\\/)\\.dockerignore$' + + // Misc: legal / example files + '|(^|\\/)LICENSE(\\.\\w+)?$|(^|\\/)\\.env\\.example$', + 'i', +); /** Matches error-handling keywords in diff hunk lines (case-sensitive, matching bash awk). */ const ERROR_PATTERN_RE = /catch|rescue|except|recover|error|panic/; @@ -156,6 +208,14 @@ function countErrorHandlingLines(diffContent: string, filePath: string): number return count; } +/** + * Return true if `filePath` matches any low-risk pattern (rule 6). + * Checks both the literal regex and the constructor-built extended regex. + */ +function isLowRiskFile(filePath: string): boolean { + return LOW_RISK_FILE_RE.test(filePath) || LOW_RISK_FILE_EXTRA_RE.test(filePath); +} + // --------------------------------------------------------------------------- // Public API // --------------------------------------------------------------------------- @@ -198,7 +258,11 @@ export function scoreFiles(diffContent: string, excludePrefixes: string[]): Risk continue; } - let score = 0; + // Baseline: plain application files that don't match any positive rule + // (rules 3-5) or the reset rule (6) still need review — give them score 1 + // so they are not auto-excluded alongside intentionally-low-risk files. + // Rules 1 and 2 already exited early above with score 0; rule 6 resets to 0. + let score = 1; // Rule 3: security-sensitive path → +2. if (SECURITY_PATH_RE.test(path)) score += 2; @@ -209,8 +273,8 @@ export function scoreFiles(diffContent: string, excludePrefixes: string[]): Risk // Rule 5: many hunks (>3 hunk headers) → +1. if (hunks > 3) score += 1; - // Rule 6: test/doc/config file → reset score to 0. - if (TEST_FILE_RE.test(path)) score = 0; + // Rule 6: low-risk file (test/doc/config/lock/asset/dotfile) → reset score to 0. + if (isLowRiskFile(path)) score = 0; // Rule 7: error-handling patterns in added lines → +1. if (countErrorHandlingLines(diffContent, path) > 0) score += 1;