From 15cc94e5921b5416ec682ae550bb6016e0c9666f Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 03:19:21 -0600 Subject: [PATCH 01/22] refactor: decompose AST analysis visitors and engine into focused helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move nested handler functions to module level in cfg-visitor.js, dataflow-visitor.js, and complexity-visitor.js — reducing cognitive complexity of each factory function from 100-337 down to thin coordinators. Extract WASM pre-parse, visitor setup, result storage, and build delegation from runAnalyses into focused helper functions. Impact: 66 functions changed, 43 affected --- src/ast-analysis/engine.js | 510 ++++--- src/ast-analysis/visitors/cfg-visitor.js | 1284 ++++++++--------- .../visitors/complexity-visitor.js | 274 ++-- src/ast-analysis/visitors/dataflow-visitor.js | 454 +++--- 4 files changed, 1252 insertions(+), 1270 deletions(-) diff --git a/src/ast-analysis/engine.js b/src/ast-analysis/engine.js index 981ec514..76ba8cd2 100644 --- a/src/ast-analysis/engine.js +++ b/src/ast-analysis/engine.js @@ -50,294 +50,227 @@ async function getParserModule() { return _parserModule; } -// ─── Public API ────────────────────────────────────────────────────────── +// ─── WASM pre-parse ───────────────────────────────────────────────────── -/** - * Run all enabled AST analyses in a coordinated pass. 
- * - * @param {object} db - open better-sqlite3 database (read-write) - * @param {Map} fileSymbols - Map - * @param {string} rootDir - absolute project root path - * @param {object} opts - build options (ast, complexity, cfg, dataflow toggles) - * @param {object} [engineOpts] - engine options - * @returns {Promise<{ astMs: number, complexityMs: number, cfgMs: number, dataflowMs: number }>} - */ -export async function runAnalyses(db, fileSymbols, rootDir, opts, engineOpts) { - const timing = { astMs: 0, complexityMs: 0, cfgMs: 0, dataflowMs: 0 }; - - const doAst = opts.ast !== false; +async function ensureWasmTreesIfNeeded(fileSymbols, opts) { const doComplexity = opts.complexity !== false; const doCfg = opts.cfg !== false; const doDataflow = opts.dataflow !== false; - if (!doAst && !doComplexity && !doCfg && !doDataflow) return timing; - - const extToLang = buildExtToLangMap(); - - // ── WASM pre-parse for files that need it ─────────────────────────── - // The native engine only handles parsing (symbols, calls, imports). - // Complexity, CFG, and dataflow all require a WASM tree-sitter tree - // for their visitor walks. Without this, incremental rebuilds on the - // native engine silently lose these analyses for changed files (#468). 
- if (doComplexity || doCfg || doDataflow) { - let needsWasmTrees = false; - for (const [relPath, symbols] of fileSymbols) { - if (symbols._tree) continue; - const ext = path.extname(relPath).toLowerCase(); - const defs = symbols.definitions || []; - - const needsComplexity = - doComplexity && - COMPLEXITY_EXTENSIONS.has(ext) && - defs.some((d) => (d.kind === 'function' || d.kind === 'method') && d.line && !d.complexity); - const needsCfg = - doCfg && - CFG_EXTENSIONS.has(ext) && - defs.some( - (d) => - (d.kind === 'function' || d.kind === 'method') && - d.line && - d.cfg !== null && - !Array.isArray(d.cfg?.blocks), - ); - const needsDataflow = doDataflow && !symbols.dataflow && DATAFLOW_EXTENSIONS.has(ext); - - if (needsComplexity || needsCfg || needsDataflow) { - needsWasmTrees = true; - break; - } - } - - if (needsWasmTrees) { - try { - const { ensureWasmTrees } = await getParserModule(); - await ensureWasmTrees(fileSymbols, rootDir); - } catch (err) { - debug(`ensureWasmTrees failed: ${err.message}`); - } - } - } - - // ── Phase 7 Optimization: Unified pre-walk ───────────────────────── - // For files with WASM trees, run all applicable visitors in a SINGLE - // walkWithVisitors call. Store results in the format that buildXxx - // functions already expect as pre-computed data (same fields as native - // engine output). This eliminates ~3 redundant tree traversals per file. 
- const t0walk = performance.now(); + if (!doComplexity && !doCfg && !doDataflow) return; + let needsWasmTrees = false; for (const [relPath, symbols] of fileSymbols) { - if (!symbols._tree) continue; // No WASM tree — native path handles it - + if (symbols._tree) continue; const ext = path.extname(relPath).toLowerCase(); - const langId = symbols._langId || extToLang.get(ext); - if (!langId) continue; - const defs = symbols.definitions || []; - const visitors = []; - const walkerOpts = { - functionNodeTypes: new Set(), - nestingNodeTypes: new Set(), - getFunctionName: (_node) => null, - }; - - // ─ AST-store visitor ─ - const astTypeMap = AST_TYPE_MAPS.get(langId); - let astVisitor = null; - if (doAst && astTypeMap && WALK_EXTENSIONS.has(ext) && !symbols.astNodes?.length) { - const nodeIdMap = new Map(); - for (const row of bulkNodeIdsByFile(db, relPath)) { - nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id); - } - astVisitor = createAstStoreVisitor(astTypeMap, defs, relPath, nodeIdMap); - visitors.push(astVisitor); - } - // ─ Complexity visitor (file-level mode) ─ - const cRules = COMPLEXITY_RULES.get(langId); - const hRules = HALSTEAD_RULES.get(langId); - let complexityVisitor = null; - if (doComplexity && cRules) { - // Only use visitor if some functions lack pre-computed complexity - const needsWasmComplexity = defs.some( - (d) => (d.kind === 'function' || d.kind === 'method') && d.line && !d.complexity, - ); - if (needsWasmComplexity) { - complexityVisitor = createComplexityVisitor(cRules, hRules, { - fileLevelWalk: true, - langId, - }); - visitors.push(complexityVisitor); - - // Merge nesting nodes for complexity tracking - // NOTE: do NOT add functionNodes here — funcDepth in the complexity - // visitor already tracks function-level nesting. Adding them to - // nestingNodeTypes would inflate context.nestingLevel by +1 inside - // every function body, double-counting in cognitive += 1 + nestingLevel. 
- for (const t of cRules.nestingNodes) walkerOpts.nestingNodeTypes.add(t); - - // Provide getFunctionName for complexity visitor - const dfRules = DATAFLOW_RULES.get(langId); - walkerOpts.getFunctionName = (node) => { - // Try complexity rules' function name field first - const nameNode = node.childForFieldName('name'); - if (nameNode) return nameNode.text; - // Fall back to dataflow rules' richer name extraction - if (dfRules) return getFuncName(node, dfRules); - return null; - }; - } - } - - // ─ CFG visitor ─ - const cfgRulesForLang = CFG_RULES.get(langId); - let cfgVisitor = null; - if (doCfg && cfgRulesForLang && CFG_EXTENSIONS.has(ext)) { - // Only use visitor if some functions lack pre-computed CFG - const needsWasmCfg = defs.some( + const needsComplexity = + doComplexity && + COMPLEXITY_EXTENSIONS.has(ext) && + defs.some((d) => (d.kind === 'function' || d.kind === 'method') && d.line && !d.complexity); + const needsCfg = + doCfg && + CFG_EXTENSIONS.has(ext) && + defs.some( (d) => (d.kind === 'function' || d.kind === 'method') && d.line && d.cfg !== null && !Array.isArray(d.cfg?.blocks), ); - if (needsWasmCfg) { - cfgVisitor = createCfgVisitor(cfgRulesForLang); - visitors.push(cfgVisitor); - } + const needsDataflow = doDataflow && !symbols.dataflow && DATAFLOW_EXTENSIONS.has(ext); + + if (needsComplexity || needsCfg || needsDataflow) { + needsWasmTrees = true; + break; } + } - // ─ Dataflow visitor ─ - const dfRules = DATAFLOW_RULES.get(langId); - let dataflowVisitor = null; - if (doDataflow && dfRules && DATAFLOW_EXTENSIONS.has(ext) && !symbols.dataflow) { - dataflowVisitor = createDataflowVisitor(dfRules); - visitors.push(dataflowVisitor); + if (needsWasmTrees) { + try { + const { ensureWasmTrees } = await getParserModule(); + await ensureWasmTrees(fileSymbols); + } catch (err) { + debug(`ensureWasmTrees failed: ${err.message}`); } + } +} - // ─ Run unified walk if we have visitors ─ - if (visitors.length === 0) continue; +// ─── Per-file visitor setup 
───────────────────────────────────────────── - const results = walkWithVisitors(symbols._tree.rootNode, visitors, langId, walkerOpts); +function setupVisitors(db, relPath, symbols, langId, opts) { + const ext = path.extname(relPath).toLowerCase(); + const defs = symbols.definitions || []; + const doAst = opts.ast !== false; + const doComplexity = opts.complexity !== false; + const doCfg = opts.cfg !== false; + const doDataflow = opts.dataflow !== false; - // ─ Store AST results (buildAstNodes will find symbols.astNodes and skip its walk) ─ - if (astVisitor) { - const astRows = results['ast-store'] || []; - if (astRows.length > 0) { - // Store in the format buildAstNodes expects for the native path - symbols.astNodes = astRows; - } + const visitors = []; + const walkerOpts = { + functionNodeTypes: new Set(), + nestingNodeTypes: new Set(), + getFunctionName: (_node) => null, + }; + + // AST-store visitor + let astVisitor = null; + const astTypeMap = AST_TYPE_MAPS.get(langId); + if (doAst && astTypeMap && WALK_EXTENSIONS.has(ext) && !symbols.astNodes?.length) { + const nodeIdMap = new Map(); + for (const row of bulkNodeIdsByFile(db, relPath)) { + nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id); } + astVisitor = createAstStoreVisitor(astTypeMap, defs, relPath, nodeIdMap); + visitors.push(astVisitor); + } - // ─ Store complexity results on definitions (buildComplexityMetrics will find def.complexity) ─ - if (complexityVisitor) { - const complexityResults = results.complexity || []; - // Match results back to definitions by function start line - // Store the full result (metrics + funcNode) for O(1) lookup - const resultByLine = new Map(); - for (const r of complexityResults) { - if (r.funcNode) { - const line = r.funcNode.startPosition.row + 1; - if (!resultByLine.has(line)) resultByLine.set(line, []); - resultByLine.get(line).push(r); - } - } - for (const def of defs) { - if ((def.kind === 'function' || def.kind === 'method') && def.line && 
!def.complexity) { - const candidates = resultByLine.get(def.line); - const funcResult = !candidates - ? undefined - : candidates.length === 1 - ? candidates[0] - : (candidates.find((r) => { - const n = r.funcNode.childForFieldName('name'); - return n && n.text === def.name; - }) ?? candidates[0]); - if (funcResult) { - const { metrics } = funcResult; - const loc = computeLOCMetrics(funcResult.funcNode, langId); - const volume = metrics.halstead ? metrics.halstead.volume : 0; - const commentRatio = loc.loc > 0 ? loc.commentLines / loc.loc : 0; - const mi = computeMaintainabilityIndex( - volume, - metrics.cyclomatic, - loc.sloc, - commentRatio, - ); - - def.complexity = { - cognitive: metrics.cognitive, - cyclomatic: metrics.cyclomatic, - maxNesting: metrics.maxNesting, - halstead: metrics.halstead, - loc, - maintainabilityIndex: mi, - }; - } - } - } + // Complexity visitor (file-level mode) + let complexityVisitor = null; + const cRules = COMPLEXITY_RULES.get(langId); + const hRules = HALSTEAD_RULES.get(langId); + if (doComplexity && cRules) { + const needsWasmComplexity = defs.some( + (d) => (d.kind === 'function' || d.kind === 'method') && d.line && !d.complexity, + ); + if (needsWasmComplexity) { + complexityVisitor = createComplexityVisitor(cRules, hRules, { fileLevelWalk: true, langId }); + visitors.push(complexityVisitor); + + for (const t of cRules.nestingNodes) walkerOpts.nestingNodeTypes.add(t); + + const dfRules = DATAFLOW_RULES.get(langId); + walkerOpts.getFunctionName = (node) => { + const nameNode = node.childForFieldName('name'); + if (nameNode) return nameNode.text; + if (dfRules) return getFuncName(node, dfRules); + return null; + }; } + } - // ─ Store CFG results on definitions (buildCFGData will find def.cfg and skip its walk) ─ - if (cfgVisitor) { - const cfgResults = results.cfg || []; - const cfgByLine = new Map(); - for (const r of cfgResults) { - if (r.funcNode) { - const line = r.funcNode.startPosition.row + 1; - if (!cfgByLine.has(line)) 
cfgByLine.set(line, []); - cfgByLine.get(line).push(r); - } - } - for (const def of defs) { - if ( - (def.kind === 'function' || def.kind === 'method') && - def.line && - !def.cfg?.blocks?.length - ) { - const candidates = cfgByLine.get(def.line); - const cfgResult = !candidates - ? undefined - : candidates.length === 1 - ? candidates[0] - : (candidates.find((r) => { - const n = r.funcNode.childForFieldName('name'); - return n && n.text === def.name; - }) ?? candidates[0]); - if (cfgResult) { - def.cfg = { blocks: cfgResult.blocks, edges: cfgResult.edges }; - - // Override complexity's cyclomatic with CFG-derived value (single source of truth) - // and recompute maintainability index to stay consistent - if (def.complexity && cfgResult.cyclomatic != null) { - def.complexity.cyclomatic = cfgResult.cyclomatic; - const { loc, halstead } = def.complexity; - const volume = halstead ? halstead.volume : 0; - const commentRatio = loc?.loc > 0 ? loc.commentLines / loc.loc : 0; - def.complexity.maintainabilityIndex = computeMaintainabilityIndex( - volume, - cfgResult.cyclomatic, - loc?.sloc ?? 
0, - commentRatio, - ); - } - } - } - } + // CFG visitor + let cfgVisitor = null; + const cfgRulesForLang = CFG_RULES.get(langId); + if (doCfg && cfgRulesForLang && CFG_EXTENSIONS.has(ext)) { + const needsWasmCfg = defs.some( + (d) => + (d.kind === 'function' || d.kind === 'method') && + d.line && + d.cfg !== null && + !Array.isArray(d.cfg?.blocks), + ); + if (needsWasmCfg) { + cfgVisitor = createCfgVisitor(cfgRulesForLang); + visitors.push(cfgVisitor); } + } + + // Dataflow visitor + let dataflowVisitor = null; + const dfRules = DATAFLOW_RULES.get(langId); + if (doDataflow && dfRules && DATAFLOW_EXTENSIONS.has(ext) && !symbols.dataflow) { + dataflowVisitor = createDataflowVisitor(dfRules); + visitors.push(dataflowVisitor); + } + + return { visitors, walkerOpts, astVisitor, complexityVisitor, cfgVisitor, dataflowVisitor }; +} - // ─ Store dataflow results (buildDataflowEdges will find symbols.dataflow and skip its walk) ─ - if (dataflowVisitor) { - symbols.dataflow = results.dataflow; +// ─── Result storage helpers ───────────────────────────────────────────── + +function storeComplexityResults(results, defs, langId) { + const complexityResults = results.complexity || []; + const resultByLine = new Map(); + for (const r of complexityResults) { + if (r.funcNode) { + const line = r.funcNode.startPosition.row + 1; + if (!resultByLine.has(line)) resultByLine.set(line, []); + resultByLine.get(line).push(r); } } + for (const def of defs) { + if ((def.kind === 'function' || def.kind === 'method') && def.line && !def.complexity) { + const candidates = resultByLine.get(def.line); + const funcResult = !candidates + ? undefined + : candidates.length === 1 + ? candidates[0] + : (candidates.find((r) => { + const n = r.funcNode.childForFieldName('name'); + return n && n.text === def.name; + }) ?? candidates[0]); + if (funcResult) { + const { metrics } = funcResult; + const loc = computeLOCMetrics(funcResult.funcNode, langId); + const volume = metrics.halstead ? 
metrics.halstead.volume : 0; + const commentRatio = loc.loc > 0 ? loc.commentLines / loc.loc : 0; + const mi = computeMaintainabilityIndex(volume, metrics.cyclomatic, loc.sloc, commentRatio); + + def.complexity = { + cognitive: metrics.cognitive, + cyclomatic: metrics.cyclomatic, + maxNesting: metrics.maxNesting, + halstead: metrics.halstead, + loc, + maintainabilityIndex: mi, + }; + } + } + } +} - timing._unifiedWalkMs = performance.now() - t0walk; +function storeCfgResults(results, defs) { + const cfgResults = results.cfg || []; + const cfgByLine = new Map(); + for (const r of cfgResults) { + if (r.funcNode) { + const line = r.funcNode.startPosition.row + 1; + if (!cfgByLine.has(line)) cfgByLine.set(line, []); + cfgByLine.get(line).push(r); + } + } + for (const def of defs) { + if ( + (def.kind === 'function' || def.kind === 'method') && + def.line && + !def.cfg?.blocks?.length + ) { + const candidates = cfgByLine.get(def.line); + const cfgResult = !candidates + ? undefined + : candidates.length === 1 + ? candidates[0] + : (candidates.find((r) => { + const n = r.funcNode.childForFieldName('name'); + return n && n.text === def.name; + }) ?? candidates[0]); + if (cfgResult) { + def.cfg = { blocks: cfgResult.blocks, edges: cfgResult.edges }; + + // Override complexity's cyclomatic with CFG-derived value (single source of truth) + if (def.complexity && cfgResult.cyclomatic != null) { + def.complexity.cyclomatic = cfgResult.cyclomatic; + const { loc, halstead } = def.complexity; + const volume = halstead ? halstead.volume : 0; + const commentRatio = loc?.loc > 0 ? loc.commentLines / loc.loc : 0; + def.complexity.maintainabilityIndex = computeMaintainabilityIndex( + volume, + cfgResult.cyclomatic, + loc?.sloc ?? 
0, + commentRatio, + ); + } + } + } + } +} - // ── Delegate to buildXxx functions ───────────────────────────────── - // Each function finds pre-computed data from the unified walk above - // (or from the native engine) and only does DB writes + native fallback. +// ─── Build delegation ─────────────────────────────────────────────────── - if (doAst) { +async function delegateToBuildFunctions(db, fileSymbols, rootDir, opts, engineOpts, timing) { + if (opts.ast !== false) { const t0 = performance.now(); try { const { buildAstNodes } = await import('../features/ast.js'); @@ -348,7 +281,7 @@ export async function runAnalyses(db, fileSymbols, rootDir, opts, engineOpts) { timing.astMs = performance.now() - t0; } - if (doComplexity) { + if (opts.complexity !== false) { const t0 = performance.now(); try { const { buildComplexityMetrics } = await import('../features/complexity.js'); @@ -359,7 +292,7 @@ export async function runAnalyses(db, fileSymbols, rootDir, opts, engineOpts) { timing.complexityMs = performance.now() - t0; } - if (doCfg) { + if (opts.cfg !== false) { const t0 = performance.now(); try { const { buildCFGData } = await import('../features/cfg.js'); @@ -370,7 +303,7 @@ export async function runAnalyses(db, fileSymbols, rootDir, opts, engineOpts) { timing.cfgMs = performance.now() - t0; } - if (doDataflow) { + if (opts.dataflow !== false) { const t0 = performance.now(); try { const { buildDataflowEdges } = await import('../features/dataflow.js'); @@ -380,6 +313,67 @@ export async function runAnalyses(db, fileSymbols, rootDir, opts, engineOpts) { } timing.dataflowMs = performance.now() - t0; } +} + +// ─── Public API ────────────────────────────────────────────────────────── + +/** + * Run all enabled AST analyses in a coordinated pass. 
+ * + * @param {object} db - open better-sqlite3 database (read-write) + * @param {Map} fileSymbols - Map + * @param {string} rootDir - absolute project root path + * @param {object} opts - build options (ast, complexity, cfg, dataflow toggles) + * @param {object} [engineOpts] - engine options + * @returns {Promise<{ astMs: number, complexityMs: number, cfgMs: number, dataflowMs: number }>} + */ +export async function runAnalyses(db, fileSymbols, rootDir, opts, engineOpts) { + const timing = { astMs: 0, complexityMs: 0, cfgMs: 0, dataflowMs: 0 }; + + const doAst = opts.ast !== false; + const doComplexity = opts.complexity !== false; + const doCfg = opts.cfg !== false; + const doDataflow = opts.dataflow !== false; + + if (!doAst && !doComplexity && !doCfg && !doDataflow) return timing; + + const extToLang = buildExtToLangMap(); + + // WASM pre-parse for files that need it + await ensureWasmTreesIfNeeded(fileSymbols, opts); + + // Unified pre-walk: run all applicable visitors in a single DFS per file + const t0walk = performance.now(); + + for (const [relPath, symbols] of fileSymbols) { + if (!symbols._tree) continue; + + const ext = path.extname(relPath).toLowerCase(); + const langId = symbols._langId || extToLang.get(ext); + if (!langId) continue; + + const { visitors, walkerOpts, astVisitor, complexityVisitor, cfgVisitor, dataflowVisitor } = + setupVisitors(db, relPath, symbols, langId, opts); + + if (visitors.length === 0) continue; + + const results = walkWithVisitors(symbols._tree.rootNode, visitors, langId, walkerOpts); + const defs = symbols.definitions || []; + + if (astVisitor) { + const astRows = results['ast-store'] || []; + if (astRows.length > 0) symbols.astNodes = astRows; + } + + if (complexityVisitor) storeComplexityResults(results, defs, langId); + if (cfgVisitor) storeCfgResults(results, defs); + if (dataflowVisitor) symbols.dataflow = results.dataflow; + } + + timing._unifiedWalkMs = performance.now() - t0walk; + + // Delegate to buildXxx 
functions for DB writes + native fallback + await delegateToBuildFunctions(db, fileSymbols, rootDir, opts, engineOpts, timing); return timing; } diff --git a/src/ast-analysis/visitors/cfg-visitor.js b/src/ast-analysis/visitors/cfg-visitor.js index 1fb1de50..97bb344f 100644 --- a/src/ast-analysis/visitors/cfg-visitor.js +++ b/src/ast-analysis/visitors/cfg-visitor.js @@ -10,756 +10,746 @@ * hooks, using a control-flow frame stack to track branch/loop/switch context. */ -/** - * Create a CFG visitor for use with walkWithVisitors. - * - * @param {object} cfgRules - CFG_RULES for the language - * @returns {Visitor} - */ -export function createCfgVisitor(cfgRules) { - // ── Per-function state ────────────────────────────────────────────── - // Pushed/popped on enterFunction/exitFunction for nested function support. - - /** @type {Array} Stack of per-function CFG state */ - const funcStateStack = []; - - /** @type {object|null} Active per-function state */ - let S = null; - - // Collected results (one per top-level function) - const results = []; - - function makeFuncState() { - const blocks = []; - const edges = []; - let nextIndex = 0; - - function makeBlock(type, startLine = null, endLine = null, label = null) { - const block = { index: nextIndex++, type, startLine, endLine, label }; - blocks.push(block); - return block; - } - - function addEdge(source, target, kind) { - edges.push({ sourceIndex: source.index, targetIndex: target.index, kind }); - } +// ── Node-type predicates ──────────────────────────────────────────────── - const entry = makeBlock('entry'); - const exit = makeBlock('exit'); - const firstBody = makeBlock('body'); - addEdge(entry, firstBody, 'fallthrough'); - - return { - blocks, - edges, - makeBlock, - addEdge, - entryBlock: entry, - exitBlock: exit, - currentBlock: firstBody, - loopStack: [], - labelMap: new Map(), - /** Control-flow frame stack for nested if/switch/try/loop/labeled */ - cfgStack: [], - funcNode: null, - }; - } - - // ── Helpers 
───────────────────────────────────────────────────────── +function isIfNode(type, cfgRules) { + return type === cfgRules.ifNode || cfgRules.ifNodes?.has(type); +} - function isIfNode(type) { - return type === cfgRules.ifNode || cfgRules.ifNodes?.has(type); - } +function isForNode(type, cfgRules) { + return cfgRules.forNodes.has(type); +} - function isForNode(type) { - return cfgRules.forNodes.has(type); - } +function isWhileNode(type, cfgRules) { + return type === cfgRules.whileNode || cfgRules.whileNodes?.has(type); +} - function isWhileNode(type) { - return type === cfgRules.whileNode || cfgRules.whileNodes?.has(type); - } +function isSwitchNode(type, cfgRules) { + return type === cfgRules.switchNode || cfgRules.switchNodes?.has(type); +} - function isSwitchNode(type) { - return type === cfgRules.switchNode || cfgRules.switchNodes?.has(type); - } +function isCaseNode(type, cfgRules) { + return ( + type === cfgRules.caseNode || type === cfgRules.defaultNode || cfgRules.caseNodes?.has(type) + ); +} - function isCaseNode(type) { - return ( - type === cfgRules.caseNode || type === cfgRules.defaultNode || cfgRules.caseNodes?.has(type) - ); - } +function isBlockNode(type, cfgRules) { + return type === 'statement_list' || type === cfgRules.blockNode || cfgRules.blockNodes?.has(type); +} - function isBlockNode(type) { - return ( - type === 'statement_list' || type === cfgRules.blockNode || cfgRules.blockNodes?.has(type) - ); - } +/** Check if a node is a control-flow statement that we handle specially */ +function isControlFlow(type, cfgRules) { + return ( + isIfNode(type, cfgRules) || + (cfgRules.unlessNode && type === cfgRules.unlessNode) || + isForNode(type, cfgRules) || + isWhileNode(type, cfgRules) || + (cfgRules.untilNode && type === cfgRules.untilNode) || + (cfgRules.doNode && type === cfgRules.doNode) || + (cfgRules.infiniteLoopNode && type === cfgRules.infiniteLoopNode) || + isSwitchNode(type, cfgRules) || + (cfgRules.tryNode && type === cfgRules.tryNode) || + 
type === cfgRules.returnNode || + type === cfgRules.throwNode || + type === cfgRules.breakNode || + type === cfgRules.continueNode || + type === cfgRules.labeledNode + ); +} - /** Check if a node is a control-flow statement that we handle specially */ - function isControlFlow(type) { - return ( - isIfNode(type) || - (cfgRules.unlessNode && type === cfgRules.unlessNode) || - isForNode(type) || - isWhileNode(type) || - (cfgRules.untilNode && type === cfgRules.untilNode) || - (cfgRules.doNode && type === cfgRules.doNode) || - (cfgRules.infiniteLoopNode && type === cfgRules.infiniteLoopNode) || - isSwitchNode(type) || - (cfgRules.tryNode && type === cfgRules.tryNode) || - type === cfgRules.returnNode || - type === cfgRules.throwNode || - type === cfgRules.breakNode || - type === cfgRules.continueNode || - type === cfgRules.labeledNode - ); - } +// ── Utility functions ─────────────────────────────────────────────────── - /** - * Get the actual control-flow node (unwrapping expression_statement if needed). - */ - function effectiveNode(node) { - if (node.type === 'expression_statement' && node.namedChildCount === 1) { - const inner = node.namedChild(0); - if (isControlFlow(inner.type)) return inner; - } - return node; +/** + * Get the actual control-flow node (unwrapping expression_statement if needed). + */ +function effectiveNode(node, cfgRules) { + if (node.type === 'expression_statement' && node.namedChildCount === 1) { + const inner = node.namedChild(0); + if (isControlFlow(inner.type, cfgRules)) return inner; } + return node; +} - /** - * Register a loop/switch in label map for labeled break/continue. - */ - function registerLabelCtx(headerBlock, exitBlock) { - for (const [, ctx] of S.labelMap) { - if (!ctx.headerBlock) { - ctx.headerBlock = headerBlock; - ctx.exitBlock = exitBlock; - } +/** + * Register a loop/switch in label map for labeled break/continue. 
+ */ +function registerLabelCtx(S, headerBlock, exitBlock) { + for (const [, ctx] of S.labelMap) { + if (!ctx.headerBlock) { + ctx.headerBlock = headerBlock; + ctx.exitBlock = exitBlock; } } +} - /** - * Get statements from a body node (block or single statement). - * Returns effective (unwrapped) nodes. - */ - function getBodyStatements(bodyNode) { - if (!bodyNode) return []; - if (isBlockNode(bodyNode.type)) { - const stmts = []; - for (let i = 0; i < bodyNode.namedChildCount; i++) { - const child = bodyNode.namedChild(i); - if (child.type === 'statement_list') { - for (let j = 0; j < child.namedChildCount; j++) { - stmts.push(child.namedChild(j)); - } - } else { - stmts.push(child); +/** + * Get statements from a body node (block or single statement). + * Returns effective (unwrapped) nodes. + */ +function getBodyStatements(bodyNode, cfgRules) { + if (!bodyNode) return []; + if (isBlockNode(bodyNode.type, cfgRules)) { + const stmts = []; + for (let i = 0; i < bodyNode.namedChildCount; i++) { + const child = bodyNode.namedChild(i); + if (child.type === 'statement_list') { + for (let j = 0; j < child.namedChildCount; j++) { + stmts.push(child.namedChild(j)); } + } else { + stmts.push(child); } - return stmts; } - return [bodyNode]; + return stmts; } + return [bodyNode]; +} - // ── Statement-level processing (replicates buildFunctionCFG logic) ── - // The visitor delegates to these for each control-flow construct, - // processing the body statements sequentially just like the original. 
+function makeFuncState() { + const blocks = []; + const edges = []; + let nextIndex = 0; - function processStatements(stmts, currentBlock) { - let cur = currentBlock; - for (const stmt of stmts) { - if (!cur) break; - cur = processStatement(stmt, cur); - } - return cur; + function makeBlock(type, startLine = null, endLine = null, label = null) { + const block = { index: nextIndex++, type, startLine, endLine, label }; + blocks.push(block); + return block; } - function processStatement(stmt, currentBlock) { - if (!stmt || !currentBlock) return currentBlock; + function addEdge(source, target, kind) { + edges.push({ sourceIndex: source.index, targetIndex: target.index, kind }); + } - // Unwrap expression_statement for Rust-style control flow expressions - const effNode = effectiveNode(stmt); - const type = effNode.type; + const entry = makeBlock('entry'); + const exit = makeBlock('exit'); + const firstBody = makeBlock('body'); + addEdge(entry, firstBody, 'fallthrough'); - // Labeled statement - if (type === cfgRules.labeledNode) { - return processLabeled(effNode, currentBlock); - } + return { + blocks, + edges, + makeBlock, + addEdge, + entryBlock: entry, + exitBlock: exit, + currentBlock: firstBody, + loopStack: [], + labelMap: new Map(), + cfgStack: [], + funcNode: null, + }; +} - // If / unless - if (isIfNode(type) || (cfgRules.unlessNode && type === cfgRules.unlessNode)) { - return processIf(effNode, currentBlock); - } +// ── Statement processors ──────────────────────────────────────────────── - // For loops - if (isForNode(type)) { - return processForLoop(effNode, currentBlock); - } +function processStatements(stmts, currentBlock, S, cfgRules) { + let cur = currentBlock; + for (const stmt of stmts) { + if (!cur) break; + cur = processStatement(stmt, cur, S, cfgRules); + } + return cur; +} - // While / until - if (isWhileNode(type) || (cfgRules.untilNode && type === cfgRules.untilNode)) { - return processWhileLoop(effNode, currentBlock); - } +function 
processStatement(stmt, currentBlock, S, cfgRules) { + if (!stmt || !currentBlock) return currentBlock; - // Do-while - if (cfgRules.doNode && type === cfgRules.doNode) { - return processDoWhileLoop(effNode, currentBlock); - } + const effNode = effectiveNode(stmt, cfgRules); + const type = effNode.type; - // Infinite loop (Rust) - if (cfgRules.infiniteLoopNode && type === cfgRules.infiniteLoopNode) { - return processInfiniteLoop(effNode, currentBlock); - } + if (type === cfgRules.labeledNode) { + return processLabeled(effNode, currentBlock, S, cfgRules); + } + if (isIfNode(type, cfgRules) || (cfgRules.unlessNode && type === cfgRules.unlessNode)) { + return processIf(effNode, currentBlock, S, cfgRules); + } + if (isForNode(type, cfgRules)) { + return processForLoop(effNode, currentBlock, S, cfgRules); + } + if (isWhileNode(type, cfgRules) || (cfgRules.untilNode && type === cfgRules.untilNode)) { + return processWhileLoop(effNode, currentBlock, S, cfgRules); + } + if (cfgRules.doNode && type === cfgRules.doNode) { + return processDoWhileLoop(effNode, currentBlock, S, cfgRules); + } + if (cfgRules.infiniteLoopNode && type === cfgRules.infiniteLoopNode) { + return processInfiniteLoop(effNode, currentBlock, S, cfgRules); + } + if (isSwitchNode(type, cfgRules)) { + return processSwitch(effNode, currentBlock, S, cfgRules); + } + if (cfgRules.tryNode && type === cfgRules.tryNode) { + return processTryCatch(effNode, currentBlock, S, cfgRules); + } + if (type === cfgRules.returnNode) { + currentBlock.endLine = effNode.startPosition.row + 1; + S.addEdge(currentBlock, S.exitBlock, 'return'); + return null; + } + if (type === cfgRules.throwNode) { + currentBlock.endLine = effNode.startPosition.row + 1; + S.addEdge(currentBlock, S.exitBlock, 'exception'); + return null; + } + if (type === cfgRules.breakNode) { + return processBreak(effNode, currentBlock, S); + } + if (type === cfgRules.continueNode) { + return processContinue(effNode, currentBlock, S); + } - // Switch / match - 
if (isSwitchNode(type)) { - return processSwitch(effNode, currentBlock); - } + // Regular statement — extend current block + if (!currentBlock.startLine) { + currentBlock.startLine = stmt.startPosition.row + 1; + } + currentBlock.endLine = stmt.endPosition.row + 1; + return currentBlock; +} - // Try/catch/finally - if (cfgRules.tryNode && type === cfgRules.tryNode) { - return processTryCatch(effNode, currentBlock); - } +// ── Labeled / break / continue ────────────────────────────────────────── + +function processLabeled(node, currentBlock, S, cfgRules) { + const labelNode = node.childForFieldName('label'); + const labelName = labelNode ? labelNode.text : null; + const body = node.childForFieldName('body'); + if (body && labelName) { + const labelCtx = { headerBlock: null, exitBlock: null }; + S.labelMap.set(labelName, labelCtx); + const result = processStatement(body, currentBlock, S, cfgRules); + S.labelMap.delete(labelName); + return result; + } + return currentBlock; +} - // Return - if (type === cfgRules.returnNode) { - currentBlock.endLine = effNode.startPosition.row + 1; - S.addEdge(currentBlock, S.exitBlock, 'return'); - return null; - } +function processBreak(node, currentBlock, S) { + const labelNode = node.childForFieldName('label'); + const labelName = labelNode ? 
labelNode.text : null; - // Throw - if (type === cfgRules.throwNode) { - currentBlock.endLine = effNode.startPosition.row + 1; - S.addEdge(currentBlock, S.exitBlock, 'exception'); - return null; - } + let target = null; + if (labelName && S.labelMap.has(labelName)) { + target = S.labelMap.get(labelName).exitBlock; + } else if (S.loopStack.length > 0) { + target = S.loopStack[S.loopStack.length - 1].exitBlock; + } - // Break - if (type === cfgRules.breakNode) { - return processBreak(effNode, currentBlock); - } + if (target) { + currentBlock.endLine = node.startPosition.row + 1; + S.addEdge(currentBlock, target, 'break'); + return null; + } + return currentBlock; +} - // Continue - if (type === cfgRules.continueNode) { - return processContinue(effNode, currentBlock); - } +function processContinue(node, currentBlock, S) { + const labelNode = node.childForFieldName('label'); + const labelName = labelNode ? labelNode.text : null; - // Regular statement — extend current block - if (!currentBlock.startLine) { - currentBlock.startLine = stmt.startPosition.row + 1; - } - currentBlock.endLine = stmt.endPosition.row + 1; - return currentBlock; - } - - function processLabeled(node, currentBlock) { - const labelNode = node.childForFieldName('label'); - const labelName = labelNode ? labelNode.text : null; - const body = node.childForFieldName('body'); - if (body && labelName) { - const labelCtx = { headerBlock: null, exitBlock: null }; - S.labelMap.set(labelName, labelCtx); - const result = processStatement(body, currentBlock); - S.labelMap.delete(labelName); - return result; - } - return currentBlock; + let target = null; + if (labelName && S.labelMap.has(labelName)) { + target = S.labelMap.get(labelName).headerBlock; + } else if (S.loopStack.length > 0) { + target = S.loopStack[S.loopStack.length - 1].headerBlock; } - function processBreak(node, currentBlock) { - const labelNode = node.childForFieldName('label'); - const labelName = labelNode ? 
labelNode.text : null; + if (target) { + currentBlock.endLine = node.startPosition.row + 1; + S.addEdge(currentBlock, target, 'continue'); + return null; + } + return currentBlock; +} - let target = null; - if (labelName && S.labelMap.has(labelName)) { - target = S.labelMap.get(labelName).exitBlock; - } else if (S.loopStack.length > 0) { - target = S.loopStack[S.loopStack.length - 1].exitBlock; - } +// ── If / else-if / else ───────────────────────────────────────────────── + +function processIf(ifStmt, currentBlock, S, cfgRules) { + currentBlock.endLine = ifStmt.startPosition.row + 1; + + const condBlock = S.makeBlock( + 'condition', + ifStmt.startPosition.row + 1, + ifStmt.startPosition.row + 1, + 'if', + ); + S.addEdge(currentBlock, condBlock, 'fallthrough'); + + const joinBlock = S.makeBlock('body'); + + // True branch + const consequentField = cfgRules.ifConsequentField || 'consequence'; + const consequent = ifStmt.childForFieldName(consequentField); + const trueBlock = S.makeBlock('branch_true', null, null, 'then'); + S.addEdge(condBlock, trueBlock, 'branch_true'); + const trueStmts = getBodyStatements(consequent, cfgRules); + const trueEnd = processStatements(trueStmts, trueBlock, S, cfgRules); + if (trueEnd) { + S.addEdge(trueEnd, joinBlock, 'fallthrough'); + } - if (target) { - currentBlock.endLine = node.startPosition.row + 1; - S.addEdge(currentBlock, target, 'break'); - return null; - } - return currentBlock; + // False branch + if (cfgRules.elifNode) { + processElifSiblings(ifStmt, condBlock, joinBlock, S, cfgRules); + } else { + processAlternative(ifStmt, condBlock, joinBlock, S, cfgRules); } - function processContinue(node, currentBlock) { - const labelNode = node.childForFieldName('label'); - const labelName = labelNode ? 
labelNode.text : null; + return joinBlock; +} - let target = null; - if (labelName && S.labelMap.has(labelName)) { - target = S.labelMap.get(labelName).headerBlock; - } else if (S.loopStack.length > 0) { - target = S.loopStack[S.loopStack.length - 1].headerBlock; - } +function processAlternative(ifStmt, condBlock, joinBlock, S, cfgRules) { + const alternative = ifStmt.childForFieldName('alternative'); + if (!alternative) { + S.addEdge(condBlock, joinBlock, 'branch_false'); + return; + } - if (target) { - currentBlock.endLine = node.startPosition.row + 1; - S.addEdge(currentBlock, target, 'continue'); - return null; + if (cfgRules.elseViaAlternative && alternative.type !== cfgRules.elseClause) { + // Pattern C: direct alternative (Go, Java, C#) + if (isIfNode(alternative.type, cfgRules)) { + const falseBlock = S.makeBlock('branch_false', null, null, 'else-if'); + S.addEdge(condBlock, falseBlock, 'branch_false'); + const elseIfEnd = processIf(alternative, falseBlock, S, cfgRules); + if (elseIfEnd) S.addEdge(elseIfEnd, joinBlock, 'fallthrough'); + } else { + const falseBlock = S.makeBlock('branch_false', null, null, 'else'); + S.addEdge(condBlock, falseBlock, 'branch_false'); + const falseStmts = getBodyStatements(alternative, cfgRules); + const falseEnd = processStatements(falseStmts, falseBlock, S, cfgRules); + if (falseEnd) S.addEdge(falseEnd, joinBlock, 'fallthrough'); + } + } else if (alternative.type === cfgRules.elseClause) { + // Pattern A: else_clause wrapper (JS/TS, Rust) + const elseChildren = []; + for (let i = 0; i < alternative.namedChildCount; i++) { + elseChildren.push(alternative.namedChild(i)); + } + if (elseChildren.length === 1 && isIfNode(elseChildren[0].type, cfgRules)) { + const falseBlock = S.makeBlock('branch_false', null, null, 'else-if'); + S.addEdge(condBlock, falseBlock, 'branch_false'); + const elseIfEnd = processIf(elseChildren[0], falseBlock, S, cfgRules); + if (elseIfEnd) S.addEdge(elseIfEnd, joinBlock, 'fallthrough'); + } else { + 
const falseBlock = S.makeBlock('branch_false', null, null, 'else'); + S.addEdge(condBlock, falseBlock, 'branch_false'); + const falseEnd = processStatements(elseChildren, falseBlock, S, cfgRules); + if (falseEnd) S.addEdge(falseEnd, joinBlock, 'fallthrough'); } - return currentBlock; } +} - // ── If/else-if/else ───────────────────────────────────────────────── - - function processIf(ifStmt, currentBlock) { - currentBlock.endLine = ifStmt.startPosition.row + 1; +function processElifSiblings(ifStmt, firstCondBlock, joinBlock, S, cfgRules) { + let lastCondBlock = firstCondBlock; + let foundElse = false; - const condBlock = S.makeBlock( - 'condition', - ifStmt.startPosition.row + 1, - ifStmt.startPosition.row + 1, - 'if', - ); - S.addEdge(currentBlock, condBlock, 'fallthrough'); - - const joinBlock = S.makeBlock('body'); - - // True branch - const consequentField = cfgRules.ifConsequentField || 'consequence'; - const consequent = ifStmt.childForFieldName(consequentField); - const trueBlock = S.makeBlock('branch_true', null, null, 'then'); - S.addEdge(condBlock, trueBlock, 'branch_true'); - const trueStmts = getBodyStatements(consequent); - const trueEnd = processStatements(trueStmts, trueBlock); - if (trueEnd) { - S.addEdge(trueEnd, joinBlock, 'fallthrough'); - } + for (let i = 0; i < ifStmt.namedChildCount; i++) { + const child = ifStmt.namedChild(i); - // False branch - if (cfgRules.elifNode) { - processElifSiblings(ifStmt, condBlock, joinBlock); - } else { - const alternative = ifStmt.childForFieldName('alternative'); - if (alternative) { - if (cfgRules.elseViaAlternative && alternative.type !== cfgRules.elseClause) { - // Pattern C: direct alternative (Go, Java, C#) - if (isIfNode(alternative.type)) { - const falseBlock = S.makeBlock('branch_false', null, null, 'else-if'); - S.addEdge(condBlock, falseBlock, 'branch_false'); - const elseIfEnd = processIf(alternative, falseBlock); - if (elseIfEnd) S.addEdge(elseIfEnd, joinBlock, 'fallthrough'); - } else { - const 
falseBlock = S.makeBlock('branch_false', null, null, 'else'); - S.addEdge(condBlock, falseBlock, 'branch_false'); - const falseStmts = getBodyStatements(alternative); - const falseEnd = processStatements(falseStmts, falseBlock); - if (falseEnd) S.addEdge(falseEnd, joinBlock, 'fallthrough'); - } - } else if (alternative.type === cfgRules.elseClause) { - // Pattern A: else_clause wrapper (JS/TS, Rust) - const elseChildren = []; - for (let i = 0; i < alternative.namedChildCount; i++) { - elseChildren.push(alternative.namedChild(i)); - } - if (elseChildren.length === 1 && isIfNode(elseChildren[0].type)) { - const falseBlock = S.makeBlock('branch_false', null, null, 'else-if'); - S.addEdge(condBlock, falseBlock, 'branch_false'); - const elseIfEnd = processIf(elseChildren[0], falseBlock); - if (elseIfEnd) S.addEdge(elseIfEnd, joinBlock, 'fallthrough'); - } else { - const falseBlock = S.makeBlock('branch_false', null, null, 'else'); - S.addEdge(condBlock, falseBlock, 'branch_false'); - const falseEnd = processStatements(elseChildren, falseBlock); - if (falseEnd) S.addEdge(falseEnd, joinBlock, 'fallthrough'); - } - } + if (child.type === cfgRules.elifNode) { + const elifCondBlock = S.makeBlock( + 'condition', + child.startPosition.row + 1, + child.startPosition.row + 1, + 'else-if', + ); + S.addEdge(lastCondBlock, elifCondBlock, 'branch_false'); + + const elifConsequentField = cfgRules.ifConsequentField || 'consequence'; + const elifConsequent = child.childForFieldName(elifConsequentField); + const elifTrueBlock = S.makeBlock('branch_true', null, null, 'then'); + S.addEdge(elifCondBlock, elifTrueBlock, 'branch_true'); + const elifTrueStmts = getBodyStatements(elifConsequent, cfgRules); + const elifTrueEnd = processStatements(elifTrueStmts, elifTrueBlock, S, cfgRules); + if (elifTrueEnd) S.addEdge(elifTrueEnd, joinBlock, 'fallthrough'); + + lastCondBlock = elifCondBlock; + } else if (child.type === cfgRules.elseClause) { + const elseBlock = S.makeBlock('branch_false', null, 
null, 'else'); + S.addEdge(lastCondBlock, elseBlock, 'branch_false'); + + const elseBody = child.childForFieldName('body'); + let elseStmts; + if (elseBody) { + elseStmts = getBodyStatements(elseBody, cfgRules); } else { - // No else - S.addEdge(condBlock, joinBlock, 'branch_false'); - } - } - - return joinBlock; - } - - function processElifSiblings(ifStmt, firstCondBlock, joinBlock) { - let lastCondBlock = firstCondBlock; - let foundElse = false; - - for (let i = 0; i < ifStmt.namedChildCount; i++) { - const child = ifStmt.namedChild(i); - - if (child.type === cfgRules.elifNode) { - const elifCondBlock = S.makeBlock( - 'condition', - child.startPosition.row + 1, - child.startPosition.row + 1, - 'else-if', - ); - S.addEdge(lastCondBlock, elifCondBlock, 'branch_false'); - - const elifConsequentField = cfgRules.ifConsequentField || 'consequence'; - const elifConsequent = child.childForFieldName(elifConsequentField); - const elifTrueBlock = S.makeBlock('branch_true', null, null, 'then'); - S.addEdge(elifCondBlock, elifTrueBlock, 'branch_true'); - const elifTrueStmts = getBodyStatements(elifConsequent); - const elifTrueEnd = processStatements(elifTrueStmts, elifTrueBlock); - if (elifTrueEnd) S.addEdge(elifTrueEnd, joinBlock, 'fallthrough'); - - lastCondBlock = elifCondBlock; - } else if (child.type === cfgRules.elseClause) { - const elseBlock = S.makeBlock('branch_false', null, null, 'else'); - S.addEdge(lastCondBlock, elseBlock, 'branch_false'); - - const elseBody = child.childForFieldName('body'); - let elseStmts; - if (elseBody) { - elseStmts = getBodyStatements(elseBody); - } else { - elseStmts = []; - for (let j = 0; j < child.namedChildCount; j++) { - elseStmts.push(child.namedChild(j)); - } + elseStmts = []; + for (let j = 0; j < child.namedChildCount; j++) { + elseStmts.push(child.namedChild(j)); } - const elseEnd = processStatements(elseStmts, elseBlock); - if (elseEnd) S.addEdge(elseEnd, joinBlock, 'fallthrough'); - - foundElse = true; } - } + const elseEnd = 
processStatements(elseStmts, elseBlock, S, cfgRules); + if (elseEnd) S.addEdge(elseEnd, joinBlock, 'fallthrough'); - if (!foundElse) { - S.addEdge(lastCondBlock, joinBlock, 'branch_false'); + foundElse = true; } } - // ── Loops ─────────────────────────────────────────────────────────── - - function processForLoop(forStmt, currentBlock) { - const headerBlock = S.makeBlock( - 'loop_header', - forStmt.startPosition.row + 1, - forStmt.startPosition.row + 1, - 'for', - ); - S.addEdge(currentBlock, headerBlock, 'fallthrough'); - - const loopExitBlock = S.makeBlock('body'); - const loopCtx = { headerBlock, exitBlock: loopExitBlock }; - S.loopStack.push(loopCtx); - registerLabelCtx(headerBlock, loopExitBlock); + if (!foundElse) { + S.addEdge(lastCondBlock, joinBlock, 'branch_false'); + } +} - const body = forStmt.childForFieldName('body'); - const bodyBlock = S.makeBlock('loop_body'); - S.addEdge(headerBlock, bodyBlock, 'branch_true'); +// ── Loops ─────────────────────────────────────────────────────────────── + +function processForLoop(forStmt, currentBlock, S, cfgRules) { + const headerBlock = S.makeBlock( + 'loop_header', + forStmt.startPosition.row + 1, + forStmt.startPosition.row + 1, + 'for', + ); + S.addEdge(currentBlock, headerBlock, 'fallthrough'); + + const loopExitBlock = S.makeBlock('body'); + const loopCtx = { headerBlock, exitBlock: loopExitBlock }; + S.loopStack.push(loopCtx); + registerLabelCtx(S, headerBlock, loopExitBlock); + + const body = forStmt.childForFieldName('body'); + const bodyBlock = S.makeBlock('loop_body'); + S.addEdge(headerBlock, bodyBlock, 'branch_true'); + + const bodyStmts = getBodyStatements(body, cfgRules); + const bodyEnd = processStatements(bodyStmts, bodyBlock, S, cfgRules); + if (bodyEnd) S.addEdge(bodyEnd, headerBlock, 'loop_back'); + + S.addEdge(headerBlock, loopExitBlock, 'loop_exit'); + S.loopStack.pop(); + return loopExitBlock; +} - const bodyStmts = getBodyStatements(body); - const bodyEnd = processStatements(bodyStmts, 
bodyBlock); - if (bodyEnd) S.addEdge(bodyEnd, headerBlock, 'loop_back'); +function processWhileLoop(whileStmt, currentBlock, S, cfgRules) { + const headerBlock = S.makeBlock( + 'loop_header', + whileStmt.startPosition.row + 1, + whileStmt.startPosition.row + 1, + 'while', + ); + S.addEdge(currentBlock, headerBlock, 'fallthrough'); + + const loopExitBlock = S.makeBlock('body'); + const loopCtx = { headerBlock, exitBlock: loopExitBlock }; + S.loopStack.push(loopCtx); + registerLabelCtx(S, headerBlock, loopExitBlock); + + const body = whileStmt.childForFieldName('body'); + const bodyBlock = S.makeBlock('loop_body'); + S.addEdge(headerBlock, bodyBlock, 'branch_true'); + + const bodyStmts = getBodyStatements(body, cfgRules); + const bodyEnd = processStatements(bodyStmts, bodyBlock, S, cfgRules); + if (bodyEnd) S.addEdge(bodyEnd, headerBlock, 'loop_back'); + + S.addEdge(headerBlock, loopExitBlock, 'loop_exit'); + S.loopStack.pop(); + return loopExitBlock; +} - S.addEdge(headerBlock, loopExitBlock, 'loop_exit'); - S.loopStack.pop(); - return loopExitBlock; - } +function processDoWhileLoop(doStmt, currentBlock, S, cfgRules) { + const bodyBlock = S.makeBlock('loop_body', doStmt.startPosition.row + 1, null, 'do'); + S.addEdge(currentBlock, bodyBlock, 'fallthrough'); - function processWhileLoop(whileStmt, currentBlock) { - const headerBlock = S.makeBlock( - 'loop_header', - whileStmt.startPosition.row + 1, - whileStmt.startPosition.row + 1, - 'while', - ); - S.addEdge(currentBlock, headerBlock, 'fallthrough'); + const condBlock = S.makeBlock('loop_header', null, null, 'do-while'); + const loopExitBlock = S.makeBlock('body'); - const loopExitBlock = S.makeBlock('body'); - const loopCtx = { headerBlock, exitBlock: loopExitBlock }; - S.loopStack.push(loopCtx); - registerLabelCtx(headerBlock, loopExitBlock); + const loopCtx = { headerBlock: condBlock, exitBlock: loopExitBlock }; + S.loopStack.push(loopCtx); + registerLabelCtx(S, condBlock, loopExitBlock); - const body = 
whileStmt.childForFieldName('body'); - const bodyBlock = S.makeBlock('loop_body'); - S.addEdge(headerBlock, bodyBlock, 'branch_true'); + const body = doStmt.childForFieldName('body'); + const bodyStmts = getBodyStatements(body, cfgRules); + const bodyEnd = processStatements(bodyStmts, bodyBlock, S, cfgRules); + if (bodyEnd) S.addEdge(bodyEnd, condBlock, 'fallthrough'); - const bodyStmts = getBodyStatements(body); - const bodyEnd = processStatements(bodyStmts, bodyBlock); - if (bodyEnd) S.addEdge(bodyEnd, headerBlock, 'loop_back'); + S.addEdge(condBlock, bodyBlock, 'loop_back'); + S.addEdge(condBlock, loopExitBlock, 'loop_exit'); - S.addEdge(headerBlock, loopExitBlock, 'loop_exit'); - S.loopStack.pop(); - return loopExitBlock; - } + S.loopStack.pop(); + return loopExitBlock; +} - function processDoWhileLoop(doStmt, currentBlock) { - const bodyBlock = S.makeBlock('loop_body', doStmt.startPosition.row + 1, null, 'do'); - S.addEdge(currentBlock, bodyBlock, 'fallthrough'); +function processInfiniteLoop(loopStmt, currentBlock, S, cfgRules) { + const headerBlock = S.makeBlock( + 'loop_header', + loopStmt.startPosition.row + 1, + loopStmt.startPosition.row + 1, + 'loop', + ); + S.addEdge(currentBlock, headerBlock, 'fallthrough'); + + const loopExitBlock = S.makeBlock('body'); + const loopCtx = { headerBlock, exitBlock: loopExitBlock }; + S.loopStack.push(loopCtx); + registerLabelCtx(S, headerBlock, loopExitBlock); + + const body = loopStmt.childForFieldName('body'); + const bodyBlock = S.makeBlock('loop_body'); + S.addEdge(headerBlock, bodyBlock, 'branch_true'); + + const bodyStmts = getBodyStatements(body, cfgRules); + const bodyEnd = processStatements(bodyStmts, bodyBlock, S, cfgRules); + if (bodyEnd) S.addEdge(bodyEnd, headerBlock, 'loop_back'); + + // No loop_exit from header — only via break + S.loopStack.pop(); + return loopExitBlock; +} - const condBlock = S.makeBlock('loop_header', null, null, 'do-while'); - const loopExitBlock = S.makeBlock('body'); +// ── Switch 
/ match ────────────────────────────────────────────────────── - const loopCtx = { headerBlock: condBlock, exitBlock: loopExitBlock }; - S.loopStack.push(loopCtx); - registerLabelCtx(condBlock, loopExitBlock); +function processSwitch(switchStmt, currentBlock, S, cfgRules) { + currentBlock.endLine = switchStmt.startPosition.row + 1; - const body = doStmt.childForFieldName('body'); - const bodyStmts = getBodyStatements(body); - const bodyEnd = processStatements(bodyStmts, bodyBlock); - if (bodyEnd) S.addEdge(bodyEnd, condBlock, 'fallthrough'); + const switchHeader = S.makeBlock( + 'condition', + switchStmt.startPosition.row + 1, + switchStmt.startPosition.row + 1, + 'switch', + ); + S.addEdge(currentBlock, switchHeader, 'fallthrough'); - S.addEdge(condBlock, bodyBlock, 'loop_back'); - S.addEdge(condBlock, loopExitBlock, 'loop_exit'); + const joinBlock = S.makeBlock('body'); + const switchCtx = { headerBlock: switchHeader, exitBlock: joinBlock }; + S.loopStack.push(switchCtx); - S.loopStack.pop(); - return loopExitBlock; - } + const switchBody = switchStmt.childForFieldName('body'); + const container = switchBody || switchStmt; - function processInfiniteLoop(loopStmt, currentBlock) { - const headerBlock = S.makeBlock( - 'loop_header', - loopStmt.startPosition.row + 1, - loopStmt.startPosition.row + 1, - 'loop', - ); - S.addEdge(currentBlock, headerBlock, 'fallthrough'); + let hasDefault = false; + for (let i = 0; i < container.namedChildCount; i++) { + const caseClause = container.namedChild(i); - const loopExitBlock = S.makeBlock('body'); - const loopCtx = { headerBlock, exitBlock: loopExitBlock }; - S.loopStack.push(loopCtx); - registerLabelCtx(headerBlock, loopExitBlock); + const isDefault = caseClause.type === cfgRules.defaultNode; + const isCase = isDefault || isCaseNode(caseClause.type, cfgRules); + if (!isCase) continue; - const body = loopStmt.childForFieldName('body'); - const bodyBlock = S.makeBlock('loop_body'); - S.addEdge(headerBlock, bodyBlock, 
'branch_true'); + const caseLabel = isDefault ? 'default' : 'case'; + const caseBlock = S.makeBlock('case', caseClause.startPosition.row + 1, null, caseLabel); + S.addEdge(switchHeader, caseBlock, isDefault ? 'branch_false' : 'branch_true'); + if (isDefault) hasDefault = true; - const bodyStmts = getBodyStatements(body); - const bodyEnd = processStatements(bodyStmts, bodyBlock); - if (bodyEnd) S.addEdge(bodyEnd, headerBlock, 'loop_back'); + const caseStmts = extractCaseBody(caseClause, cfgRules); + const caseEnd = processStatements(caseStmts, caseBlock, S, cfgRules); + if (caseEnd) S.addEdge(caseEnd, joinBlock, 'fallthrough'); + } - // No loop_exit from header — only via break - S.loopStack.pop(); - return loopExitBlock; + if (!hasDefault) { + S.addEdge(switchHeader, joinBlock, 'branch_false'); } - // ── Switch / match ────────────────────────────────────────────────── + S.loopStack.pop(); + return joinBlock; +} - function processSwitch(switchStmt, currentBlock) { - currentBlock.endLine = switchStmt.startPosition.row + 1; +function extractCaseBody(caseClause, cfgRules) { + const caseBodyNode = + caseClause.childForFieldName('body') || caseClause.childForFieldName('consequence'); + if (caseBodyNode) { + return getBodyStatements(caseBodyNode, cfgRules); + } - const switchHeader = S.makeBlock( - 'condition', - switchStmt.startPosition.row + 1, - switchStmt.startPosition.row + 1, - 'switch', - ); - S.addEdge(currentBlock, switchHeader, 'fallthrough'); - - const joinBlock = S.makeBlock('body'); - const switchCtx = { headerBlock: switchHeader, exitBlock: joinBlock }; - S.loopStack.push(switchCtx); - - const switchBody = switchStmt.childForFieldName('body'); - const container = switchBody || switchStmt; - - let hasDefault = false; - for (let i = 0; i < container.namedChildCount; i++) { - const caseClause = container.namedChild(i); - - const isDefault = caseClause.type === cfgRules.defaultNode; - const isCase = isDefault || isCaseNode(caseClause.type); - if (!isCase) 
continue; - - const caseLabel = isDefault ? 'default' : 'case'; - const caseBlock = S.makeBlock('case', caseClause.startPosition.row + 1, null, caseLabel); - S.addEdge(switchHeader, caseBlock, isDefault ? 'branch_false' : 'branch_true'); - if (isDefault) hasDefault = true; - - // Extract case body - const caseBodyNode = - caseClause.childForFieldName('body') || caseClause.childForFieldName('consequence'); - let caseStmts; - if (caseBodyNode) { - caseStmts = getBodyStatements(caseBodyNode); - } else { - caseStmts = []; - const valueNode = caseClause.childForFieldName('value'); - const patternNode = caseClause.childForFieldName('pattern'); - for (let j = 0; j < caseClause.namedChildCount; j++) { - const child = caseClause.namedChild(j); - if (child !== valueNode && child !== patternNode && child.type !== 'switch_label') { - if (child.type === 'statement_list') { - for (let k = 0; k < child.namedChildCount; k++) { - caseStmts.push(child.namedChild(k)); - } - } else { - caseStmts.push(child); - } - } + const stmts = []; + const valueNode = caseClause.childForFieldName('value'); + const patternNode = caseClause.childForFieldName('pattern'); + for (let j = 0; j < caseClause.namedChildCount; j++) { + const child = caseClause.namedChild(j); + if (child !== valueNode && child !== patternNode && child.type !== 'switch_label') { + if (child.type === 'statement_list') { + for (let k = 0; k < child.namedChildCount; k++) { + stmts.push(child.namedChild(k)); } + } else { + stmts.push(child); } - - const caseEnd = processStatements(caseStmts, caseBlock); - if (caseEnd) S.addEdge(caseEnd, joinBlock, 'fallthrough'); } + } + return stmts; +} - if (!hasDefault) { - S.addEdge(switchHeader, joinBlock, 'branch_false'); - } +// ── Try / catch / finally ─────────────────────────────────────────────── + +function processTryCatch(tryStmt, currentBlock, S, cfgRules) { + currentBlock.endLine = tryStmt.startPosition.row + 1; - S.loopStack.pop(); - return joinBlock; + const joinBlock = 
S.makeBlock('body'); + + // Try body + const tryBody = tryStmt.childForFieldName('body'); + let tryBodyStart; + let tryStmts; + if (tryBody) { + tryBodyStart = tryBody.startPosition.row + 1; + tryStmts = getBodyStatements(tryBody, cfgRules); + } else { + tryBodyStart = tryStmt.startPosition.row + 1; + tryStmts = []; + for (let i = 0; i < tryStmt.namedChildCount; i++) { + const child = tryStmt.namedChild(i); + if (cfgRules.catchNode && child.type === cfgRules.catchNode) continue; + if (cfgRules.finallyNode && child.type === cfgRules.finallyNode) continue; + tryStmts.push(child); + } } - // ── Try/catch/finally ─────────────────────────────────────────────── + const tryBlock = S.makeBlock('body', tryBodyStart, null, 'try'); + S.addEdge(currentBlock, tryBlock, 'fallthrough'); + const tryEnd = processStatements(tryStmts, tryBlock, S, cfgRules); - function processTryCatch(tryStmt, currentBlock) { - currentBlock.endLine = tryStmt.startPosition.row + 1; + // Find catch and finally handlers + const { catchHandler, finallyHandler } = findTryHandlers(tryStmt, cfgRules); - const joinBlock = S.makeBlock('body'); + if (catchHandler) { + processCatchHandler(catchHandler, tryBlock, tryEnd, finallyHandler, joinBlock, S, cfgRules); + } else if (finallyHandler) { + processFinallyOnly(finallyHandler, tryEnd, joinBlock, S, cfgRules); + } else { + if (tryEnd) S.addEdge(tryEnd, joinBlock, 'fallthrough'); + } - // Try body - const tryBody = tryStmt.childForFieldName('body'); - let tryBodyStart; - let tryStmts; - if (tryBody) { - tryBodyStart = tryBody.startPosition.row + 1; - tryStmts = getBodyStatements(tryBody); - } else { - tryBodyStart = tryStmt.startPosition.row + 1; - tryStmts = []; - for (let i = 0; i < tryStmt.namedChildCount; i++) { - const child = tryStmt.namedChild(i); - if (cfgRules.catchNode && child.type === cfgRules.catchNode) continue; - if (cfgRules.finallyNode && child.type === cfgRules.finallyNode) continue; - tryStmts.push(child); - } - } + return joinBlock; +} - 
const tryBlock = S.makeBlock('body', tryBodyStart, null, 'try'); - S.addEdge(currentBlock, tryBlock, 'fallthrough'); - const tryEnd = processStatements(tryStmts, tryBlock); +function findTryHandlers(tryStmt, cfgRules) { + let catchHandler = null; + let finallyHandler = null; + for (let i = 0; i < tryStmt.namedChildCount; i++) { + const child = tryStmt.namedChild(i); + if (cfgRules.catchNode && child.type === cfgRules.catchNode) catchHandler = child; + if (cfgRules.finallyNode && child.type === cfgRules.finallyNode) finallyHandler = child; + } + return { catchHandler, finallyHandler }; +} - // Find catch and finally handlers - let catchHandler = null; - let finallyHandler = null; - for (let i = 0; i < tryStmt.namedChildCount; i++) { - const child = tryStmt.namedChild(i); - if (cfgRules.catchNode && child.type === cfgRules.catchNode) catchHandler = child; - if (cfgRules.finallyNode && child.type === cfgRules.finallyNode) finallyHandler = child; +function processCatchHandler( + catchHandler, + tryBlock, + tryEnd, + finallyHandler, + joinBlock, + S, + cfgRules, +) { + const catchBlock = S.makeBlock('catch', catchHandler.startPosition.row + 1, null, 'catch'); + S.addEdge(tryBlock, catchBlock, 'exception'); + + const catchBodyNode = catchHandler.childForFieldName('body'); + let catchStmts; + if (catchBodyNode) { + catchStmts = getBodyStatements(catchBodyNode, cfgRules); + } else { + catchStmts = []; + for (let i = 0; i < catchHandler.namedChildCount; i++) { + catchStmts.push(catchHandler.namedChild(i)); } + } + const catchEnd = processStatements(catchStmts, catchBlock, S, cfgRules); + + if (finallyHandler) { + const finallyBlock = S.makeBlock( + 'finally', + finallyHandler.startPosition.row + 1, + null, + 'finally', + ); + if (tryEnd) S.addEdge(tryEnd, finallyBlock, 'fallthrough'); + if (catchEnd) S.addEdge(catchEnd, finallyBlock, 'fallthrough'); + + const finallyBodyNode = finallyHandler.childForFieldName('body'); + const finallyStmts = finallyBodyNode + ? 
getBodyStatements(finallyBodyNode, cfgRules) + : getBodyStatements(finallyHandler, cfgRules); + const finallyEnd = processStatements(finallyStmts, finallyBlock, S, cfgRules); + if (finallyEnd) S.addEdge(finallyEnd, joinBlock, 'fallthrough'); + } else { + if (tryEnd) S.addEdge(tryEnd, joinBlock, 'fallthrough'); + if (catchEnd) S.addEdge(catchEnd, joinBlock, 'fallthrough'); + } +} - if (catchHandler) { - const catchBlock = S.makeBlock('catch', catchHandler.startPosition.row + 1, null, 'catch'); - S.addEdge(tryBlock, catchBlock, 'exception'); +function processFinallyOnly(finallyHandler, tryEnd, joinBlock, S, cfgRules) { + const finallyBlock = S.makeBlock( + 'finally', + finallyHandler.startPosition.row + 1, + null, + 'finally', + ); + if (tryEnd) S.addEdge(tryEnd, finallyBlock, 'fallthrough'); + + const finallyBodyNode = finallyHandler.childForFieldName('body'); + const finallyStmts = finallyBodyNode + ? getBodyStatements(finallyBodyNode, cfgRules) + : getBodyStatements(finallyHandler, cfgRules); + const finallyEnd = processStatements(finallyStmts, finallyBlock, S, cfgRules); + if (finallyEnd) S.addEdge(finallyEnd, joinBlock, 'fallthrough'); +} - const catchBodyNode = catchHandler.childForFieldName('body'); - let catchStmts; - if (catchBodyNode) { - catchStmts = getBodyStatements(catchBodyNode); - } else { - catchStmts = []; - for (let i = 0; i < catchHandler.namedChildCount; i++) { - catchStmts.push(catchHandler.namedChild(i)); - } - } - const catchEnd = processStatements(catchStmts, catchBlock); - - if (finallyHandler) { - const finallyBlock = S.makeBlock( - 'finally', - finallyHandler.startPosition.row + 1, - null, - 'finally', - ); - if (tryEnd) S.addEdge(tryEnd, finallyBlock, 'fallthrough'); - if (catchEnd) S.addEdge(catchEnd, finallyBlock, 'fallthrough'); - - const finallyBodyNode = finallyHandler.childForFieldName('body'); - const finallyStmts = finallyBodyNode - ? 
getBodyStatements(finallyBodyNode) - : getBodyStatements(finallyHandler); - const finallyEnd = processStatements(finallyStmts, finallyBlock); - if (finallyEnd) S.addEdge(finallyEnd, joinBlock, 'fallthrough'); - } else { - if (tryEnd) S.addEdge(tryEnd, joinBlock, 'fallthrough'); - if (catchEnd) S.addEdge(catchEnd, joinBlock, 'fallthrough'); - } - } else if (finallyHandler) { - const finallyBlock = S.makeBlock( - 'finally', - finallyHandler.startPosition.row + 1, - null, - 'finally', - ); - if (tryEnd) S.addEdge(tryEnd, finallyBlock, 'fallthrough'); - - const finallyBodyNode = finallyHandler.childForFieldName('body'); - const finallyStmts = finallyBodyNode - ? getBodyStatements(finallyBodyNode) - : getBodyStatements(finallyHandler); - const finallyEnd = processStatements(finallyStmts, finallyBlock); - if (finallyEnd) S.addEdge(finallyEnd, joinBlock, 'fallthrough'); - } else { - if (tryEnd) S.addEdge(tryEnd, joinBlock, 'fallthrough'); - } +// ── Enter-function body processing ────────────────────────────────────── + +function processFunctionBody(funcNode, S, cfgRules) { + const body = funcNode.childForFieldName('body'); + if (!body) { + // No body — entry → exit + S.blocks.length = 2; + S.edges.length = 0; + S.addEdge(S.entryBlock, S.exitBlock, 'fallthrough'); + S.currentBlock = null; + return; + } - return joinBlock; + if (!isBlockNode(body.type, cfgRules)) { + // Expression body (e.g., arrow function `(x) => x + 1`) + const bodyBlock = S.blocks[2]; + bodyBlock.startLine = body.startPosition.row + 1; + bodyBlock.endLine = body.endPosition.row + 1; + S.addEdge(bodyBlock, S.exitBlock, 'fallthrough'); + S.currentBlock = null; + return; } - // ── Visitor interface ─────────────────────────────────────────────── + // Block body — process statements + const stmts = getBodyStatements(body, cfgRules); + if (stmts.length === 0) { + S.blocks.length = 2; + S.edges.length = 0; + S.addEdge(S.entryBlock, S.exitBlock, 'fallthrough'); + S.currentBlock = null; + return; + } + + const 
firstBody = S.blocks[2]; + const lastBlock = processStatements(stmts, firstBody, S, cfgRules); + if (lastBlock) { + S.addEdge(lastBlock, S.exitBlock, 'fallthrough'); + } + S.currentBlock = null; +} + +// ── Visitor factory ───────────────────────────────────────────────────── + +/** + * Create a CFG visitor for use with walkWithVisitors. + * + * @param {object} cfgRules - CFG_RULES for the language + * @returns {Visitor} + */ +export function createCfgVisitor(cfgRules) { + const funcStateStack = []; + let S = null; + const results = []; return { name: 'cfg', functionNodeTypes: cfgRules.functionNodes, enterFunction(funcNode, _funcName, _context) { - if (S) { - // Nested function — push current state - funcStateStack.push(S); - } + if (S) funcStateStack.push(S); S = makeFuncState(); S.funcNode = funcNode; - - // Check for expression body (arrow functions): no block body - const body = funcNode.childForFieldName('body'); - if (!body) { - // No body at all — entry → exit - // Remove the firstBody block and its edge - S.blocks.length = 2; // keep entry + exit - S.edges.length = 0; - S.addEdge(S.entryBlock, S.exitBlock, 'fallthrough'); - S.currentBlock = null; - return; - } - - if (!isBlockNode(body.type)) { - // Expression body (e.g., arrow function `(x) => x + 1`) - // entry → body → exit (body is the expression) - const bodyBlock = S.blocks[2]; // the firstBody we already created - bodyBlock.startLine = body.startPosition.row + 1; - bodyBlock.endLine = body.endPosition.row + 1; - S.addEdge(bodyBlock, S.exitBlock, 'fallthrough'); - S.currentBlock = null; // no further processing needed - return; - } - - // Block body — process statements - const stmts = getBodyStatements(body); - if (stmts.length === 0) { - // Empty function - S.blocks.length = 2; - S.edges.length = 0; - S.addEdge(S.entryBlock, S.exitBlock, 'fallthrough'); - S.currentBlock = null; - return; - } - - // Process all body statements using the statement-level processor - const firstBody = S.blocks[2]; // 
the firstBody block - const lastBlock = processStatements(stmts, firstBody); - if (lastBlock) { - S.addEdge(lastBlock, S.exitBlock, 'fallthrough'); - } - S.currentBlock = null; // done processing + processFunctionBody(funcNode, S, cfgRules); }, exitFunction(funcNode, _funcName, _context) { if (S && S.funcNode === funcNode) { - // Derive cyclomatic complexity from CFG: E - N + 2 const cyclomatic = S.edges.length - S.blocks.length + 2; results.push({ funcNode: S.funcNode, @@ -768,21 +758,17 @@ export function createCfgVisitor(cfgRules) { cyclomatic: Math.max(cyclomatic, 1), }); } - - // Pop to parent function state (if nested) S = funcStateStack.length > 0 ? funcStateStack.pop() : null; }, enterNode(_node, _context) { - // No-op — all CFG construction is done in enterFunction via - // processStatements. We intentionally do NOT return skipChildren here - // so that the walker still recurses into children, allowing nested - // function definitions to trigger enterFunction/exitFunction and get - // their own CFG computed via the funcStateStack. + // No-op — all CFG construction is done in enterFunction via processStatements. + // We intentionally do NOT return skipChildren so the walker recurses into + // children, allowing nested functions to trigger enterFunction/exitFunction. 
}, exitNode(_node, _context) { - // No-op — all work done in enterFunction/exitFunction + // No-op }, finish() { diff --git a/src/ast-analysis/visitors/complexity-visitor.js b/src/ast-analysis/visitors/complexity-visitor.js index df386afc..ca19c0c5 100644 --- a/src/ast-analysis/visitors/complexity-visitor.js +++ b/src/ast-analysis/visitors/complexity-visitor.js @@ -12,6 +12,122 @@ import { computeMaintainabilityIndex, } from '../metrics.js'; +// ── Halstead classification ───────────────────────────────────────────── + +function classifyHalstead(node, hRules, acc) { + const type = node.type; + if (hRules.skipTypes.has(type)) acc.halsteadSkipDepth++; + if (acc.halsteadSkipDepth > 0) return; + + if (hRules.compoundOperators.has(type)) { + acc.operators.set(type, (acc.operators.get(type) || 0) + 1); + } + if (node.childCount === 0) { + if (hRules.operatorLeafTypes.has(type)) { + acc.operators.set(type, (acc.operators.get(type) || 0) + 1); + } else if (hRules.operandLeafTypes.has(type)) { + const text = node.text; + acc.operands.set(text, (acc.operands.get(text) || 0) + 1); + } + } +} + +// ── Branch complexity classification ──────────────────────────────────── + +function classifyBranchNode(node, type, nestingLevel, cRules, acc) { + // Pattern A: else clause wraps if (JS/C#/Rust) + if (cRules.elseNodeType && type === cRules.elseNodeType) { + const firstChild = node.namedChild(0); + if (firstChild && firstChild.type === cRules.ifNodeType) { + // else-if: the if_statement child handles its own increment + return; + } + acc.cognitive++; + return; + } + + // Pattern B: explicit elif node (Python/Ruby/PHP) + if (cRules.elifNodeType && type === cRules.elifNodeType) { + acc.cognitive++; + acc.cyclomatic++; + return; + } + + // Detect else-if via Pattern A or C + let isElseIf = false; + if (type === cRules.ifNodeType) { + if (cRules.elseViaAlternative) { + isElseIf = + node.parent?.type === cRules.ifNodeType && + node.parent.childForFieldName('alternative')?.id === node.id; 
+ } else if (cRules.elseNodeType) { + isElseIf = node.parent?.type === cRules.elseNodeType; + } + } + + if (isElseIf) { + acc.cognitive++; + acc.cyclomatic++; + return; + } + + // Regular branch node + acc.cognitive += 1 + nestingLevel; + acc.cyclomatic++; + + if (cRules.switchLikeNodes?.has(type)) { + acc.cyclomatic--; + } +} + +// ── Plain-else detection (Pattern C: Go/Java) ────────────────────────── + +function classifyPlainElse(node, type, cRules, acc) { + if ( + cRules.elseViaAlternative && + type !== cRules.ifNodeType && + node.parent?.type === cRules.ifNodeType && + node.parent.childForFieldName('alternative')?.id === node.id + ) { + acc.cognitive++; + } +} + +// ── Result collection ─────────────────────────────────────────────────── + +function collectResult(funcNode, acc, hRules, langId) { + const halstead = + hRules && acc.operators && acc.operands + ? computeHalsteadDerived(acc.operators, acc.operands) + : null; + const loc = computeLOCMetrics(funcNode, langId); + const volume = halstead ? halstead.volume : 0; + const commentRatio = loc.loc > 0 ? loc.commentLines / loc.loc : 0; + const mi = computeMaintainabilityIndex(volume, acc.cyclomatic, loc.sloc, commentRatio); + + return { + cognitive: acc.cognitive, + cyclomatic: acc.cyclomatic, + maxNesting: acc.maxNesting, + halstead, + loc, + mi, + }; +} + +function resetAccumulators(hRules) { + return { + cognitive: 0, + cyclomatic: 1, + maxNesting: 0, + operators: hRules ? new Map() : null, + operands: hRules ? new Map() : null, + halsteadSkipDepth: 0, + }; +} + +// ── Visitor factory ───────────────────────────────────────────────────── + /** * Create a complexity visitor for use with walkWithVisitors. * @@ -28,43 +144,12 @@ import { export function createComplexityVisitor(cRules, hRules, options = {}) { const { fileLevelWalk = false, langId = null } = options; - // Per-function accumulators - let cognitive = 0; - let cyclomatic = 1; - let maxNesting = 0; - let operators = hRules ? 
new Map() : null; - let operands = hRules ? new Map() : null; - let halsteadSkipDepth = 0; - - // In file-level mode, we only count when inside a function + let acc = resetAccumulators(hRules); let activeFuncNode = null; let activeFuncName = null; - // Nesting depth relative to the active function (for nested functions) let funcDepth = 0; - - // Collected results (one per function) const results = []; - function reset() { - cognitive = 0; - cyclomatic = 1; - maxNesting = 0; - operators = hRules ? new Map() : null; - operands = hRules ? new Map() : null; - halsteadSkipDepth = 0; - } - - function collectResult(funcNode) { - const halstead = - hRules && operators && operands ? computeHalsteadDerived(operators, operands) : null; - const loc = computeLOCMetrics(funcNode, langId); - const volume = halstead ? halstead.volume : 0; - const commentRatio = loc.loc > 0 ? loc.commentLines / loc.loc : 0; - const mi = computeMaintainabilityIndex(volume, cyclomatic, loc.sloc, commentRatio); - - return { cognitive, cyclomatic, maxNesting, halstead, loc, mi }; - } - return { name: 'complexity', functionNodeTypes: cRules.functionNodes, @@ -72,17 +157,14 @@ export function createComplexityVisitor(cRules, hRules, options = {}) { enterFunction(funcNode, funcName, _context) { if (fileLevelWalk) { if (!activeFuncNode) { - // Top-level function: start fresh - reset(); + acc = resetAccumulators(hRules); activeFuncNode = funcNode; activeFuncName = funcName; funcDepth = 0; } else { - // Nested function: increase nesting for complexity funcDepth++; } } else { - // Function-level mode: track nested functions for correct nesting depth funcDepth++; } }, @@ -90,11 +172,10 @@ export function createComplexityVisitor(cRules, hRules, options = {}) { exitFunction(funcNode, _funcName, _context) { if (fileLevelWalk) { if (funcNode === activeFuncNode) { - // Leaving the top-level function: emit result results.push({ funcNode, funcName: activeFuncName, - metrics: collectResult(funcNode), + metrics: 
collectResult(funcNode, acc, hRules, langId), }); activeFuncNode = null; activeFuncName = null; @@ -107,137 +188,52 @@ export function createComplexityVisitor(cRules, hRules, options = {}) { }, enterNode(node, context) { - // In file-level mode, skip nodes outside any function if (fileLevelWalk && !activeFuncNode) return; const type = node.type; const nestingLevel = fileLevelWalk ? context.nestingLevel + funcDepth : context.nestingLevel; - // ── Halstead classification ── - if (hRules) { - if (hRules.skipTypes.has(type)) halsteadSkipDepth++; - if (halsteadSkipDepth === 0) { - if (hRules.compoundOperators.has(type)) { - operators.set(type, (operators.get(type) || 0) + 1); - } - if (node.childCount === 0) { - if (hRules.operatorLeafTypes.has(type)) { - operators.set(type, (operators.get(type) || 0) + 1); - } else if (hRules.operandLeafTypes.has(type)) { - const text = node.text; - operands.set(text, (operands.get(text) || 0) + 1); - } - } - } - } + if (hRules) classifyHalstead(node, hRules, acc); - // ── Complexity: track nesting depth ── - if (nestingLevel > maxNesting) maxNesting = nestingLevel; + if (nestingLevel > acc.maxNesting) acc.maxNesting = nestingLevel; - // Handle logical operators in binary expressions + // Logical operators in binary expressions if (type === cRules.logicalNodeType) { const op = node.child(1)?.type; if (op && cRules.logicalOperators.has(op)) { - cyclomatic++; + acc.cyclomatic++; const parent = node.parent; let sameSequence = false; if (parent && parent.type === cRules.logicalNodeType) { const parentOp = parent.child(1)?.type; if (parentOp === op) sameSequence = true; } - if (!sameSequence) cognitive++; - // Don't skip children — walker handles recursion + if (!sameSequence) acc.cognitive++; } } - // Handle optional chaining (cyclomatic only) - if (type === cRules.optionalChainType) { - cyclomatic++; - } + // Optional chaining (cyclomatic only) + if (type === cRules.optionalChainType) acc.cyclomatic++; - // Handle branch/control flow 
nodes (skip keyword leaf tokens) + // Branch/control flow nodes (skip keyword leaf tokens) if (cRules.branchNodes.has(type) && node.childCount > 0) { - // Pattern A: else clause wraps if (JS/C#/Rust) - if (cRules.elseNodeType && type === cRules.elseNodeType) { - const firstChild = node.namedChild(0); - if (firstChild && firstChild.type === cRules.ifNodeType) { - // else-if: the if_statement child handles its own increment - return; - } - cognitive++; - return; - } - - // Pattern B: explicit elif node (Python/Ruby/PHP) - if (cRules.elifNodeType && type === cRules.elifNodeType) { - cognitive++; - cyclomatic++; - return; - } - - // Detect else-if via Pattern A or C - let isElseIf = false; - if (type === cRules.ifNodeType) { - if (cRules.elseViaAlternative) { - isElseIf = - node.parent?.type === cRules.ifNodeType && - node.parent.childForFieldName('alternative')?.id === node.id; - } else if (cRules.elseNodeType) { - isElseIf = node.parent?.type === cRules.elseNodeType; - } - } - - if (isElseIf) { - cognitive++; - cyclomatic++; - return; - } - - // Regular branch node - cognitive += 1 + nestingLevel; - cyclomatic++; - - if (cRules.switchLikeNodes?.has(type)) { - cyclomatic--; - } - - // Nesting nodes are handled by the walker's nestingNodeTypes option - // But we still need them to count in complexity — they already do above - } - - // Pattern C plain else: block that is the alternative of an if_statement (Go/Java) - if ( - cRules.elseViaAlternative && - type !== cRules.ifNodeType && - node.parent?.type === cRules.ifNodeType && - node.parent.childForFieldName('alternative')?.id === node.id - ) { - cognitive++; + classifyBranchNode(node, type, nestingLevel, cRules, acc); } - // Handle case nodes (cyclomatic only, skip keyword leaves) - if (cRules.caseNodes.has(type) && node.childCount > 0) { - cyclomatic++; - } + // Pattern C plain else (Go/Java) + classifyPlainElse(node, type, cRules, acc); - // Handle nested function definitions (increase nesting) - // In file-level 
mode funcDepth handles this; in function-level mode the - // nestingNodeTypes option should include function nodes + // Case nodes (cyclomatic only, skip keyword leaves) + if (cRules.caseNodes.has(type) && node.childCount > 0) acc.cyclomatic++; }, exitNode(node) { - // Decrement skip depth when leaving a skip-type subtree - if (hRules?.skipTypes.has(node.type)) { - halsteadSkipDepth--; - } + if (hRules?.skipTypes.has(node.type)) acc.halsteadSkipDepth--; }, finish() { - if (fileLevelWalk) { - return results; - } - // Function-level mode: return single result (no funcNode reference needed) - return collectResult({ text: '' }); + if (fileLevelWalk) return results; + return collectResult({ text: '' }, acc, hRules, langId); }, }; } diff --git a/src/ast-analysis/visitors/dataflow-visitor.js b/src/ast-analysis/visitors/dataflow-visitor.js index c6fe9fa9..644490be 100644 --- a/src/ast-analysis/visitors/dataflow-visitor.js +++ b/src/ast-analysis/visitors/dataflow-visitor.js @@ -21,254 +21,280 @@ import { truncate, } from '../visitor-utils.js'; -/** - * Create a dataflow visitor for use with walkWithVisitors. - * - * @param {object} rules - DATAFLOW_RULES for the language - * @returns {Visitor} - */ -export function createDataflowVisitor(rules) { - const isCallNode = rules.callNodes ? (t) => rules.callNodes.has(t) : (t) => t === rules.callNode; - - const parameters = []; - const returns = []; - const assignments = []; - const argFlows = []; - const mutations = []; +// ── Scope helpers ─────────────────────────────────────────────────────── - const scopeStack = []; +function currentScope(scopeStack) { + return scopeStack.length > 0 ? scopeStack[scopeStack.length - 1] : null; +} - function currentScope() { - return scopeStack.length > 0 ? 
scopeStack[scopeStack.length - 1] : null; +function findBinding(name, scopeStack) { + for (let i = scopeStack.length - 1; i >= 0; i--) { + const scope = scopeStack[i]; + if (scope.params.has(name)) + return { type: 'param', index: scope.params.get(name), funcName: scope.funcName }; + if (scope.locals.has(name)) + return { type: 'local', source: scope.locals.get(name), funcName: scope.funcName }; } + return null; +} - function findBinding(name) { - for (let i = scopeStack.length - 1; i >= 0; i--) { - const scope = scopeStack[i]; - if (scope.params.has(name)) - return { type: 'param', index: scope.params.get(name), funcName: scope.funcName }; - if (scope.locals.has(name)) - return { type: 'local', source: scope.locals.get(name), funcName: scope.funcName }; - } - return null; +function bindingConfidence(binding) { + if (!binding) return 0.5; + if (binding.type === 'param') return 1.0; + if (binding.type === 'local') { + if (binding.source?.type === 'call_return') return 0.9; + if (binding.source?.type === 'destructured') return 0.8; + return 0.9; } + return 0.5; +} - function bindingConfidence(binding) { - if (!binding) return 0.5; - if (binding.type === 'param') return 1.0; - if (binding.type === 'local') { - if (binding.source?.type === 'call_return') return 0.9; - if (binding.source?.type === 'destructured') return 0.8; - return 0.9; - } - return 0.5; - } +// ── Node helpers ──────────────────────────────────────────────────────── - function unwrapAwait(node) { - if (rules.awaitNode && node.type === rules.awaitNode) { - return node.namedChildren[0] || node; - } - return node; +function unwrapAwait(node, rules) { + if (rules.awaitNode && node.type === rules.awaitNode) { + return node.namedChildren[0] || node; } + return node; +} - function isCall(node) { - return node && isCallNode(node.type); - } +function isCall(node, isCallNode) { + return node && isCallNode(node.type); +} - function handleVarDeclarator(node) { - let nameNode = 
node.childForFieldName(rules.varNameField); - let valueNode = rules.varValueField ? node.childForFieldName(rules.varValueField) : null; +// ── Node handlers ─────────────────────────────────────────────────────── - if (!valueNode && rules.equalsClauseType) { - for (const child of node.namedChildren) { - if (child.type === rules.equalsClauseType) { - valueNode = child.childForFieldName('value') || child.namedChildren[0]; - break; - } - } - } +function handleVarDeclarator(node, rules, scopeStack, assignments, isCallNode) { + let nameNode = node.childForFieldName(rules.varNameField); + let valueNode = rules.varValueField ? node.childForFieldName(rules.varValueField) : null; - if (!valueNode) { - for (const child of node.namedChildren) { - if (child !== nameNode && isCall(unwrapAwait(child))) { - valueNode = child; - break; - } + if (!valueNode && rules.equalsClauseType) { + for (const child of node.namedChildren) { + if (child.type === rules.equalsClauseType) { + valueNode = child.childForFieldName('value') || child.namedChildren[0]; + break; } } + } - if (rules.expressionListType) { - if (nameNode?.type === rules.expressionListType) nameNode = nameNode.namedChildren[0]; - if (valueNode?.type === rules.expressionListType) valueNode = valueNode.namedChildren[0]; + if (!valueNode) { + for (const child of node.namedChildren) { + if (child !== nameNode && isCall(unwrapAwait(child, rules), isCallNode)) { + valueNode = child; + break; + } } + } - const scope = currentScope(); - if (!nameNode || !valueNode || !scope) return; - - const unwrapped = unwrapAwait(valueNode); - const callExpr = isCall(unwrapped) ? 
unwrapped : null; + if (rules.expressionListType) { + if (nameNode?.type === rules.expressionListType) nameNode = nameNode.namedChildren[0]; + if (valueNode?.type === rules.expressionListType) valueNode = valueNode.namedChildren[0]; + } - if (callExpr) { - const callee = resolveCalleeName(callExpr, rules); - if (callee && scope.funcName) { - if ( - (rules.objectDestructType && nameNode.type === rules.objectDestructType) || - (rules.arrayDestructType && nameNode.type === rules.arrayDestructType) - ) { - const names = extractParamNames(nameNode, rules); - for (const n of names) { - assignments.push({ - varName: n, - callerFunc: scope.funcName, - sourceCallName: callee, - expression: truncate(node.text), - line: node.startPosition.row + 1, - }); - scope.locals.set(n, { type: 'destructured', callee }); - } - } else { - const varName = - nameNode.type === 'identifier' || nameNode.type === rules.paramIdentifier - ? nameNode.text - : nameNode.text; + const scope = currentScope(scopeStack); + if (!nameNode || !valueNode || !scope) return; + + const unwrapped = unwrapAwait(valueNode, rules); + const callExpr = isCall(unwrapped, isCallNode) ? unwrapped : null; + + if (callExpr) { + const callee = resolveCalleeName(callExpr, rules); + if (callee && scope.funcName) { + if ( + (rules.objectDestructType && nameNode.type === rules.objectDestructType) || + (rules.arrayDestructType && nameNode.type === rules.arrayDestructType) + ) { + const names = extractParamNames(nameNode, rules); + for (const n of names) { assignments.push({ - varName, + varName: n, callerFunc: scope.funcName, sourceCallName: callee, expression: truncate(node.text), line: node.startPosition.row + 1, }); - scope.locals.set(varName, { type: 'call_return', callee }); + scope.locals.set(n, { type: 'destructured', callee }); } + } else { + const varName = + nameNode.type === 'identifier' || nameNode.type === rules.paramIdentifier + ? 
nameNode.text + : nameNode.text; + assignments.push({ + varName, + callerFunc: scope.funcName, + sourceCallName: callee, + expression: truncate(node.text), + line: node.startPosition.row + 1, + }); + scope.locals.set(varName, { type: 'call_return', callee }); } } } +} - function handleAssignment(node) { - const left = node.childForFieldName(rules.assignLeftField); - const right = node.childForFieldName(rules.assignRightField); - const scope = currentScope(); - if (!scope?.funcName) return; - - if (left && rules.memberNode && left.type === rules.memberNode) { - const receiver = memberReceiver(left, rules); - if (receiver) { - const binding = findBinding(receiver); - if (binding) { - mutations.push({ - funcName: scope.funcName, - receiverName: receiver, - binding, - mutatingExpr: truncate(node.text), - line: node.startPosition.row + 1, - }); - } +function handleAssignment(node, rules, scopeStack, assignments, mutations, isCallNode) { + const left = node.childForFieldName(rules.assignLeftField); + const right = node.childForFieldName(rules.assignRightField); + const scope = currentScope(scopeStack); + if (!scope?.funcName) return; + + if (left && rules.memberNode && left.type === rules.memberNode) { + const receiver = memberReceiver(left, rules); + if (receiver) { + const binding = findBinding(receiver, scopeStack); + if (binding) { + mutations.push({ + funcName: scope.funcName, + receiverName: receiver, + binding, + mutatingExpr: truncate(node.text), + line: node.startPosition.row + 1, + }); } } + } - if (left && isIdent(left.type, rules) && right) { - const unwrapped = unwrapAwait(right); - const callExpr = isCall(unwrapped) ? 
unwrapped : null; - if (callExpr) { - const callee = resolveCalleeName(callExpr, rules); - if (callee) { - assignments.push({ - varName: left.text, - callerFunc: scope.funcName, - sourceCallName: callee, - expression: truncate(node.text), - line: node.startPosition.row + 1, - }); - scope.locals.set(left.text, { type: 'call_return', callee }); - } + if (left && isIdent(left.type, rules) && right) { + const unwrapped = unwrapAwait(right, rules); + const callExpr = isCall(unwrapped, isCallNode) ? unwrapped : null; + if (callExpr) { + const callee = resolveCalleeName(callExpr, rules); + if (callee) { + assignments.push({ + varName: left.text, + callerFunc: scope.funcName, + sourceCallName: callee, + expression: truncate(node.text), + line: node.startPosition.row + 1, + }); + scope.locals.set(left.text, { type: 'call_return', callee }); } } } +} - function handleCallExpr(node) { - const callee = resolveCalleeName(node, rules); - const argsNode = node.childForFieldName(rules.callArgsField); - const scope = currentScope(); - if (!callee || !argsNode || !scope?.funcName) return; +function handleCallExpr(node, rules, scopeStack, argFlows) { + const callee = resolveCalleeName(node, rules); + const argsNode = node.childForFieldName(rules.callArgsField); + const scope = currentScope(scopeStack); + if (!callee || !argsNode || !scope?.funcName) return; - let argIndex = 0; - for (let arg of argsNode.namedChildren) { - if (rules.argumentWrapperType && arg.type === rules.argumentWrapperType) { - arg = arg.namedChildren[0] || arg; - } - const unwrapped = - rules.spreadType && arg.type === rules.spreadType ? arg.namedChildren[0] || arg : arg; - if (!unwrapped) { - argIndex++; - continue; - } + let argIndex = 0; + for (let arg of argsNode.namedChildren) { + if (rules.argumentWrapperType && arg.type === rules.argumentWrapperType) { + arg = arg.namedChildren[0] || arg; + } + const unwrapped = + rules.spreadType && arg.type === rules.spreadType ? 
arg.namedChildren[0] || arg : arg; + if (!unwrapped) { + argIndex++; + continue; + } - const argName = isIdent(unwrapped.type, rules) ? unwrapped.text : null; - const argMember = - rules.memberNode && unwrapped.type === rules.memberNode - ? memberReceiver(unwrapped, rules) - : null; - const trackedName = argName || argMember; - - if (trackedName) { - const binding = findBinding(trackedName); - if (binding) { - argFlows.push({ - callerFunc: scope.funcName, - calleeName: callee, - argIndex, - argName: trackedName, - binding, - confidence: bindingConfidence(binding), - expression: truncate(arg.text), - line: node.startPosition.row + 1, - }); - } + const argName = isIdent(unwrapped.type, rules) ? unwrapped.text : null; + const argMember = + rules.memberNode && unwrapped.type === rules.memberNode + ? memberReceiver(unwrapped, rules) + : null; + const trackedName = argName || argMember; + + if (trackedName) { + const binding = findBinding(trackedName, scopeStack); + if (binding) { + argFlows.push({ + callerFunc: scope.funcName, + calleeName: callee, + argIndex, + argName: trackedName, + binding, + confidence: bindingConfidence(binding), + expression: truncate(arg.text), + line: node.startPosition.row + 1, + }); } - argIndex++; } + argIndex++; } +} - function handleExprStmtMutation(node) { - if (rules.mutatingMethods.size === 0) return; - const expr = node.namedChildren[0]; - if (!expr || !isCall(expr)) return; +function handleExprStmtMutation(node, rules, scopeStack, mutations, isCallNode) { + if (rules.mutatingMethods.size === 0) return; + const expr = node.namedChildren[0]; + if (!expr || !isCall(expr, isCallNode)) return; - let methodName = null; - let receiver = null; + let methodName = null; + let receiver = null; - const fn = expr.childForFieldName(rules.callFunctionField); - if (fn && fn.type === rules.memberNode) { - const prop = fn.childForFieldName(rules.memberPropertyField); - methodName = prop ? 
prop.text : null; - receiver = memberReceiver(fn, rules); - } + const fn = expr.childForFieldName(rules.callFunctionField); + if (fn && fn.type === rules.memberNode) { + const prop = fn.childForFieldName(rules.memberPropertyField); + methodName = prop ? prop.text : null; + receiver = memberReceiver(fn, rules); + } - if (!receiver && rules.callObjectField) { - const obj = expr.childForFieldName(rules.callObjectField); - const name = expr.childForFieldName(rules.callFunctionField); - if (obj && name) { - methodName = name.text; - receiver = isIdent(obj.type, rules) ? obj.text : null; - } + if (!receiver && rules.callObjectField) { + const obj = expr.childForFieldName(rules.callObjectField); + const name = expr.childForFieldName(rules.callFunctionField); + if (obj && name) { + methodName = name.text; + receiver = isIdent(obj.type, rules) ? obj.text : null; } + } - if (!methodName || !rules.mutatingMethods.has(methodName)) return; + if (!methodName || !rules.mutatingMethods.has(methodName)) return; - const scope = currentScope(); - if (!receiver || !scope?.funcName) return; + const scope = currentScope(scopeStack); + if (!receiver || !scope?.funcName) return; - const binding = findBinding(receiver); - if (binding) { - mutations.push({ - funcName: scope.funcName, - receiverName: receiver, - binding, - mutatingExpr: truncate(expr.text), - line: node.startPosition.row + 1, - }); - } + const binding = findBinding(receiver, scopeStack); + if (binding) { + mutations.push({ + funcName: scope.funcName, + receiverName: receiver, + binding, + mutatingExpr: truncate(expr.text), + line: node.startPosition.row + 1, + }); } +} + +// ── Return statement handler ──────────────────────────────────────────── + +function handleReturn(node, rules, scopeStack, returns) { + if (node.parent?.type === rules.returnNode) return; // keyword token, not statement + + const scope = currentScope(scopeStack); + if (scope?.funcName) { + const expr = node.namedChildren[0]; + const referencedNames = []; 
+ if (expr) collectIdentifiers(expr, referencedNames, rules); + returns.push({ + funcName: scope.funcName, + expression: truncate(expr ? expr.text : ''), + referencedNames, + line: node.startPosition.row + 1, + }); + } +} + +// ── Visitor factory ───────────────────────────────────────────────────── + +/** + * Create a dataflow visitor for use with walkWithVisitors. + * + * @param {object} rules - DATAFLOW_RULES for the language + * @returns {Visitor} + */ +export function createDataflowVisitor(rules) { + const isCallNode = rules.callNodes ? (t) => rules.callNodes.has(t) : (t) => t === rules.callNode; + + const parameters = []; + const returns = []; + const assignments = []; + const argFlows = []; + const mutations = []; + const scopeStack = []; return { name: 'dataflow', @@ -300,54 +326,34 @@ export function createDataflowVisitor(rules) { enterNode(node, _context) { const t = node.type; - // Skip function nodes — handled by enterFunction/exitFunction if (rules.functionNodes.has(t)) return; - // Return statements (skip keyword tokens inside return statements, e.g. Ruby's - // `return` node nests a `return` keyword child with the same type string) if (rules.returnNode && t === rules.returnNode) { - if (node.parent?.type === rules.returnNode) return; // keyword token, not statement - - const scope = currentScope(); - if (scope?.funcName) { - const expr = node.namedChildren[0]; - const referencedNames = []; - if (expr) collectIdentifiers(expr, referencedNames, rules); - returns.push({ - funcName: scope.funcName, - expression: truncate(expr ? 
expr.text : ''), - referencedNames, - line: node.startPosition.row + 1, - }); - } + handleReturn(node, rules, scopeStack, returns); return; } - // Variable declarations if (rules.varDeclaratorNode && t === rules.varDeclaratorNode) { - handleVarDeclarator(node); + handleVarDeclarator(node, rules, scopeStack, assignments, isCallNode); return; } if (rules.varDeclaratorNodes?.has(t)) { - handleVarDeclarator(node); + handleVarDeclarator(node, rules, scopeStack, assignments, isCallNode); return; } - // Call expressions if (isCallNode(t)) { - handleCallExpr(node); + handleCallExpr(node, rules, scopeStack, argFlows); return; } - // Assignment expressions if (rules.assignmentNode && t === rules.assignmentNode) { - handleAssignment(node); + handleAssignment(node, rules, scopeStack, assignments, mutations, isCallNode); return; } - // Mutation detection via expression_statement if (rules.expressionStmtNode && t === rules.expressionStmtNode) { - handleExprStmtMutation(node); + handleExprStmtMutation(node, rules, scopeStack, mutations, isCallNode); } }, From 68fb0b3b50f8211d5fdcac860f41d565794a6808 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 03:24:42 -0600 Subject: [PATCH 02/22] refactor: decompose domain builder stages into focused helpers Extract edge-building by type (import, call-native, call-JS, class hierarchy) from buildEdges. Extract per-phase insertion logic from insertNodes. Extract scoped/incremental/full-build paths and reverse-dep cascade from detectChanges. Extract setup, engine init, alias loading from pipeline.js. Extract node/edge-building helpers from incremental.js rebuildFile. 
Impact: 44 functions changed, 19 affected Impact: 44 functions changed, 32 affected --- src/domain/graph/builder/incremental.js | 206 ++++--- src/domain/graph/builder/pipeline.js | 186 +++--- .../graph/builder/stages/build-edges.js | 557 ++++++++++-------- .../graph/builder/stages/detect-changes.js | 372 ++++++------ .../graph/builder/stages/insert-nodes.js | 286 ++++----- 5 files changed, 889 insertions(+), 718 deletions(-) diff --git a/src/domain/graph/builder/incremental.js b/src/domain/graph/builder/incremental.js index f04a136e..63694385 100644 --- a/src/domain/graph/builder/incremental.js +++ b/src/domain/graph/builder/incremental.js @@ -12,10 +12,121 @@ import { parseFileIncremental } from '../../parser.js'; import { computeConfidence, resolveImportPath } from '../resolve.js'; import { BUILTIN_RECEIVERS, readFileSafe } from './helpers.js'; +// ── Node insertion ────────────────────────────────────────────────────── + +function insertFileNodes(stmts, relPath, symbols) { + stmts.insertNode.run(relPath, 'file', relPath, 0, null); + for (const def of symbols.definitions) { + stmts.insertNode.run(def.name, def.kind, relPath, def.line, def.endLine || null); + } + for (const exp of symbols.exports) { + stmts.insertNode.run(exp.name, exp.kind, relPath, exp.line, null); + } +} + +// ── Import edge building ──────────────────────────────────────────────── + +function buildImportEdges(stmts, relPath, symbols, rootDir, fileNodeId, aliases) { + let edgesAdded = 0; + for (const imp of symbols.imports) { + const resolvedPath = resolveImportPath( + path.join(rootDir, relPath), + imp.source, + rootDir, + aliases, + ); + const targetRow = stmts.getNodeId.get(resolvedPath, 'file', resolvedPath, 0); + if (targetRow) { + const edgeKind = imp.reexport ? 'reexports' : imp.typeOnly ? 
'imports-type' : 'imports'; + stmts.insertEdge.run(fileNodeId, targetRow.id, edgeKind, 1.0, 0); + edgesAdded++; + } + } + return edgesAdded; +} + +function buildImportedNamesMap(symbols, rootDir, relPath, aliases) { + const importedNames = new Map(); + for (const imp of symbols.imports) { + const resolvedPath = resolveImportPath( + path.join(rootDir, relPath), + imp.source, + rootDir, + aliases, + ); + for (const name of imp.names) { + importedNames.set(name.replace(/^\*\s+as\s+/, ''), resolvedPath); + } + } + return importedNames; +} + +// ── Call edge building ────────────────────────────────────────────────── + +function findCaller(call, definitions, relPath, stmts) { + let caller = null; + let callerSpan = Infinity; + for (const def of definitions) { + if (def.line <= call.line) { + const end = def.endLine || Infinity; + if (call.line <= end) { + const span = end - def.line; + if (span < callerSpan) { + const row = stmts.getNodeId.get(def.name, def.kind, relPath, def.line); + if (row) { + caller = row; + callerSpan = span; + } + } + } else if (!caller) { + const row = stmts.getNodeId.get(def.name, def.kind, relPath, def.line); + if (row) caller = row; + } + } + } + return caller; +} + +function resolveCallTargets(stmts, call, relPath, importedNames) { + const importedFrom = importedNames.get(call.name); + let targets; + if (importedFrom) { + targets = stmts.findNodeInFile.all(call.name, importedFrom); + } + if (!targets || targets.length === 0) { + targets = stmts.findNodeInFile.all(call.name, relPath); + if (targets.length === 0) { + targets = stmts.findNodeByName.all(call.name); + } + } + return { targets, importedFrom }; +} + +function buildCallEdges(stmts, relPath, symbols, fileNodeRow, importedNames) { + let edgesAdded = 0; + for (const call of symbols.calls) { + if (call.receiver && BUILTIN_RECEIVERS.has(call.receiver)) continue; + + const caller = findCaller(call, symbols.definitions, relPath, stmts) || fileNodeRow; + const { targets, importedFrom } = 
resolveCallTargets(stmts, call, relPath, importedNames); + + for (const t of targets) { + if (t.id !== caller.id) { + const confidence = computeConfidence(relPath, t.file, importedFrom ?? null); + stmts.insertEdge.run(caller.id, t.id, 'calls', confidence, call.dynamic ? 1 : 0); + edgesAdded++; + } + } + } + return edgesAdded; +} + +// ── Main entry point ──────────────────────────────────────────────────── + /** * Parse a single file and update the database incrementally. * - * @param {import('better-sqlite3').Database} db + * @param {import('better-sqlite3').Database} _db * @param {string} rootDir - Absolute root directory * @param {string} filePath - Absolute file path * @param {object} stmts - Prepared DB statements @@ -61,105 +172,20 @@ export async function rebuildFile(_db, rootDir, filePath, stmts, engineOpts, cac const symbols = await parseFileIncremental(cache, filePath, code, engineOpts); if (!symbols) return null; - // Insert nodes - stmts.insertNode.run(relPath, 'file', relPath, 0, null); - for (const def of symbols.definitions) { - stmts.insertNode.run(def.name, def.kind, relPath, def.line, def.endLine || null); - } - for (const exp of symbols.exports) { - stmts.insertNode.run(exp.name, exp.kind, relPath, exp.line, null); - } + insertFileNodes(stmts, relPath, symbols); const newNodes = stmts.countNodes.get(relPath)?.c || 0; const newSymbols = diffSymbols ? 
stmts.listSymbols.all(relPath) : []; - let edgesAdded = 0; const fileNodeRow = stmts.getNodeId.get(relPath, 'file', relPath, 0); if (!fileNodeRow) return { file: relPath, nodesAdded: newNodes, nodesRemoved: oldNodes, edgesAdded: 0 }; - const fileNodeId = fileNodeRow.id; - // Load aliases for import resolution const aliases = { baseUrl: null, paths: {} }; - // Import edges - for (const imp of symbols.imports) { - const resolvedPath = resolveImportPath( - path.join(rootDir, relPath), - imp.source, - rootDir, - aliases, - ); - const targetRow = stmts.getNodeId.get(resolvedPath, 'file', resolvedPath, 0); - if (targetRow) { - const edgeKind = imp.reexport ? 'reexports' : imp.typeOnly ? 'imports-type' : 'imports'; - stmts.insertEdge.run(fileNodeId, targetRow.id, edgeKind, 1.0, 0); - edgesAdded++; - } - } - - // Build import name → resolved file mapping - const importedNames = new Map(); - for (const imp of symbols.imports) { - const resolvedPath = resolveImportPath( - path.join(rootDir, relPath), - imp.source, - rootDir, - aliases, - ); - for (const name of imp.names) { - importedNames.set(name.replace(/^\*\s+as\s+/, ''), resolvedPath); - } - } - - // Call edges - for (const call of symbols.calls) { - if (call.receiver && BUILTIN_RECEIVERS.has(call.receiver)) continue; - - let caller = null; - let callerSpan = Infinity; - for (const def of symbols.definitions) { - if (def.line <= call.line) { - const end = def.endLine || Infinity; - if (call.line <= end) { - const span = end - def.line; - if (span < callerSpan) { - const row = stmts.getNodeId.get(def.name, def.kind, relPath, def.line); - if (row) { - caller = row; - callerSpan = span; - } - } - } else if (!caller) { - const row = stmts.getNodeId.get(def.name, def.kind, relPath, def.line); - if (row) caller = row; - } - } - } - if (!caller) caller = fileNodeRow; - - const importedFrom = importedNames.get(call.name); - let targets; - if (importedFrom) { - targets = stmts.findNodeInFile.all(call.name, importedFrom); - } - 
if (!targets || targets.length === 0) { - targets = stmts.findNodeInFile.all(call.name, relPath); - if (targets.length === 0) { - targets = stmts.findNodeByName.all(call.name); - } - } - - for (const t of targets) { - if (t.id !== caller.id) { - const confidence = importedFrom - ? computeConfidence(relPath, t.file, importedFrom) - : computeConfidence(relPath, t.file, null); - stmts.insertEdge.run(caller.id, t.id, 'calls', confidence, call.dynamic ? 1 : 0); - edgesAdded++; - } - } - } + let edgesAdded = buildImportEdges(stmts, relPath, symbols, rootDir, fileNodeRow.id, aliases); + const importedNames = buildImportedNamesMap(symbols, rootDir, relPath, aliases); + edgesAdded += buildCallEdges(stmts, relPath, symbols, fileNodeRow, importedNames); const symbolDiff = diffSymbols ? diffSymbols(oldSymbols, newSymbols) : null; const event = oldNodes === 0 ? 'added' : 'modified'; diff --git a/src/domain/graph/builder/pipeline.js b/src/domain/graph/builder/pipeline.js index ea9848c5..963a0086 100644 --- a/src/domain/graph/builder/pipeline.js +++ b/src/domain/graph/builder/pipeline.js @@ -23,94 +23,73 @@ import { parseFiles } from './stages/parse-files.js'; import { resolveImports } from './stages/resolve-imports.js'; import { runAnalyses } from './stages/run-analyses.js'; -/** - * Build the dependency graph for a codebase. - * - * Signature and return value are identical to the original monolithic buildGraph(). 
- * - * @param {string} rootDir - Root directory to scan - * @param {object} [opts] - Build options - * @returns {Promise<{ phases: object } | undefined>} - */ -export async function buildGraph(rootDir, opts = {}) { - const ctx = new PipelineContext(); - ctx.buildStart = performance.now(); - ctx.opts = opts; +// ── Setup helpers ─────────────────────────────────────────────────────── - // ── Setup (creates DB, loads config, selects engine) ────────────── - ctx.rootDir = path.resolve(rootDir); - ctx.dbPath = path.join(ctx.rootDir, '.codegraph', 'graph.db'); - ctx.db = openDb(ctx.dbPath); - try { - initSchema(ctx.db); - - ctx.config = loadConfig(ctx.rootDir); - ctx.incremental = - opts.incremental !== false && ctx.config.build && ctx.config.build.incremental !== false; - - ctx.engineOpts = { - engine: opts.engine || 'auto', - dataflow: opts.dataflow !== false, - ast: opts.ast !== false, - }; - const { name: engineName, version: engineVersion } = getActiveEngine(ctx.engineOpts); - ctx.engineName = engineName; - ctx.engineVersion = engineVersion; - info(`Using ${engineName} engine${engineVersion ? 
` (v${engineVersion})` : ''}`); - - // Engine/schema mismatch detection - ctx.schemaVersion = MIGRATIONS[MIGRATIONS.length - 1].version; - ctx.forceFullRebuild = false; - if (ctx.incremental) { - const prevEngine = getBuildMeta(ctx.db, 'engine'); - if (prevEngine && prevEngine !== engineName) { - info(`Engine changed (${prevEngine} → ${engineName}), promoting to full rebuild.`); - ctx.forceFullRebuild = true; - } - const prevSchema = getBuildMeta(ctx.db, 'schema_version'); - if (prevSchema && Number(prevSchema) !== ctx.schemaVersion) { - info( - `Schema version changed (${prevSchema} → ${ctx.schemaVersion}), promoting to full rebuild.`, - ); - ctx.forceFullRebuild = true; - } - } +function initializeEngine(ctx) { + ctx.engineOpts = { + engine: ctx.opts.engine || 'auto', + dataflow: ctx.opts.dataflow !== false, + ast: ctx.opts.ast !== false, + }; + const { name: engineName, version: engineVersion } = getActiveEngine(ctx.engineOpts); + ctx.engineName = engineName; + ctx.engineVersion = engineVersion; + info(`Using ${engineName} engine${engineVersion ? ` (v${engineVersion})` : ''}`); +} - // Path aliases - ctx.aliases = loadPathAliases(ctx.rootDir); - if (ctx.config.aliases) { - for (const [key, value] of Object.entries(ctx.config.aliases)) { - const pattern = key.endsWith('/') ? `${key}*` : key; - const target = path.resolve(ctx.rootDir, value); - ctx.aliases.paths[pattern] = [target.endsWith('/') ? 
`${target}*` : `${target}/*`]; - } - } - if (ctx.aliases.baseUrl || Object.keys(ctx.aliases.paths).length > 0) { - info( - `Loaded path aliases: baseUrl=${ctx.aliases.baseUrl || 'none'}, ${Object.keys(ctx.aliases.paths).length} path mappings`, - ); +function checkEngineSchemaMismatch(ctx) { + ctx.schemaVersion = MIGRATIONS[MIGRATIONS.length - 1].version; + ctx.forceFullRebuild = false; + if (!ctx.incremental) return; + + const prevEngine = getBuildMeta(ctx.db, 'engine'); + if (prevEngine && prevEngine !== ctx.engineName) { + info(`Engine changed (${prevEngine} → ${ctx.engineName}), promoting to full rebuild.`); + ctx.forceFullRebuild = true; + } + const prevSchema = getBuildMeta(ctx.db, 'schema_version'); + if (prevSchema && Number(prevSchema) !== ctx.schemaVersion) { + info( + `Schema version changed (${prevSchema} → ${ctx.schemaVersion}), promoting to full rebuild.`, + ); + ctx.forceFullRebuild = true; + } +} + +function loadAliases(ctx) { + ctx.aliases = loadPathAliases(ctx.rootDir); + if (ctx.config.aliases) { + for (const [key, value] of Object.entries(ctx.config.aliases)) { + const pattern = key.endsWith('/') ? `${key}*` : key; + const target = path.resolve(ctx.rootDir, value); + ctx.aliases.paths[pattern] = [target.endsWith('/') ? 
`${target}*` : `${target}/*`]; } + } + if (ctx.aliases.baseUrl || Object.keys(ctx.aliases.paths).length > 0) { + info( + `Loaded path aliases: baseUrl=${ctx.aliases.baseUrl || 'none'}, ${Object.keys(ctx.aliases.paths).length} path mappings`, + ); + } +} - ctx.timing.setupMs = performance.now() - ctx.buildStart; +function setupPipeline(ctx) { + ctx.rootDir = path.resolve(ctx.rootDir); + ctx.dbPath = path.join(ctx.rootDir, '.codegraph', 'graph.db'); + ctx.db = openDb(ctx.dbPath); + initSchema(ctx.db); - // ── Pipeline stages ───────────────────────────────────────────── - await collectFiles(ctx); - await detectChanges(ctx); + ctx.config = loadConfig(ctx.rootDir); + ctx.incremental = + ctx.opts.incremental !== false && ctx.config.build && ctx.config.build.incremental !== false; - if (ctx.earlyExit) return; + initializeEngine(ctx); + checkEngineSchemaMismatch(ctx); + loadAliases(ctx); - await parseFiles(ctx); - await insertNodes(ctx); - await resolveImports(ctx); - await buildEdges(ctx); - await buildStructure(ctx); - await runAnalyses(ctx); - await finalize(ctx); - } catch (err) { - if (!ctx.earlyExit) closeDb(ctx.db); - throw err; - } + ctx.timing.setupMs = performance.now() - ctx.buildStart; +} +function formatTimingResult(ctx) { return { phases: { setupMs: +ctx.timing.setupMs.toFixed(1), @@ -128,3 +107,50 @@ export async function buildGraph(rootDir, opts = {}) { }, }; } + +// ── Pipeline stages execution ─────────────────────────────────────────── + +async function runPipelineStages(ctx) { + await collectFiles(ctx); + await detectChanges(ctx); + + if (ctx.earlyExit) return; + + await parseFiles(ctx); + await insertNodes(ctx); + await resolveImports(ctx); + await buildEdges(ctx); + await buildStructure(ctx); + await runAnalyses(ctx); + await finalize(ctx); +} + +// ── Main entry point ──────────────────────────────────────────────────── + +/** + * Build the dependency graph for a codebase. 
+ * + * Signature and return value are identical to the original monolithic buildGraph(). + * + * @param {string} rootDir - Root directory to scan + * @param {object} [opts] - Build options + * @returns {Promise<{ phases: object } | undefined>} + */ +export async function buildGraph(rootDir, opts = {}) { + const ctx = new PipelineContext(); + ctx.buildStart = performance.now(); + ctx.opts = opts; + ctx.rootDir = rootDir; + + try { + setupPipeline(ctx); + await runPipelineStages(ctx); + } catch (err) { + if (!ctx.earlyExit) closeDb(ctx.db); + throw err; + } + + if (ctx.earlyExit) return; + + return formatTimingResult(ctx); +} diff --git a/src/domain/graph/builder/stages/build-edges.js b/src/domain/graph/builder/stages/build-edges.js index a1529454..f830ed1c 100644 --- a/src/domain/graph/builder/stages/build-edges.js +++ b/src/domain/graph/builder/stages/build-edges.js @@ -12,25 +12,18 @@ import { computeConfidence } from '../../resolve.js'; import { BUILTIN_RECEIVERS, batchInsertEdges } from '../helpers.js'; import { getResolved, isBarrelFile, resolveBarrelExport } from './resolve-imports.js'; -/** - * @param {import('../context.js').PipelineContext} ctx - */ -export async function buildEdges(ctx) { - const { db, fileSymbols, barrelOnlyFiles, rootDir, engineName } = ctx; +// ── Node lookup setup ─────────────────────────────────────────────────── - const getNodeIdStmt = { +function makeGetNodeIdStmt(db) { + return { get: (name, kind, file, line) => { const id = getNodeId(db, name, kind, file, line); return id != null ? 
{ id } : undefined; }, }; +} - // Pre-load all nodes into lookup maps - const allNodes = db - .prepare( - `SELECT id, name, kind, file, line FROM nodes WHERE kind IN ('function','method','class','interface','struct','type','module','enum','trait','record','constant')`, - ) - .all(); +function setupNodeLookups(ctx, allNodes) { ctx.nodesByName = new Map(); for (const node of allNodes) { if (!ctx.nodesByName.has(node.name)) ctx.nodesByName.set(node.name, []); @@ -42,253 +35,339 @@ export async function buildEdges(ctx) { if (!ctx.nodesByNameAndFile.has(key)) ctx.nodesByNameAndFile.set(key, []); ctx.nodesByNameAndFile.get(key).push(node); } +} - const t0 = performance.now(); - const buildEdgesTx = db.transaction(() => { - const allEdgeRows = []; +// ── Import edges ──────────────────────────────────────────────────────── - // ── Import edges ──────────────────────────────────────────────── - for (const [relPath, symbols] of fileSymbols) { - if (barrelOnlyFiles.has(relPath)) continue; - const fileNodeRow = getNodeIdStmt.get(relPath, 'file', relPath, 0); - if (!fileNodeRow) continue; - const fileNodeId = fileNodeRow.id; - - for (const imp of symbols.imports) { - const resolvedPath = getResolved(ctx, path.join(rootDir, relPath), imp.source); - const targetRow = getNodeIdStmt.get(resolvedPath, 'file', resolvedPath, 0); - if (targetRow) { - const edgeKind = imp.reexport - ? 'reexports' - : imp.typeOnly - ? 'imports-type' - : imp.dynamicImport - ? 
'dynamic-imports' - : 'imports'; - allEdgeRows.push([fileNodeId, targetRow.id, edgeKind, 1.0, 0]); - - if (!imp.reexport && isBarrelFile(ctx, resolvedPath)) { - const resolvedSources = new Set(); - for (const name of imp.names) { - const cleanName = name.replace(/^\*\s+as\s+/, ''); - const actualSource = resolveBarrelExport(ctx, resolvedPath, cleanName); - if ( - actualSource && - actualSource !== resolvedPath && - !resolvedSources.has(actualSource) - ) { - resolvedSources.add(actualSource); - const actualRow = getNodeIdStmt.get(actualSource, 'file', actualSource, 0); - if (actualRow) { - allEdgeRows.push([ - fileNodeId, - actualRow.id, - edgeKind === 'imports-type' - ? 'imports-type' - : edgeKind === 'dynamic-imports' - ? 'dynamic-imports' - : 'imports', - 0.9, - 0, - ]); - } - } - } - } - } +function buildImportEdges(ctx, getNodeIdStmt, allEdgeRows) { + const { fileSymbols, barrelOnlyFiles, rootDir } = ctx; + + for (const [relPath, symbols] of fileSymbols) { + if (barrelOnlyFiles.has(relPath)) continue; + const fileNodeRow = getNodeIdStmt.get(relPath, 'file', relPath, 0); + if (!fileNodeRow) continue; + const fileNodeId = fileNodeRow.id; + + for (const imp of symbols.imports) { + const resolvedPath = getResolved(ctx, path.join(rootDir, relPath), imp.source); + const targetRow = getNodeIdStmt.get(resolvedPath, 'file', resolvedPath, 0); + if (!targetRow) continue; + + const edgeKind = imp.reexport + ? 'reexports' + : imp.typeOnly + ? 'imports-type' + : imp.dynamicImport + ? 'dynamic-imports' + : 'imports'; + allEdgeRows.push([fileNodeId, targetRow.id, edgeKind, 1.0, 0]); + + if (!imp.reexport && isBarrelFile(ctx, resolvedPath)) { + buildBarrelEdges(ctx, imp, resolvedPath, fileNodeId, edgeKind, getNodeIdStmt, allEdgeRows); } } + } +} - // ── Call/receiver/extends/implements edges ─────────────────────── - const native = engineName === 'native' ? 
loadNative() : null; - if (native?.buildCallEdges) { - const nativeFiles = []; - for (const [relPath, symbols] of fileSymbols) { - if (barrelOnlyFiles.has(relPath)) continue; - const fileNodeRow = getNodeIdStmt.get(relPath, 'file', relPath, 0); - if (!fileNodeRow) continue; - - const importedNames = []; - for (const imp of symbols.imports) { - const resolvedPath = getResolved(ctx, path.join(rootDir, relPath), imp.source); - for (const name of imp.names) { - const cleanName = name.replace(/^\*\s+as\s+/, ''); - let targetFile = resolvedPath; - if (isBarrelFile(ctx, resolvedPath)) { - const actual = resolveBarrelExport(ctx, resolvedPath, cleanName); - if (actual) targetFile = actual; - } - importedNames.push({ name: cleanName, file: targetFile }); +function buildBarrelEdges(ctx, imp, resolvedPath, fileNodeId, edgeKind, getNodeIdStmt, edgeRows) { + const resolvedSources = new Set(); + for (const name of imp.names) { + const cleanName = name.replace(/^\*\s+as\s+/, ''); + const actualSource = resolveBarrelExport(ctx, resolvedPath, cleanName); + if (actualSource && actualSource !== resolvedPath && !resolvedSources.has(actualSource)) { + resolvedSources.add(actualSource); + const actualRow = getNodeIdStmt.get(actualSource, 'file', actualSource, 0); + if (actualRow) { + const kind = + edgeKind === 'imports-type' + ? 'imports-type' + : edgeKind === 'dynamic-imports' + ? 
'dynamic-imports' + : 'imports'; + edgeRows.push([fileNodeId, actualRow.id, kind, 0.9, 0]); + } + } + } +} + +// ── Call edges (native engine) ────────────────────────────────────────── + +function buildCallEdgesNative(ctx, getNodeIdStmt, allEdgeRows, allNodes, native) { + const { fileSymbols, barrelOnlyFiles, rootDir } = ctx; + const nativeFiles = []; + + for (const [relPath, symbols] of fileSymbols) { + if (barrelOnlyFiles.has(relPath)) continue; + const fileNodeRow = getNodeIdStmt.get(relPath, 'file', relPath, 0); + if (!fileNodeRow) continue; + + const importedNames = buildImportedNamesForNative(ctx, relPath, symbols, rootDir); + nativeFiles.push({ + file: relPath, + fileNodeId: fileNodeRow.id, + definitions: symbols.definitions.map((d) => ({ + name: d.name, + kind: d.kind, + line: d.line, + endLine: d.endLine ?? null, + })), + calls: symbols.calls, + importedNames, + classes: symbols.classes, + }); + } + + const nativeEdges = native.buildCallEdges(nativeFiles, allNodes, [...BUILTIN_RECEIVERS]); + for (const e of nativeEdges) { + allEdgeRows.push([e.sourceId, e.targetId, e.kind, e.confidence, e.dynamic]); + } +} + +function buildImportedNamesForNative(ctx, relPath, symbols, rootDir) { + const importedNames = []; + for (const imp of symbols.imports) { + const resolvedPath = getResolved(ctx, path.join(rootDir, relPath), imp.source); + for (const name of imp.names) { + const cleanName = name.replace(/^\*\s+as\s+/, ''); + let targetFile = resolvedPath; + if (isBarrelFile(ctx, resolvedPath)) { + const actual = resolveBarrelExport(ctx, resolvedPath, cleanName); + if (actual) targetFile = actual; + } + importedNames.push({ name: cleanName, file: targetFile }); + } + } + return importedNames; +} + +// ── Call edges (JS fallback) ──────────────────────────────────────────── + +function buildCallEdgesJS(ctx, getNodeIdStmt, allEdgeRows) { + const { fileSymbols, barrelOnlyFiles, rootDir } = ctx; + + for (const [relPath, symbols] of fileSymbols) { + if 
(barrelOnlyFiles.has(relPath)) continue; + const fileNodeRow = getNodeIdStmt.get(relPath, 'file', relPath, 0); + if (!fileNodeRow) continue; + + const importedNames = buildImportedNamesMap(ctx, relPath, symbols, rootDir); + const seenCallEdges = new Set(); + + buildFileCallEdges( + ctx, + relPath, + symbols, + fileNodeRow, + importedNames, + seenCallEdges, + getNodeIdStmt, + allEdgeRows, + ); + buildClassHierarchyEdges(ctx, relPath, symbols, allEdgeRows); + } +} + +function buildImportedNamesMap(ctx, relPath, symbols, rootDir) { + const importedNames = new Map(); + for (const imp of symbols.imports) { + const resolvedPath = getResolved(ctx, path.join(rootDir, relPath), imp.source); + for (const name of imp.names) { + importedNames.set(name.replace(/^\*\s+as\s+/, ''), resolvedPath); + } + } + return importedNames; +} + +function findCaller(call, definitions, relPath, getNodeIdStmt, fileNodeRow) { + let caller = null; + let callerSpan = Infinity; + for (const def of definitions) { + if (def.line <= call.line) { + const end = def.endLine || Infinity; + if (call.line <= end) { + const span = end - def.line; + if (span < callerSpan) { + const row = getNodeIdStmt.get(def.name, def.kind, relPath, def.line); + if (row) { + caller = row; + callerSpan = span; } } + } else if (!caller) { + const row = getNodeIdStmt.get(def.name, def.kind, relPath, def.line); + if (row) caller = row; + } + } + } + return caller || fileNodeRow; +} - nativeFiles.push({ - file: relPath, - fileNodeId: fileNodeRow.id, - definitions: symbols.definitions.map((d) => ({ - name: d.name, - kind: d.kind, - line: d.line, - endLine: d.endLine ?? 
null, - })), - calls: symbols.calls, - importedNames, - classes: symbols.classes, - }); +function resolveCallTargets(ctx, call, relPath, importedNames) { + const importedFrom = importedNames.get(call.name); + let targets; + + if (importedFrom) { + targets = ctx.nodesByNameAndFile.get(`${call.name}|${importedFrom}`) || []; + if (targets.length === 0 && isBarrelFile(ctx, importedFrom)) { + const actualSource = resolveBarrelExport(ctx, importedFrom, call.name); + if (actualSource) { + targets = ctx.nodesByNameAndFile.get(`${call.name}|${actualSource}`) || []; } + } + } - const nativeEdges = native.buildCallEdges(nativeFiles, allNodes, [...BUILTIN_RECEIVERS]); - for (const e of nativeEdges) { - allEdgeRows.push([e.sourceId, e.targetId, e.kind, e.confidence, e.dynamic]); + if (!targets || targets.length === 0) { + targets = ctx.nodesByNameAndFile.get(`${call.name}|${relPath}`) || []; + if (targets.length === 0) { + targets = resolveByMethodOrGlobal(ctx, call, relPath); + } + } + + if (targets.length > 1) { + targets.sort((a, b) => { + const confA = computeConfidence(relPath, a.file, importedFrom); + const confB = computeConfidence(relPath, b.file, importedFrom); + return confB - confA; + }); + } + + return { targets, importedFrom }; +} + +function resolveByMethodOrGlobal(ctx, call, relPath) { + const methodCandidates = (ctx.nodesByName.get(call.name) || []).filter( + (n) => n.name.endsWith(`.${call.name}`) && n.kind === 'method', + ); + if (methodCandidates.length > 0) return methodCandidates; + + if ( + !call.receiver || + call.receiver === 'this' || + call.receiver === 'self' || + call.receiver === 'super' + ) { + return (ctx.nodesByName.get(call.name) || []).filter( + (n) => computeConfidence(relPath, n.file, null) >= 0.5, + ); + } + return []; +} + +function buildFileCallEdges( + ctx, + relPath, + symbols, + fileNodeRow, + importedNames, + seenCallEdges, + getNodeIdStmt, + allEdgeRows, +) { + for (const call of symbols.calls) { + if (call.receiver && 
BUILTIN_RECEIVERS.has(call.receiver)) continue; + + const caller = findCaller(call, symbols.definitions, relPath, getNodeIdStmt, fileNodeRow); + const isDynamic = call.dynamic ? 1 : 0; + const { targets, importedFrom } = resolveCallTargets(ctx, call, relPath, importedNames); + + for (const t of targets) { + const edgeKey = `${caller.id}|${t.id}`; + if (t.id !== caller.id && !seenCallEdges.has(edgeKey)) { + seenCallEdges.add(edgeKey); + const confidence = computeConfidence(relPath, t.file, importedFrom); + allEdgeRows.push([caller.id, t.id, 'calls', confidence, isDynamic]); } - } else { - // JS fallback - for (const [relPath, symbols] of fileSymbols) { - if (barrelOnlyFiles.has(relPath)) continue; - const fileNodeRow = getNodeIdStmt.get(relPath, 'file', relPath, 0); - if (!fileNodeRow) continue; - - const importedNames = new Map(); - for (const imp of symbols.imports) { - const resolvedPath = getResolved(ctx, path.join(rootDir, relPath), imp.source); - for (const name of imp.names) { - const cleanName = name.replace(/^\*\s+as\s+/, ''); - importedNames.set(cleanName, resolvedPath); - } - } + } - const seenCallEdges = new Set(); - for (const call of symbols.calls) { - if (call.receiver && BUILTIN_RECEIVERS.has(call.receiver)) continue; - let caller = null; - let callerSpan = Infinity; - for (const def of symbols.definitions) { - if (def.line <= call.line) { - const end = def.endLine || Infinity; - if (call.line <= end) { - const span = end - def.line; - if (span < callerSpan) { - const row = getNodeIdStmt.get(def.name, def.kind, relPath, def.line); - if (row) { - caller = row; - callerSpan = span; - } - } - } else if (!caller) { - const row = getNodeIdStmt.get(def.name, def.kind, relPath, def.line); - if (row) caller = row; - } - } - } - if (!caller) caller = fileNodeRow; - - const isDynamic = call.dynamic ? 
1 : 0; - let targets; - const importedFrom = importedNames.get(call.name); - - if (importedFrom) { - targets = ctx.nodesByNameAndFile.get(`${call.name}|${importedFrom}`) || []; - if (targets.length === 0 && isBarrelFile(ctx, importedFrom)) { - const actualSource = resolveBarrelExport(ctx, importedFrom, call.name); - if (actualSource) { - targets = ctx.nodesByNameAndFile.get(`${call.name}|${actualSource}`) || []; - } - } - } - if (!targets || targets.length === 0) { - targets = ctx.nodesByNameAndFile.get(`${call.name}|${relPath}`) || []; - if (targets.length === 0) { - const methodCandidates = (ctx.nodesByName.get(call.name) || []).filter( - (n) => n.name.endsWith(`.${call.name}`) && n.kind === 'method', - ); - if (methodCandidates.length > 0) { - targets = methodCandidates; - } else if ( - !call.receiver || - call.receiver === 'this' || - call.receiver === 'self' || - call.receiver === 'super' - ) { - targets = (ctx.nodesByName.get(call.name) || []).filter( - (n) => computeConfidence(relPath, n.file, null) >= 0.5, - ); - } - } - } + // Receiver edge + if ( + call.receiver && + !BUILTIN_RECEIVERS.has(call.receiver) && + call.receiver !== 'this' && + call.receiver !== 'self' && + call.receiver !== 'super' + ) { + buildReceiverEdge(ctx, call, caller, relPath, seenCallEdges, allEdgeRows); + } + } +} - if (targets.length > 1) { - targets.sort((a, b) => { - const confA = computeConfidence(relPath, a.file, importedFrom); - const confB = computeConfidence(relPath, b.file, importedFrom); - return confB - confA; - }); - } +function buildReceiverEdge(ctx, call, caller, relPath, seenCallEdges, allEdgeRows) { + const receiverKinds = new Set(['class', 'struct', 'interface', 'type', 'module']); + const samefile = ctx.nodesByNameAndFile.get(`${call.receiver}|${relPath}`) || []; + const candidates = samefile.length > 0 ? 
samefile : ctx.nodesByName.get(call.receiver) || []; + const receiverNodes = candidates.filter((n) => receiverKinds.has(n.kind)); + if (receiverNodes.length > 0 && caller) { + const recvTarget = receiverNodes[0]; + const recvKey = `recv|${caller.id}|${recvTarget.id}`; + if (!seenCallEdges.has(recvKey)) { + seenCallEdges.add(recvKey); + allEdgeRows.push([caller.id, recvTarget.id, 'receiver', 0.7, 0]); + } + } +} - for (const t of targets) { - const edgeKey = `${caller.id}|${t.id}`; - if (t.id !== caller.id && !seenCallEdges.has(edgeKey)) { - seenCallEdges.add(edgeKey); - const confidence = computeConfidence(relPath, t.file, importedFrom); - allEdgeRows.push([caller.id, t.id, 'calls', confidence, isDynamic]); - } - } +// ── Class hierarchy edges ─────────────────────────────────────────────── - // Receiver edge - if ( - call.receiver && - !BUILTIN_RECEIVERS.has(call.receiver) && - call.receiver !== 'this' && - call.receiver !== 'self' && - call.receiver !== 'super' - ) { - const receiverKinds = new Set(['class', 'struct', 'interface', 'type', 'module']); - const samefile = ctx.nodesByNameAndFile.get(`${call.receiver}|${relPath}`) || []; - const candidates = - samefile.length > 0 ? 
samefile : ctx.nodesByName.get(call.receiver) || []; - const receiverNodes = candidates.filter((n) => receiverKinds.has(n.kind)); - if (receiverNodes.length > 0 && caller) { - const recvTarget = receiverNodes[0]; - const recvKey = `recv|${caller.id}|${recvTarget.id}`; - if (!seenCallEdges.has(recvKey)) { - seenCallEdges.add(recvKey); - allEdgeRows.push([caller.id, recvTarget.id, 'receiver', 0.7, 0]); - } - } - } +function buildClassHierarchyEdges(ctx, relPath, symbols, allEdgeRows) { + for (const cls of symbols.classes) { + if (cls.extends) { + const sourceRow = (ctx.nodesByNameAndFile.get(`${cls.name}|${relPath}`) || []).find( + (n) => n.kind === 'class', + ); + const targetRows = (ctx.nodesByName.get(cls.extends) || []).filter((n) => n.kind === 'class'); + if (sourceRow) { + for (const t of targetRows) { + allEdgeRows.push([sourceRow.id, t.id, 'extends', 1.0, 0]); } + } + } - // Class extends edges - for (const cls of symbols.classes) { - if (cls.extends) { - const sourceRow = (ctx.nodesByNameAndFile.get(`${cls.name}|${relPath}`) || []).find( - (n) => n.kind === 'class', - ); - const targetCandidates = ctx.nodesByName.get(cls.extends) || []; - const targetRows = targetCandidates.filter((n) => n.kind === 'class'); - if (sourceRow) { - for (const t of targetRows) { - allEdgeRows.push([sourceRow.id, t.id, 'extends', 1.0, 0]); - } - } - } - - if (cls.implements) { - const sourceRow = (ctx.nodesByNameAndFile.get(`${cls.name}|${relPath}`) || []).find( - (n) => n.kind === 'class', - ); - const targetCandidates = ctx.nodesByName.get(cls.implements) || []; - const targetRows = targetCandidates.filter( - (n) => n.kind === 'interface' || n.kind === 'class', - ); - if (sourceRow) { - for (const t of targetRows) { - allEdgeRows.push([sourceRow.id, t.id, 'implements', 1.0, 0]); - } - } - } + if (cls.implements) { + const sourceRow = (ctx.nodesByNameAndFile.get(`${cls.name}|${relPath}`) || []).find( + (n) => n.kind === 'class', + ); + const targetRows = 
(ctx.nodesByName.get(cls.implements) || []).filter( + (n) => n.kind === 'interface' || n.kind === 'class', + ); + if (sourceRow) { + for (const t of targetRows) { + allEdgeRows.push([sourceRow.id, t.id, 'implements', 1.0, 0]); } } } + } +} + +// ── Main entry point ──────────────────────────────────────────────────── + +/** + * @param {import('../context.js').PipelineContext} ctx + */ +export async function buildEdges(ctx) { + const { db, engineName } = ctx; + + const getNodeIdStmt = makeGetNodeIdStmt(db); + + const allNodes = db + .prepare( + `SELECT id, name, kind, file, line FROM nodes WHERE kind IN ('function','method','class','interface','struct','type','module','enum','trait')`, + ) + .all(); + setupNodeLookups(ctx, allNodes); + + const t0 = performance.now(); + const buildEdgesTx = db.transaction(() => { + const allEdgeRows = []; + + buildImportEdges(ctx, getNodeIdStmt, allEdgeRows); + + const native = engineName === 'native' ? loadNative() : null; + if (native?.buildCallEdges) { + buildCallEdgesNative(ctx, getNodeIdStmt, allEdgeRows, allNodes, native); + } else { + buildCallEdgesJS(ctx, getNodeIdStmt, allEdgeRows); + } batchInsertEdges(db, allEdgeRows); }); diff --git a/src/domain/graph/builder/stages/detect-changes.js b/src/domain/graph/builder/stages/detect-changes.js index 50ffbd1d..23d15245 100644 --- a/src/domain/graph/builder/stages/detect-changes.js +++ b/src/domain/graph/builder/stages/detect-changes.js @@ -13,12 +13,13 @@ import { parseFilesAuto } from '../../../parser.js'; import { readJournal, writeJournalHeader } from '../../journal.js'; import { fileHash, fileStat, purgeFilesFromGraph, readFileSafe } from '../helpers.js'; +// ── Three-tier change detection ───────────────────────────────────────── + /** * Determine which files have changed since last build. 
- * Three-tier cascade: - * Tier 0 — Journal: O(changed) when watcher was running - * Tier 1 — mtime+size: O(n) stats, O(changed) reads - * Tier 2 — Hash comparison: O(changed) reads (fallback from Tier 1) + * Tier 0 — Journal: O(changed) when watcher was running + * Tier 1 — mtime+size: O(n) stats, O(changed) reads + * Tier 2 — Hash comparison: O(changed) reads (fallback from Tier 1) */ function getChangedFiles(db, allFiles, rootDir) { let hasTable = false; @@ -44,6 +45,17 @@ function getChangedFiles(db, allFiles, rootDir) { .map((r) => [r.file, r]), ); + const removed = detectRemovedFiles(existing, allFiles, rootDir); + + // Tier 0: Journal + const journalResult = tryJournalTier(db, existing, rootDir, removed); + if (journalResult) return journalResult; + + // Tier 1 + 2: mtime/size fast-path → hash comparison + return mtimeAndHashTiers(existing, allFiles, rootDir, removed); +} + +function detectRemovedFiles(existing, allFiles, rootDir) { const currentFiles = new Set(); for (const file of allFiles) { currentFiles.add(normalizePath(path.relative(rootDir, file))); @@ -55,51 +67,57 @@ function getChangedFiles(db, allFiles, rootDir) { removed.push(existingFile); } } + return removed; +} - // ── Tier 0: Journal ────────────────────────────────────────────── +function tryJournalTier(db, existing, rootDir, removed) { const journal = readJournal(rootDir); - if (journal.valid) { - const dbMtimes = db.prepare('SELECT MAX(mtime) as latest FROM file_hashes').get(); - const latestDbMtime = dbMtimes?.latest || 0; - const hasJournalEntries = journal.changed.length > 0 || journal.removed.length > 0; - - if (hasJournalEntries && journal.timestamp >= latestDbMtime) { - debug( - `Tier 0: journal valid, ${journal.changed.length} changed, ${journal.removed.length} removed`, - ); - const changed = []; - - for (const relPath of journal.changed) { - const absPath = path.join(rootDir, relPath); - const stat = fileStat(absPath); - if (!stat) continue; - - let content; - try { - content = 
readFileSafe(absPath); - } catch { - continue; - } - const hash = fileHash(content); - const record = existing.get(relPath); - if (!record || record.hash !== hash) { - changed.push({ file: absPath, content, hash, relPath, stat }); - } - } + if (!journal.valid) return null; - const removedSet = new Set(removed); - for (const relPath of journal.removed) { - if (existing.has(relPath)) removedSet.add(relPath); - } + const dbMtimes = db.prepare('SELECT MAX(mtime) as latest FROM file_hashes').get(); + const latestDbMtime = dbMtimes?.latest || 0; + const hasJournalEntries = journal.changed.length > 0 || journal.removed.length > 0; - return { changed, removed: [...removedSet], isFullBuild: false }; - } + if (!hasJournalEntries || journal.timestamp < latestDbMtime) { debug( `Tier 0: skipped (${hasJournalEntries ? 'timestamp stale' : 'no entries'}), falling to Tier 1`, ); + return null; } - // ── Tier 1: mtime+size fast-path ───────────────────────────────── + debug( + `Tier 0: journal valid, ${journal.changed.length} changed, ${journal.removed.length} removed`, + ); + const changed = []; + + for (const relPath of journal.changed) { + const absPath = path.join(rootDir, relPath); + const stat = fileStat(absPath); + if (!stat) continue; + + let content; + try { + content = readFileSafe(absPath); + } catch { + continue; + } + const hash = fileHash(content); + const record = existing.get(relPath); + if (!record || record.hash !== hash) { + changed.push({ file: absPath, content, hash, relPath, stat }); + } + } + + const removedSet = new Set(removed); + for (const relPath of journal.removed) { + if (existing.has(relPath)) removedSet.add(relPath); + } + + return { changed, removed: [...removedSet], isFullBuild: false }; +} + +function mtimeAndHashTiers(existing, allFiles, rootDir, removed) { + // Tier 1: mtime+size fast-path const needsHash = []; const skipped = []; @@ -130,7 +148,7 @@ function getChangedFiles(db, allFiles, rootDir) { debug(`Tier 1: ${skipped.length} skipped by 
mtime+size, ${needsHash.length} need hash check`); } - // ── Tier 2: Hash comparison ────────────────────────────────────── + // Tier 2: Hash comparison const changed = []; for (const item of needsHash) { @@ -168,9 +186,10 @@ function getChangedFiles(db, allFiles, rootDir) { return { changed, removed, isFullBuild: false }; } +// ── Pending analysis ──────────────────────────────────────────────────── + /** * Run pending analysis pass when no file changes but analysis tables are empty. - * @returns {boolean} true if analysis was run and we should early-exit */ async function runPendingAnalysis(ctx) { const { db, opts, engineOpts, allFiles, rootDir } = ctx; @@ -213,9 +232,8 @@ async function runPendingAnalysis(ctx) { return true; } -/** - * Self-heal metadata-only updates (mtime/size) without re-parsing. - */ +// ── Metadata self-heal ────────────────────────────────────────────────── + function healMetadata(ctx) { const { db, metadataUpdates } = ctx; if (!metadataUpdates || metadataUpdates.length === 0) return; @@ -237,104 +255,91 @@ function healMetadata(ctx) { } } -/** - * @param {import('../context.js').PipelineContext} ctx - */ -export async function detectChanges(ctx) { - const { db, allFiles, rootDir, incremental, forceFullRebuild, opts } = ctx; - - // Scoped builds already set parseChanges in collectFiles. - // Still need to purge removed files and set hasEmbeddings. 
- if (opts.scope) { - let hasEmbeddings = false; - try { - db.prepare('SELECT 1 FROM embeddings LIMIT 1').get(); - hasEmbeddings = true; - } catch { - /* table doesn't exist */ - } - ctx.hasEmbeddings = hasEmbeddings; +// ── Reverse-dependency cascade ────────────────────────────────────────── - // Reverse-dependency cascade BEFORE purging (needs existing edges to find importers) - const changePaths = ctx.parseChanges.map( - (item) => item.relPath || normalizePath(path.relative(rootDir, item.file)), - ); - const reverseDeps = new Set(); - if (!opts.noReverseDeps) { - const changedRelPaths = new Set([...changePaths, ...ctx.removed]); - if (changedRelPaths.size > 0) { - const findReverseDeps = db.prepare(` - SELECT DISTINCT n_src.file FROM edges e - JOIN nodes n_src ON e.source_id = n_src.id - JOIN nodes n_tgt ON e.target_id = n_tgt.id - WHERE n_tgt.file = ? AND n_src.file != n_tgt.file AND n_src.kind != 'directory' - `); - for (const relPath of changedRelPaths) { - for (const row of findReverseDeps.all(relPath)) { - if (!changedRelPaths.has(row.file) && !reverseDeps.has(row.file)) { - const absPath = path.join(rootDir, row.file); - if (fs.existsSync(absPath)) { - reverseDeps.add(row.file); - } - } - } +function findReverseDependencies(db, changedRelPaths, rootDir) { + const reverseDeps = new Set(); + if (changedRelPaths.size === 0) return reverseDeps; + + const findReverseDepsStmt = db.prepare(` + SELECT DISTINCT n_src.file FROM edges e + JOIN nodes n_src ON e.source_id = n_src.id + JOIN nodes n_tgt ON e.target_id = n_tgt.id + WHERE n_tgt.file = ? 
AND n_src.file != n_tgt.file AND n_src.kind != 'directory' + `); + for (const relPath of changedRelPaths) { + for (const row of findReverseDepsStmt.all(relPath)) { + if (!changedRelPaths.has(row.file) && !reverseDeps.has(row.file)) { + const absPath = path.join(rootDir, row.file); + if (fs.existsSync(absPath)) { + reverseDeps.add(row.file); } } } + } + return reverseDeps; +} - // Now purge changed + removed files - if (changePaths.length > 0 || ctx.removed.length > 0) { - purgeFilesFromGraph(db, [...ctx.removed, ...changePaths], { purgeHashes: false }); - } +function purgeAndAddReverseDeps(ctx, changePaths, reverseDeps) { + const { db, rootDir } = ctx; - // Delete outgoing edges for reverse-dep files and add to parse list - if (reverseDeps.size > 0) { - const deleteOutgoingEdgesForFile = db.prepare( - 'DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?)', - ); - for (const relPath of reverseDeps) { - deleteOutgoingEdgesForFile.run(relPath); - } - for (const relPath of reverseDeps) { - const absPath = path.join(rootDir, relPath); - ctx.parseChanges.push({ file: absPath, relPath, _reverseDepOnly: true }); - } - info( - `Scoped rebuild: ${changePaths.length} changed, ${ctx.removed.length} removed, ${reverseDeps.size} reverse-deps`, - ); + if (changePaths.length > 0 || ctx.removed.length > 0) { + purgeFilesFromGraph(db, [...ctx.removed, ...changePaths], { purgeHashes: false }); + } + + if (reverseDeps.size > 0) { + const deleteOutgoingEdgesForFile = db.prepare( + 'DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?)', + ); + for (const relPath of reverseDeps) { + deleteOutgoingEdgesForFile.run(relPath); + } + for (const relPath of reverseDeps) { + const absPath = path.join(rootDir, relPath); + ctx.parseChanges.push({ file: absPath, relPath, _reverseDepOnly: true }); } - return; } +} - const increResult = - incremental && !forceFullRebuild - ? 
getChangedFiles(db, allFiles, rootDir) - : { changed: allFiles.map((f) => ({ file: f })), removed: [], isFullBuild: true }; +// ── Scoped build path ─────────────────────────────────────────────────── - ctx.removed = increResult.removed; - ctx.isFullBuild = increResult.isFullBuild; - ctx.parseChanges = increResult.changed.filter((c) => !c.metadataOnly); - ctx.metadataUpdates = increResult.changed.filter((c) => c.metadataOnly); +function handleScopedBuild(ctx) { + const { db, rootDir, opts } = ctx; - // Early exit: no changes detected - if (!ctx.isFullBuild && ctx.parseChanges.length === 0 && ctx.removed.length === 0) { - const ranAnalysis = await runPendingAnalysis(ctx); - if (ranAnalysis) { - closeDb(db); - writeJournalHeader(rootDir, Date.now()); - ctx.earlyExit = true; - return; - } + let hasEmbeddings = false; + try { + db.prepare('SELECT 1 FROM embeddings LIMIT 1').get(); + hasEmbeddings = true; + } catch { + /* table doesn't exist */ + } + ctx.hasEmbeddings = hasEmbeddings; - healMetadata(ctx); - info('No changes detected. 
Graph is up to date.'); - closeDb(db); - writeJournalHeader(rootDir, Date.now()); - ctx.earlyExit = true; - return; + const changePaths = ctx.parseChanges.map( + (item) => item.relPath || normalizePath(path.relative(rootDir, item.file)), + ); + + let reverseDeps = new Set(); + if (!opts.noReverseDeps) { + const changedRelPaths = new Set([...changePaths, ...ctx.removed]); + reverseDeps = findReverseDependencies(db, changedRelPaths, rootDir); + } + + // Purge changed + removed files, then add reverse-deps + purgeAndAddReverseDeps(ctx, changePaths, reverseDeps); + + if (reverseDeps.size > 0) { + info( + `Scoped rebuild: ${changePaths.length} changed, ${ctx.removed.length} removed, ${reverseDeps.size} reverse-deps`, + ); } +} + +// ── Full/incremental build path ───────────────────────────────────────── + +function handleFullBuild(ctx) { + const { db } = ctx; - // ── Full build: truncate all tables ────────────────────────────── let hasEmbeddings = false; try { db.prepare('SELECT 1 FROM embeddings LIMIT 1').get(); @@ -344,19 +349,28 @@ export async function detectChanges(ctx) { } ctx.hasEmbeddings = hasEmbeddings; - if (ctx.isFullBuild) { - const deletions = - 'PRAGMA foreign_keys = OFF; DELETE FROM cfg_edges; DELETE FROM cfg_blocks; DELETE FROM node_metrics; DELETE FROM edges; DELETE FROM function_complexity; DELETE FROM dataflow; DELETE FROM ast_nodes; DELETE FROM nodes; PRAGMA foreign_keys = ON;'; - db.exec( - hasEmbeddings - ? `${deletions.replace('PRAGMA foreign_keys = ON;', '')} DELETE FROM embeddings; PRAGMA foreign_keys = ON;` - : deletions, - ); - return; + const deletions = + 'PRAGMA foreign_keys = OFF; DELETE FROM cfg_edges; DELETE FROM cfg_blocks; DELETE FROM node_metrics; DELETE FROM edges; DELETE FROM function_complexity; DELETE FROM dataflow; DELETE FROM ast_nodes; DELETE FROM nodes; PRAGMA foreign_keys = ON;'; + db.exec( + hasEmbeddings + ? 
`${deletions.replace('PRAGMA foreign_keys = ON;', '')} DELETE FROM embeddings; PRAGMA foreign_keys = ON;` + : deletions, + ); +} + +function handleIncrementalBuild(ctx) { + const { db, rootDir, opts } = ctx; + + let hasEmbeddings = false; + try { + db.prepare('SELECT 1 FROM embeddings LIMIT 1').get(); + hasEmbeddings = true; + } catch { + /* table doesn't exist */ } + ctx.hasEmbeddings = hasEmbeddings; - // ── Reverse-dependency cascade (incremental) ───────────────────── - const reverseDeps = new Set(); + let reverseDeps = new Set(); if (!opts.noReverseDeps) { const changedRelPaths = new Set(); for (const item of ctx.parseChanges) { @@ -365,25 +379,7 @@ export async function detectChanges(ctx) { for (const relPath of ctx.removed) { changedRelPaths.add(relPath); } - - if (changedRelPaths.size > 0) { - const findReverseDeps = db.prepare(` - SELECT DISTINCT n_src.file FROM edges e - JOIN nodes n_src ON e.source_id = n_src.id - JOIN nodes n_tgt ON e.target_id = n_tgt.id - WHERE n_tgt.file = ? 
AND n_src.file != n_tgt.file AND n_src.kind != 'directory' - `); - for (const relPath of changedRelPaths) { - for (const row of findReverseDeps.all(relPath)) { - if (!changedRelPaths.has(row.file) && !reverseDeps.has(row.file)) { - const absPath = path.join(rootDir, row.file); - if (fs.existsSync(absPath)) { - reverseDeps.add(row.file); - } - } - } - } - } + reverseDeps = findReverseDependencies(db, changedRelPaths, rootDir); } info( @@ -393,21 +389,57 @@ export async function detectChanges(ctx) { debug(`Changed files: ${ctx.parseChanges.map((c) => c.relPath).join(', ')}`); if (ctx.removed.length > 0) debug(`Removed files: ${ctx.removed.join(', ')}`); - // Purge changed and removed files const changePaths = ctx.parseChanges.map( (item) => item.relPath || normalizePath(path.relative(rootDir, item.file)), ); - purgeFilesFromGraph(db, [...ctx.removed, ...changePaths], { purgeHashes: false }); + purgeAndAddReverseDeps(ctx, changePaths, reverseDeps); +} - // Delete outgoing edges for reverse-dep files, then add them to parse list - const deleteOutgoingEdgesForFile = db.prepare( - 'DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?)', - ); - for (const relPath of reverseDeps) { - deleteOutgoingEdgesForFile.run(relPath); +// ── Main entry point ──────────────────────────────────────────────────── + +/** + * @param {import('../context.js').PipelineContext} ctx + */ +export async function detectChanges(ctx) { + const { db, allFiles, rootDir, incremental, forceFullRebuild, opts } = ctx; + + // Scoped builds already set parseChanges in collectFiles + if (opts.scope) { + handleScopedBuild(ctx); + return; } - for (const relPath of reverseDeps) { - const absPath = path.join(rootDir, relPath); - ctx.parseChanges.push({ file: absPath, relPath, _reverseDepOnly: true }); + + const increResult = + incremental && !forceFullRebuild + ? 
getChangedFiles(db, allFiles, rootDir) + : { changed: allFiles.map((f) => ({ file: f })), removed: [], isFullBuild: true }; + + ctx.removed = increResult.removed; + ctx.isFullBuild = increResult.isFullBuild; + ctx.parseChanges = increResult.changed.filter((c) => !c.metadataOnly); + ctx.metadataUpdates = increResult.changed.filter((c) => c.metadataOnly); + + // Early exit: no changes detected + if (!ctx.isFullBuild && ctx.parseChanges.length === 0 && ctx.removed.length === 0) { + const ranAnalysis = await runPendingAnalysis(ctx); + if (ranAnalysis) { + closeDb(db); + writeJournalHeader(rootDir, Date.now()); + ctx.earlyExit = true; + return; + } + + healMetadata(ctx); + info('No changes detected. Graph is up to date.'); + closeDb(db); + writeJournalHeader(rootDir, Date.now()); + ctx.earlyExit = true; + return; + } + + if (ctx.isFullBuild) { + handleFullBuild(ctx); + } else { + handleIncrementalBuild(ctx); } } diff --git a/src/domain/graph/builder/stages/insert-nodes.js b/src/domain/graph/builder/stages/insert-nodes.js index 2eaf6a73..6e22c966 100644 --- a/src/domain/graph/builder/stages/insert-nodes.js +++ b/src/domain/graph/builder/stages/insert-nodes.js @@ -15,23 +15,159 @@ import { readFileSafe, } from '../helpers.js'; +// ── Phase 1: Insert file nodes, definitions, exports ──────────────────── + +function insertDefinitionsAndExports(db, allSymbols) { + const phase1Rows = []; + for (const [relPath, symbols] of allSymbols) { + phase1Rows.push([relPath, 'file', relPath, 0, null, null, null, null, null]); + for (const def of symbols.definitions) { + const dotIdx = def.name.lastIndexOf('.'); + const scope = dotIdx !== -1 ? 
def.name.slice(0, dotIdx) : null; + phase1Rows.push([ + def.name, + def.kind, + relPath, + def.line, + def.endLine || null, + null, + def.name, + scope, + def.visibility || null, + ]); + } + for (const exp of symbols.exports) { + phase1Rows.push([exp.name, exp.kind, relPath, exp.line, null, null, exp.name, null, null]); + } + } + batchInsertNodes(db, phase1Rows); + + // Mark exported symbols + const markExported = db.prepare( + 'UPDATE nodes SET exported = 1 WHERE name = ? AND kind = ? AND file = ? AND line = ?', + ); + for (const [relPath, symbols] of allSymbols) { + for (const exp of symbols.exports) { + markExported.run(exp.name, exp.kind, relPath, exp.line); + } + } +} + +// ── Phase 2: Insert children (needs parent IDs) ──────────────────────── + +function insertChildren(db, allSymbols) { + const childRows = []; + for (const [relPath, symbols] of allSymbols) { + const nodeIdMap = new Map(); + for (const row of bulkNodeIdsByFile(db, relPath)) { + nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id); + } + for (const def of symbols.definitions) { + if (!def.children?.length) continue; + const defId = nodeIdMap.get(`${def.name}|${def.kind}|${def.line}`); + if (!defId) continue; + for (const child of def.children) { + const qualifiedName = `${def.name}.${child.name}`; + childRows.push([ + child.name, + child.kind, + relPath, + child.line, + child.endLine || null, + defId, + qualifiedName, + def.name, + child.visibility || null, + ]); + } + } + } + batchInsertNodes(db, childRows); +} + +// ── Phase 3: Insert containment + parameter_of edges ──────────────────── + +function insertContainmentEdges(db, allSymbols) { + const edgeRows = []; + for (const [relPath, symbols] of allSymbols) { + const nodeIdMap = new Map(); + for (const row of bulkNodeIdsByFile(db, relPath)) { + nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id); + } + const fileId = nodeIdMap.get(`${relPath}|file|0`); + for (const def of symbols.definitions) { + const defId = 
nodeIdMap.get(`${def.name}|${def.kind}|${def.line}`); + if (fileId && defId) { + edgeRows.push([fileId, defId, 'contains', 1.0, 0]); + } + if (def.children?.length && defId) { + for (const child of def.children) { + const childId = nodeIdMap.get(`${child.name}|${child.kind}|${child.line}`); + if (childId) { + edgeRows.push([defId, childId, 'contains', 1.0, 0]); + if (child.kind === 'parameter') { + edgeRows.push([childId, defId, 'parameter_of', 1.0, 0]); + } + } + } + } + } + } + batchInsertEdges(db, edgeRows); +} + +// ── Phase 4: Update file hashes ───────────────────────────────────────── + +function updateFileHashes(_db, allSymbols, precomputedData, metadataUpdates, rootDir, upsertHash) { + if (!upsertHash) return; + + for (const [relPath] of allSymbols) { + const precomputed = precomputedData.get(relPath); + if (precomputed?._reverseDepOnly) { + // no-op: file unchanged, hash already correct + } else if (precomputed?.hash) { + const stat = precomputed.stat || fileStat(path.join(rootDir, relPath)); + const mtime = stat ? Math.floor(stat.mtimeMs) : 0; + const size = stat ? stat.size : 0; + upsertHash.run(relPath, precomputed.hash, mtime, size); + } else { + const absPath = path.join(rootDir, relPath); + let code; + try { + code = readFileSafe(absPath); + } catch { + code = null; + } + if (code !== null) { + const stat = fileStat(absPath); + const mtime = stat ? Math.floor(stat.mtimeMs) : 0; + const size = stat ? stat.size : 0; + upsertHash.run(relPath, fileHash(code), mtime, size); + } + } + } + + // Also update metadata-only entries (self-heal mtime/size without re-parse) + for (const item of metadataUpdates) { + const mtime = item.stat ? Math.floor(item.stat.mtimeMs) : 0; + const size = item.stat ? 
item.stat.size : 0; + upsertHash.run(item.relPath, item.hash, mtime, size); + } +} + +// ── Main entry point ──────────────────────────────────────────────────── + /** * @param {import('../context.js').PipelineContext} ctx */ export async function insertNodes(ctx) { const { db, allSymbols, filesToParse, metadataUpdates, rootDir, removed } = ctx; - // Build lookup from incremental data (pre-computed hashes + stats) const precomputedData = new Map(); for (const item of filesToParse) { - if (item.relPath) { - precomputedData.set(item.relPath, item); - } + if (item.relPath) precomputedData.set(item.relPath, item); } - const bulkGetNodeIds = { all: (file) => bulkNodeIdsByFile(db, file) }; - - // Prepare hash upsert let upsertHash; try { upsertHash = db.prepare( @@ -42,143 +178,15 @@ export async function insertNodes(ctx) { } // Populate fileSymbols before the transaction so it is a pure input - // to (rather than a side-effect of) the DB write — avoids partial - // population if the transaction rolls back. for (const [relPath, symbols] of allSymbols) { ctx.fileSymbols.set(relPath, symbols); } const insertAll = db.transaction(() => { - // Phase 1: Batch insert all file nodes + definitions + exports - // Row format: [name, kind, file, line, end_line, parent_id, qualified_name, scope, visibility] - const phase1Rows = []; - for (const [relPath, symbols] of allSymbols) { - phase1Rows.push([relPath, 'file', relPath, 0, null, null, null, null, null]); - for (const def of symbols.definitions) { - // Methods already have 'Class.method' as name — use as qualified_name. - // For methods, scope is the class portion; for top-level defs, scope is null. - const dotIdx = def.name.lastIndexOf('.'); - const scope = dotIdx !== -1 ? 
def.name.slice(0, dotIdx) : null; - phase1Rows.push([ - def.name, - def.kind, - relPath, - def.line, - def.endLine || null, - null, - def.name, - scope, - def.visibility || null, - ]); - } - for (const exp of symbols.exports) { - phase1Rows.push([exp.name, exp.kind, relPath, exp.line, null, null, exp.name, null, null]); - } - } - batchInsertNodes(db, phase1Rows); - - // Phase 1b: Mark exported symbols - const markExported = db.prepare( - 'UPDATE nodes SET exported = 1 WHERE name = ? AND kind = ? AND file = ? AND line = ?', - ); - for (const [relPath, symbols] of allSymbols) { - for (const exp of symbols.exports) { - markExported.run(exp.name, exp.kind, relPath, exp.line); - } - } - - // Phase 3: Batch insert children (needs parent IDs from Phase 2) - const childRows = []; - for (const [relPath, symbols] of allSymbols) { - const nodeIdMap = new Map(); - for (const row of bulkGetNodeIds.all(relPath)) { - nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id); - } - for (const def of symbols.definitions) { - if (!def.children?.length) continue; - const defId = nodeIdMap.get(`${def.name}|${def.kind}|${def.line}`); - if (!defId) continue; - for (const child of def.children) { - const qualifiedName = `${def.name}.${child.name}`; - childRows.push([ - child.name, - child.kind, - relPath, - child.line, - child.endLine || null, - defId, - qualifiedName, - def.name, - child.visibility || null, - ]); - } - } - } - batchInsertNodes(db, childRows); - - // Phase 5: Batch insert contains/parameter_of edges - const edgeRows = []; - for (const [relPath, symbols] of allSymbols) { - const nodeIdMap = new Map(); - for (const row of bulkGetNodeIds.all(relPath)) { - nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id); - } - const fileId = nodeIdMap.get(`${relPath}|file|0`); - for (const def of symbols.definitions) { - const defId = nodeIdMap.get(`${def.name}|${def.kind}|${def.line}`); - if (fileId && defId) { - edgeRows.push([fileId, defId, 'contains', 1.0, 0]); - } - if 
(def.children?.length && defId) { - for (const child of def.children) { - const childId = nodeIdMap.get(`${child.name}|${child.kind}|${child.line}`); - if (childId) { - edgeRows.push([defId, childId, 'contains', 1.0, 0]); - if (child.kind === 'parameter') { - edgeRows.push([childId, defId, 'parameter_of', 1.0, 0]); - } - } - } - } - } - - // Update file hash — skip reverse-dep files (unchanged) - if (upsertHash) { - const precomputed = precomputedData.get(relPath); - if (precomputed?._reverseDepOnly) { - // no-op: file unchanged, hash already correct - } else if (precomputed?.hash) { - const stat = precomputed.stat || fileStat(path.join(rootDir, relPath)); - const mtime = stat ? Math.floor(stat.mtimeMs) : 0; - const size = stat ? stat.size : 0; - upsertHash.run(relPath, precomputed.hash, mtime, size); - } else { - const absPath = path.join(rootDir, relPath); - let code; - try { - code = readFileSafe(absPath); - } catch { - code = null; - } - if (code !== null) { - const stat = fileStat(absPath); - const mtime = stat ? Math.floor(stat.mtimeMs) : 0; - const size = stat ? stat.size : 0; - upsertHash.run(relPath, fileHash(code), mtime, size); - } - } - } - } - batchInsertEdges(db, edgeRows); - - // Also update metadata-only entries (self-heal mtime/size without re-parse) - if (upsertHash) { - for (const item of metadataUpdates) { - const mtime = item.stat ? Math.floor(item.stat.mtimeMs) : 0; - const size = item.stat ? 
item.stat.size : 0; - upsertHash.run(item.relPath, item.hash, mtime, size); - } - } + insertDefinitionsAndExports(db, allSymbols); + insertChildren(db, allSymbols); + insertContainmentEdges(db, allSymbols); + updateFileHashes(db, allSymbols, precomputedData, metadataUpdates, rootDir, upsertHash); }); const t0 = performance.now(); From cbdba7c5877d12399d5c30491c634efd9fe69413 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 03:30:02 -0600 Subject: [PATCH 03/22] refactor: decompose domain analysis functions into focused helpers Impact: 37 functions changed, 29 affected --- src/domain/analysis/context.js | 361 ++++++++++----------- src/domain/analysis/dependencies.js | 346 ++++++++++++--------- src/domain/analysis/impact.js | 418 ++++++++++++++++--------- src/domain/analysis/module-map.js | 467 +++++++++++++++------------- 4 files changed, 884 insertions(+), 708 deletions(-) diff --git a/src/domain/analysis/context.js b/src/domain/analysis/context.js index a6721b35..e8b5a869 100644 --- a/src/domain/analysis/context.js +++ b/src/domain/analysis/context.js @@ -27,6 +27,149 @@ import { normalizeSymbol } from '../../shared/normalize.js'; import { paginateResult } from '../../shared/paginate.js'; import { findMatchingNodes } from './symbol-lookup.js'; +function buildCallees(db, node, repoRoot, getFileLines, opts) { + const { noTests, depth } = opts; + const calleeRows = findCallees(db, node.id); + const filteredCallees = noTests ? calleeRows.filter((c) => !isTestFile(c.file)) : calleeRows; + + const callees = filteredCallees.map((c) => { + const cLines = getFileLines(c.file); + const summary = cLines ? 
extractSummary(cLines, c.line) : null; + let calleeSource = null; + if (depth >= 1) { + calleeSource = readSourceRange(repoRoot, c.file, c.line, c.end_line); + } + return { + name: c.name, + kind: c.kind, + file: c.file, + line: c.line, + endLine: c.end_line || null, + summary, + source: calleeSource, + }; + }); + + if (depth > 1) { + const visited = new Set(filteredCallees.map((c) => c.id)); + visited.add(node.id); + let frontier = filteredCallees.map((c) => c.id); + const maxDepth = Math.min(depth, 5); + for (let d = 2; d <= maxDepth; d++) { + const nextFrontier = []; + for (const fid of frontier) { + const deeper = findCallees(db, fid); + for (const c of deeper) { + if (!visited.has(c.id) && (!noTests || !isTestFile(c.file))) { + visited.add(c.id); + nextFrontier.push(c.id); + const cLines = getFileLines(c.file); + callees.push({ + name: c.name, + kind: c.kind, + file: c.file, + line: c.line, + endLine: c.end_line || null, + summary: cLines ? extractSummary(cLines, c.line) : null, + source: readSourceRange(repoRoot, c.file, c.line, c.end_line), + }); + } + } + } + frontier = nextFrontier; + if (frontier.length === 0) break; + } + } + + return callees; +} + +function buildCallers(db, node, noTests) { + let callerRows = findCallers(db, node.id); + + if (node.kind === 'method' && node.name.includes('.')) { + const methodName = node.name.split('.').pop(); + const relatedMethods = resolveMethodViaHierarchy(db, methodName); + for (const rm of relatedMethods) { + if (rm.id === node.id) continue; + const extraCallers = findCallers(db, rm.id); + callerRows.push(...extraCallers.map((c) => ({ ...c, viaHierarchy: rm.name }))); + } + } + if (noTests) callerRows = callerRows.filter((c) => !isTestFile(c.file)); + + return callerRows.map((c) => ({ + name: c.name, + kind: c.kind, + file: c.file, + line: c.line, + viaHierarchy: c.viaHierarchy || undefined, + })); +} + +function buildRelatedTests(db, node, getFileLines, includeTests) { + const testCallerRows = findCallers(db, 
node.id); + const testCallers = testCallerRows.filter((c) => isTestFile(c.file)); + + const testsByFile = new Map(); + for (const tc of testCallers) { + if (!testsByFile.has(tc.file)) testsByFile.set(tc.file, []); + testsByFile.get(tc.file).push(tc); + } + + const relatedTests = []; + for (const [file] of testsByFile) { + const tLines = getFileLines(file); + const testNames = []; + if (tLines) { + for (const tl of tLines) { + const tm = tl.match(/(?:it|test|describe)\s*\(\s*['"`]([^'"`]+)['"`]/); + if (tm) testNames.push(tm[1]); + } + } + const testSource = includeTests && tLines ? tLines.join('\n') : undefined; + relatedTests.push({ + file, + testCount: testNames.length, + testNames, + source: testSource, + }); + } + + return relatedTests; +} + +function getComplexityMetrics(db, nodeId) { + try { + const cRow = getComplexityForNode(db, nodeId); + if (!cRow) return null; + return { + cognitive: cRow.cognitive, + cyclomatic: cRow.cyclomatic, + maxNesting: cRow.max_nesting, + maintainabilityIndex: cRow.maintainability_index || 0, + halsteadVolume: cRow.halstead_volume || 0, + }; + } catch (e) { + debug(`complexity lookup failed for node ${nodeId}: ${e.message}`); + return null; + } +} + +function getNodeChildrenSafe(db, nodeId) { + try { + return findNodeChildren(db, nodeId).map((c) => ({ + name: c.name, + kind: c.kind, + line: c.line, + endLine: c.end_line || null, + })); + } catch (e) { + debug(`findNodeChildren failed for node ${nodeId}: ${e.message}`); + return []; + } +} + function explainFileImpl(db, target, getFileLines) { const fileNodes = findFileNodes(db, `%${target}%`); if (fileNodes.length === 0) return []; @@ -50,14 +193,10 @@ function explainFileImpl(db, target, getFileLines) { const publicApi = symbols.filter((s) => publicIds.has(s.id)).map(mapSymbol); const internal = symbols.filter((s) => !publicIds.has(s.id)).map(mapSymbol); - // Imports / importedBy const imports = findImportTargets(db, fn.id).map((r) => ({ file: r.file })); - const importedBy = 
findImportSources(db, fn.id).map((r) => ({ file: r.file })); - // Intra-file data flow const intraEdges = findIntraFileCallEdges(db, fn.file); - const dataFlowMap = new Map(); for (const edge of intraEdges) { if (!dataFlowMap.has(edge.caller_name)) dataFlowMap.set(edge.caller_name, []); @@ -68,7 +207,6 @@ function explainFileImpl(db, target, getFileLines) { callees, })); - // Line count: prefer node_metrics (actual), fall back to MAX(end_line) const metric = db .prepare(`SELECT nm.line_count FROM node_metrics nm WHERE nm.node_id = ?`) .get(fn.id); @@ -130,29 +268,12 @@ function explainFunctionImpl(db, target, noTests, getFileLines) { .filter((r) => isTestFile(r.file) && !seenFiles.has(r.file) && seenFiles.add(r.file)) .map((r) => ({ file: r.file })); - // Complexity metrics - let complexityMetrics = null; - try { - const cRow = getComplexityForNode(db, node.id); - if (cRow) { - complexityMetrics = { - cognitive: cRow.cognitive, - cyclomatic: cRow.cyclomatic, - maxNesting: cRow.max_nesting, - maintainabilityIndex: cRow.maintainability_index || 0, - halsteadVolume: cRow.halstead_volume || 0, - }; - } - } catch (e) { - debug(`complexity lookup failed for node ${node.id}: ${e.message}`); - } - return { ...normalizeSymbol(node, db, hc), lineCount, summary, signature, - complexity: complexityMetrics, + complexity: getComplexityMetrics(db, node.id), callees, callers, relatedTests, @@ -160,6 +281,28 @@ function explainFunctionImpl(db, target, noTests, getFileLines) { }); } +function explainCallees(parentResults, currentDepth, visited, db, noTests, getFileLines) { + if (currentDepth <= 0) return; + for (const r of parentResults) { + const newCallees = []; + for (const callee of r.callees) { + const key = `${callee.name}:${callee.file}:${callee.line}`; + if (visited.has(key)) continue; + visited.add(key); + const calleeResults = explainFunctionImpl(db, callee.name, noTests, getFileLines); + const exact = calleeResults.find((cr) => cr.file === callee.file && cr.line === 
callee.line); + if (exact) { + exact._depth = (r._depth || 0) + 1; + newCallees.push(exact); + } + } + if (newCallees.length > 0) { + r.depDetails = newCallees; + explainCallees(newCallees, currentDepth - 1, visited, db, noTests, getFileLines); + } + } +} + // ─── Exported functions ────────────────────────────────────────────────── export function contextData(name, customDbPath, opts = {}) { @@ -178,156 +321,22 @@ export function contextData(name, customDbPath, opts = {}) { return { name, results: [] }; } - // No hardcoded slice — pagination handles bounding via limit/offset - const getFileLines = createFileLinesReader(repoRoot); const results = nodes.map((node) => { const fileLines = getFileLines(node.file); - // Source const source = noSource ? null : readSourceRange(repoRoot, node.file, node.line, node.end_line); - // Signature const signature = fileLines ? extractSignature(fileLines, node.line) : null; - // Callees - const calleeRows = findCallees(db, node.id); - const filteredCallees = noTests ? calleeRows.filter((c) => !isTestFile(c.file)) : calleeRows; - - const callees = filteredCallees.map((c) => { - const cLines = getFileLines(c.file); - const summary = cLines ? 
extractSummary(cLines, c.line) : null; - let calleeSource = null; - if (depth >= 1) { - calleeSource = readSourceRange(repoRoot, c.file, c.line, c.end_line); - } - return { - name: c.name, - kind: c.kind, - file: c.file, - line: c.line, - endLine: c.end_line || null, - summary, - source: calleeSource, - }; - }); - - // Deep callee expansion via BFS (depth > 1, capped at 5) - if (depth > 1) { - const visited = new Set(filteredCallees.map((c) => c.id)); - visited.add(node.id); - let frontier = filteredCallees.map((c) => c.id); - const maxDepth = Math.min(depth, 5); - for (let d = 2; d <= maxDepth; d++) { - const nextFrontier = []; - for (const fid of frontier) { - const deeper = findCallees(db, fid); - for (const c of deeper) { - if (!visited.has(c.id) && (!noTests || !isTestFile(c.file))) { - visited.add(c.id); - nextFrontier.push(c.id); - const cLines = getFileLines(c.file); - callees.push({ - name: c.name, - kind: c.kind, - file: c.file, - line: c.line, - endLine: c.end_line || null, - summary: cLines ? 
extractSummary(cLines, c.line) : null, - source: readSourceRange(repoRoot, c.file, c.line, c.end_line), - }); - } - } - } - frontier = nextFrontier; - if (frontier.length === 0) break; - } - } - - // Callers - let callerRows = findCallers(db, node.id); - - // Method hierarchy resolution - if (node.kind === 'method' && node.name.includes('.')) { - const methodName = node.name.split('.').pop(); - const relatedMethods = resolveMethodViaHierarchy(db, methodName); - for (const rm of relatedMethods) { - if (rm.id === node.id) continue; - const extraCallers = findCallers(db, rm.id); - callerRows.push(...extraCallers.map((c) => ({ ...c, viaHierarchy: rm.name }))); - } - } - if (noTests) callerRows = callerRows.filter((c) => !isTestFile(c.file)); - - const callers = callerRows.map((c) => ({ - name: c.name, - kind: c.kind, - file: c.file, - line: c.line, - viaHierarchy: c.viaHierarchy || undefined, - })); - - // Related tests: callers that live in test files - const testCallerRows = findCallers(db, node.id); - const testCallers = testCallerRows.filter((c) => isTestFile(c.file)); - - const testsByFile = new Map(); - for (const tc of testCallers) { - if (!testsByFile.has(tc.file)) testsByFile.set(tc.file, []); - testsByFile.get(tc.file).push(tc); - } - - const relatedTests = []; - for (const [file] of testsByFile) { - const tLines = getFileLines(file); - const testNames = []; - if (tLines) { - for (const tl of tLines) { - const tm = tl.match(/(?:it|test|describe)\s*\(\s*['"`]([^'"`]+)['"`]/); - if (tm) testNames.push(tm[1]); - } - } - const testSource = includeTests && tLines ? 
tLines.join('\n') : undefined; - relatedTests.push({ - file, - testCount: testNames.length, - testNames, - source: testSource, - }); - } - - // Complexity metrics - let complexityMetrics = null; - try { - const cRow = getComplexityForNode(db, node.id); - if (cRow) { - complexityMetrics = { - cognitive: cRow.cognitive, - cyclomatic: cRow.cyclomatic, - maxNesting: cRow.max_nesting, - maintainabilityIndex: cRow.maintainability_index || 0, - halsteadVolume: cRow.halstead_volume || 0, - }; - } - } catch (e) { - debug(`complexity lookup failed for node ${node.id}: ${e.message}`); - } - - // Children (parameters, properties, constants) - let nodeChildren = []; - try { - nodeChildren = findNodeChildren(db, node.id).map((c) => ({ - name: c.name, - kind: c.kind, - line: c.line, - endLine: c.end_line || null, - })); - } catch (e) { - debug(`findNodeChildren failed for node ${node.id}: ${e.message}`); - } + const callees = buildCallees(db, node, repoRoot, getFileLines, { noTests, depth }); + const callers = buildCallers(db, node, noTests); + const relatedTests = buildRelatedTests(db, node, getFileLines, includeTests); + const complexityMetrics = getComplexityMetrics(db, node.id); + const nodeChildren = getNodeChildrenSafe(db, node.id); return { name: node.name, @@ -370,35 +379,9 @@ export function explainData(target, customDbPath, opts = {}) { ? 
explainFileImpl(db, target, getFileLines) : explainFunctionImpl(db, target, noTests, getFileLines); - // Recursive dependency explanation for function targets if (kind === 'function' && depth > 0 && results.length > 0) { const visited = new Set(results.map((r) => `${r.name}:${r.file}:${r.line}`)); - - function explainCallees(parentResults, currentDepth) { - if (currentDepth <= 0) return; - for (const r of parentResults) { - const newCallees = []; - for (const callee of r.callees) { - const key = `${callee.name}:${callee.file}:${callee.line}`; - if (visited.has(key)) continue; - visited.add(key); - const calleeResults = explainFunctionImpl(db, callee.name, noTests, getFileLines); - const exact = calleeResults.find( - (cr) => cr.file === callee.file && cr.line === callee.line, - ); - if (exact) { - exact._depth = (r._depth || 0) + 1; - newCallees.push(exact); - } - } - if (newCallees.length > 0) { - r.depDetails = newCallees; - explainCallees(newCallees, currentDepth - 1); - } - } - } - - explainCallees(results, depth); + explainCallees(results, depth, visited, db, noTests, getFileLines); } const base = { target, kind, results }; diff --git a/src/domain/analysis/dependencies.js b/src/domain/analysis/dependencies.js index e632470f..867cd5bd 100644 --- a/src/domain/analysis/dependencies.js +++ b/src/domain/analysis/dependencies.js @@ -46,6 +46,61 @@ export function fileDepsData(file, customDbPath, opts = {}) { } } +/** + * BFS transitive caller traversal starting from `callers` of `nodeId`. + * Returns an object keyed by depth (2..depth) → array of caller descriptors. + */ +function buildTransitiveCallers(db, callers, nodeId, depth, noTests) { + const transitiveCallers = {}; + if (depth <= 1) return transitiveCallers; + + const visited = new Set([nodeId]); + let frontier = callers + .map((c) => { + const row = db + .prepare('SELECT id FROM nodes WHERE name = ? AND kind = ? AND file = ? AND line = ?') + .get(c.name, c.kind, c.file, c.line); + return row ? 
{ ...c, id: row.id } : null; + }) + .filter(Boolean); + + for (let d = 2; d <= depth; d++) { + const nextFrontier = []; + for (const f of frontier) { + if (visited.has(f.id)) continue; + visited.add(f.id); + const upstream = db + .prepare(` + SELECT n.name, n.kind, n.file, n.line + FROM edges e JOIN nodes n ON e.source_id = n.id + WHERE e.target_id = ? AND e.kind = 'calls' + `) + .all(f.id); + for (const u of upstream) { + if (noTests && isTestFile(u.file)) continue; + const uid = db + .prepare('SELECT id FROM nodes WHERE name = ? AND kind = ? AND file = ? AND line = ?') + .get(u.name, u.kind, u.file, u.line)?.id; + if (uid && !visited.has(uid)) { + nextFrontier.push({ ...u, id: uid }); + } + } + } + if (nextFrontier.length > 0) { + transitiveCallers[d] = nextFrontier.map((n) => ({ + name: n.name, + kind: n.kind, + file: n.file, + line: n.line, + })); + } + frontier = nextFrontier; + if (frontier.length === 0) break; + } + + return transitiveCallers; +} + export function fnDepsData(name, customDbPath, opts = {}) { const db = openReadonlyOrFail(customDbPath); try { @@ -75,55 +130,7 @@ export function fnDepsData(name, customDbPath, opts = {}) { } if (noTests) callers = callers.filter((c) => !isTestFile(c.file)); - // Transitive callers - const transitiveCallers = {}; - if (depth > 1) { - const visited = new Set([node.id]); - let frontier = callers - .map((c) => { - const row = db - .prepare('SELECT id FROM nodes WHERE name = ? AND kind = ? AND file = ? AND line = ?') - .get(c.name, c.kind, c.file, c.line); - return row ? { ...c, id: row.id } : null; - }) - .filter(Boolean); - - for (let d = 2; d <= depth; d++) { - const nextFrontier = []; - for (const f of frontier) { - if (visited.has(f.id)) continue; - visited.add(f.id); - const upstream = db - .prepare(` - SELECT n.name, n.kind, n.file, n.line - FROM edges e JOIN nodes n ON e.source_id = n.id - WHERE e.target_id = ? 
AND e.kind = 'calls' - `) - .all(f.id); - for (const u of upstream) { - if (noTests && isTestFile(u.file)) continue; - const uid = db - .prepare( - 'SELECT id FROM nodes WHERE name = ? AND kind = ? AND file = ? AND line = ?', - ) - .get(u.name, u.kind, u.file, u.line)?.id; - if (uid && !visited.has(uid)) { - nextFrontier.push({ ...u, id: uid }); - } - } - } - if (nextFrontier.length > 0) { - transitiveCallers[d] = nextFrontier.map((n) => ({ - name: n.name, - kind: n.kind, - file: n.file, - line: n.line, - })); - } - frontier = nextFrontier; - if (frontier.length === 0) break; - } - } + const transitiveCallers = buildTransitiveCallers(db, callers, node.id, depth, noTests); return { ...normalizeSymbol(node, db, hc), @@ -151,37 +158,40 @@ export function fnDepsData(name, customDbPath, opts = {}) { } } -export function pathData(from, to, customDbPath, opts = {}) { - const db = openReadonlyOrFail(customDbPath); - try { - const noTests = opts.noTests || false; - const maxDepth = opts.maxDepth || 10; - const edgeKinds = opts.edgeKinds || ['calls']; - const reverse = opts.reverse || false; +/** + * Resolve from/to symbol names to node records. + * Returns { sourceNode, targetNode, fromCandidates, toCandidates } on success, + * or { earlyResult } when a caller-facing error/not-found response should be returned immediately. 
+ */ +function resolveEndpoints(db, from, to, opts) { + const { noTests = false } = opts; - const fromNodes = findMatchingNodes(db, from, { - noTests, - file: opts.fromFile, - kind: opts.kind, - }); - if (fromNodes.length === 0) { - return { + const fromNodes = findMatchingNodes(db, from, { + noTests, + file: opts.fromFile, + kind: opts.kind, + }); + if (fromNodes.length === 0) { + return { + earlyResult: { from, to, found: false, error: `No symbol matching "${from}"`, fromCandidates: [], toCandidates: [], - }; - } + }, + }; + } - const toNodes = findMatchingNodes(db, to, { - noTests, - file: opts.toFile, - kind: opts.kind, - }); - if (toNodes.length === 0) { - return { + const toNodes = findMatchingNodes(db, to, { + noTests, + file: opts.toFile, + kind: opts.kind, + }); + if (toNodes.length === 0) { + return { + earlyResult: { from, to, found: false, @@ -190,18 +200,118 @@ export function pathData(from, to, customDbPath, opts = {}) { .slice(0, 5) .map((n) => ({ name: n.name, kind: n.kind, file: n.file, line: n.line })), toCandidates: [], - }; + }, + }; + } + + const fromCandidates = fromNodes + .slice(0, 5) + .map((n) => ({ name: n.name, kind: n.kind, file: n.file, line: n.line })); + const toCandidates = toNodes + .slice(0, 5) + .map((n) => ({ name: n.name, kind: n.kind, file: n.file, line: n.line })); + + return { + sourceNode: fromNodes[0], + targetNode: toNodes[0], + fromCandidates, + toCandidates, + }; +} + +/** + * BFS from sourceId toward targetId. + * Returns { found, parent, alternateCount, foundDepth }. + * `parent` maps nodeId → { parentId, edgeKind }. + */ +function bfsShortestPath(db, sourceId, targetId, edgeKinds, reverse, maxDepth, noTests) { + const kindPlaceholders = edgeKinds.map(() => '?').join(', '); + + // Forward: source_id → target_id (A calls... calls B) + // Reverse: target_id → source_id (B is called by... called by A) + const neighborQuery = reverse + ? 
`SELECT n.id, n.name, n.kind, n.file, n.line, e.kind AS edge_kind + FROM edges e JOIN nodes n ON e.source_id = n.id + WHERE e.target_id = ? AND e.kind IN (${kindPlaceholders})` + : `SELECT n.id, n.name, n.kind, n.file, n.line, e.kind AS edge_kind + FROM edges e JOIN nodes n ON e.target_id = n.id + WHERE e.source_id = ? AND e.kind IN (${kindPlaceholders})`; + const neighborStmt = db.prepare(neighborQuery); + + const visited = new Set([sourceId]); + const parent = new Map(); + let queue = [sourceId]; + let found = false; + let alternateCount = 0; + let foundDepth = -1; + + for (let depth = 1; depth <= maxDepth; depth++) { + const nextQueue = []; + for (const currentId of queue) { + const neighbors = neighborStmt.all(currentId, ...edgeKinds); + for (const n of neighbors) { + if (noTests && isTestFile(n.file)) continue; + if (n.id === targetId) { + if (!found) { + found = true; + foundDepth = depth; + parent.set(n.id, { parentId: currentId, edgeKind: n.edge_kind }); + } + alternateCount++; + continue; + } + if (!visited.has(n.id)) { + visited.add(n.id); + parent.set(n.id, { parentId: currentId, edgeKind: n.edge_kind }); + nextQueue.push(n.id); + } + } } + if (found) break; + queue = nextQueue; + if (queue.length === 0) break; + } + + return { found, parent, alternateCount, foundDepth }; +} + +/** + * Walk the parent map from targetId back to sourceId and return an ordered + * array of node IDs source → target. + */ +function reconstructPath(db, pathIds, parent) { + const nodeCache = new Map(); + const getNode = (id) => { + if (nodeCache.has(id)) return nodeCache.get(id); + const row = db.prepare('SELECT name, kind, file, line FROM nodes WHERE id = ?').get(id); + nodeCache.set(id, row); + return row; + }; + + return pathIds.map((id, idx) => { + const node = getNode(id); + const edgeKind = idx === 0 ? 
null : parent.get(id).edgeKind; + return { name: node.name, kind: node.kind, file: node.file, line: node.line, edgeKind }; + }); +} + +export function pathData(from, to, customDbPath, opts = {}) { + const db = openReadonlyOrFail(customDbPath); + try { + const noTests = opts.noTests || false; + const maxDepth = opts.maxDepth || 10; + const edgeKinds = opts.edgeKinds || ['calls']; + const reverse = opts.reverse || false; - const sourceNode = fromNodes[0]; - const targetNode = toNodes[0]; + const resolved = resolveEndpoints(db, from, to, { + noTests, + fromFile: opts.fromFile, + toFile: opts.toFile, + kind: opts.kind, + }); + if (resolved.earlyResult) return resolved.earlyResult; - const fromCandidates = fromNodes - .slice(0, 5) - .map((n) => ({ name: n.name, kind: n.kind, file: n.file, line: n.line })); - const toCandidates = toNodes - .slice(0, 5) - .map((n) => ({ name: n.name, kind: n.kind, file: n.file, line: n.line })); + const { sourceNode, targetNode, fromCandidates, toCandidates } = resolved; // Self-path if (sourceNode.id === targetNode.id) { @@ -228,55 +338,12 @@ export function pathData(from, to, customDbPath, opts = {}) { }; } - // Build edge kind filter - const kindPlaceholders = edgeKinds.map(() => '?').join(', '); - - // BFS — direction depends on `reverse` flag - // Forward: source_id → target_id (A calls... calls B) - // Reverse: target_id → source_id (B is called by... called by A) - const neighborQuery = reverse - ? `SELECT n.id, n.name, n.kind, n.file, n.line, e.kind AS edge_kind - FROM edges e JOIN nodes n ON e.source_id = n.id - WHERE e.target_id = ? AND e.kind IN (${kindPlaceholders})` - : `SELECT n.id, n.name, n.kind, n.file, n.line, e.kind AS edge_kind - FROM edges e JOIN nodes n ON e.target_id = n.id - WHERE e.source_id = ? 
AND e.kind IN (${kindPlaceholders})`; - const neighborStmt = db.prepare(neighborQuery); - - const visited = new Set([sourceNode.id]); - // parent map: nodeId → { parentId, edgeKind } - const parent = new Map(); - let queue = [sourceNode.id]; - let found = false; - let alternateCount = 0; - let foundDepth = -1; - - for (let depth = 1; depth <= maxDepth; depth++) { - const nextQueue = []; - for (const currentId of queue) { - const neighbors = neighborStmt.all(currentId, ...edgeKinds); - for (const n of neighbors) { - if (noTests && isTestFile(n.file)) continue; - if (n.id === targetNode.id) { - if (!found) { - found = true; - foundDepth = depth; - parent.set(n.id, { parentId: currentId, edgeKind: n.edge_kind }); - } - alternateCount++; - continue; - } - if (!visited.has(n.id)) { - visited.add(n.id); - parent.set(n.id, { parentId: currentId, edgeKind: n.edge_kind }); - nextQueue.push(n.id); - } - } - } - if (found) break; - queue = nextQueue; - if (queue.length === 0) break; - } + const { + found, + parent, + alternateCount: rawAlternateCount, + foundDepth, + } = bfsShortestPath(db, sourceNode.id, targetNode.id, edgeKinds, reverse, maxDepth, noTests); if (!found) { return { @@ -294,8 +361,8 @@ export function pathData(from, to, customDbPath, opts = {}) { }; } - // alternateCount includes the one we kept; subtract 1 for "alternates" - alternateCount = Math.max(0, alternateCount - 1); + // rawAlternateCount includes the one we kept; subtract 1 for "alternates" + const alternateCount = Math.max(0, rawAlternateCount - 1); // Reconstruct path from target back to source const pathIds = [targetNode.id]; @@ -307,20 +374,7 @@ export function pathData(from, to, customDbPath, opts = {}) { } pathIds.reverse(); - // Build path with node info - const nodeCache = new Map(); - const getNode = (id) => { - if (nodeCache.has(id)) return nodeCache.get(id); - const row = db.prepare('SELECT name, kind, file, line FROM nodes WHERE id = ?').get(id); - nodeCache.set(id, row); - return row; - 
}; - - const resultPath = pathIds.map((id, idx) => { - const node = getNode(id); - const edgeKind = idx === 0 ? null : parent.get(id).edgeKind; - return { name: node.name, kind: node.kind, file: node.file, line: node.line, edgeKind }; - }); + const resultPath = reconstructPath(db, pathIds, parent); return { from, diff --git a/src/domain/analysis/impact.js b/src/domain/analysis/impact.js index bd3bbe1d..6bdd5464 100644 --- a/src/domain/analysis/impact.js +++ b/src/domain/analysis/impact.js @@ -134,6 +134,251 @@ export function fnImpactData(name, customDbPath, opts = {}) { } } +// ─── diffImpactData helpers ───────────────────────────────────────────── + +/** + * Walk up from repoRoot until a .git directory is found. + * Returns true if a git root exists, false otherwise. + * + * @param {string} repoRoot + * @returns {boolean} + */ +function findGitRoot(repoRoot) { + let checkDir = repoRoot; + while (checkDir) { + if (fs.existsSync(path.join(checkDir, '.git'))) { + return true; + } + const parent = path.dirname(checkDir); + if (parent === checkDir) break; + checkDir = parent; + } + return false; +} + +/** + * Execute git diff and return the raw output string. + * Returns `{ output: string }` on success or `{ error: string }` on failure. + * + * @param {string} repoRoot + * @param {{ staged?: boolean, ref?: string }} opts + * @returns {{ output: string } | { error: string }} + */ +function runGitDiff(repoRoot, opts) { + try { + const args = opts.staged + ? ['diff', '--cached', '--unified=0', '--no-color'] + : ['diff', opts.ref || 'HEAD', '--unified=0', '--no-color']; + const output = execFileSync('git', args, { + cwd: repoRoot, + encoding: 'utf-8', + maxBuffer: 10 * 1024 * 1024, + stdio: ['pipe', 'pipe', 'pipe'], + }); + return { output }; + } catch (e) { + return { error: `Failed to run git diff: ${e.message}` }; + } +} + +/** + * Parse raw git diff output into a changedRanges map and newFiles set. 
+ * + * @param {string} diffOutput + * @returns {{ changedRanges: Map>, newFiles: Set }} + */ +function parseGitDiff(diffOutput) { + const changedRanges = new Map(); + const newFiles = new Set(); + let currentFile = null; + let prevIsDevNull = false; + + for (const line of diffOutput.split('\n')) { + if (line.startsWith('--- /dev/null')) { + prevIsDevNull = true; + continue; + } + if (line.startsWith('--- ')) { + prevIsDevNull = false; + continue; + } + const fileMatch = line.match(/^\+\+\+ b\/(.+)/); + if (fileMatch) { + currentFile = fileMatch[1]; + if (!changedRanges.has(currentFile)) changedRanges.set(currentFile, []); + if (prevIsDevNull) newFiles.add(currentFile); + prevIsDevNull = false; + continue; + } + const hunkMatch = line.match(/^@@ .+ \+(\d+)(?:,(\d+))? @@/); + if (hunkMatch && currentFile) { + const start = parseInt(hunkMatch[1], 10); + const count = parseInt(hunkMatch[2] || '1', 10); + changedRanges.get(currentFile).push({ start, end: start + count - 1 }); + } + } + + return { changedRanges, newFiles }; +} + +/** + * Find all function/method/class nodes whose line ranges overlap any changed range. + * + * @param {import('better-sqlite3').Database} db + * @param {Map} changedRanges + * @param {boolean} noTests + * @returns {Array} + */ +function findAffectedFunctions(db, changedRanges, noTests) { + const affectedFunctions = []; + for (const [file, ranges] of changedRanges) { + if (noTests && isTestFile(file)) continue; + const defs = db + .prepare( + `SELECT * FROM nodes WHERE file = ? AND kind IN ('function', 'method', 'class') ORDER BY line`, + ) + .all(file); + for (let i = 0; i < defs.length; i++) { + const def = defs[i]; + const endLine = def.end_line || (defs[i + 1] ? 
defs[i + 1].line - 1 : 999999); + for (const range of ranges) { + if (range.start <= endLine && range.end >= def.line) { + affectedFunctions.push(def); + break; + } + } + } + } + return affectedFunctions; +} + +/** + * Run BFS per affected function, collecting per-function results and the full affected set. + * + * @param {import('better-sqlite3').Database} db + * @param {Array} affectedFunctions + * @param {boolean} noTests + * @param {number} maxDepth + * @returns {{ functionResults: Array, allAffected: Set }} + */ +function buildFunctionImpactResults(db, affectedFunctions, noTests, maxDepth) { + const allAffected = new Set(); + const functionResults = affectedFunctions.map((fn) => { + const edges = []; + const idToKey = new Map(); + idToKey.set(fn.id, `${fn.file}::${fn.name}:${fn.line}`); + + const { levels, totalDependents } = bfsTransitiveCallers(db, fn.id, { + noTests, + maxDepth, + onVisit(c, parentId) { + allAffected.add(`${c.file}:${c.name}`); + const callerKey = `${c.file}::${c.name}:${c.line}`; + idToKey.set(c.id, callerKey); + edges.push({ from: idToKey.get(parentId), to: callerKey }); + }, + }); + + return { + name: fn.name, + kind: fn.kind, + file: fn.file, + line: fn.line, + transitiveCallers: totalDependents, + levels, + edges, + }; + }); + + return { functionResults, allAffected }; +} + +/** + * Look up historically co-changed files for the set of changed files. + * Returns an empty array if the co_changes table is unavailable. 
+ * + * @param {import('better-sqlite3').Database} db + * @param {Map} changedRanges + * @param {Set} affectedFiles + * @param {boolean} noTests + * @returns {Array} + */ +function lookupCoChanges(db, changedRanges, affectedFiles, noTests) { + try { + db.prepare('SELECT 1 FROM co_changes LIMIT 1').get(); + const changedFilesList = [...changedRanges.keys()]; + const coResults = coChangeForFiles(changedFilesList, db, { + minJaccard: 0.3, + limit: 20, + noTests, + }); + return coResults.filter((r) => !affectedFiles.has(r.file)); + } catch (e) { + debug(`co_changes lookup skipped: ${e.message}`); + return []; + } +} + +/** + * Look up CODEOWNERS for changed and affected files. + * Returns null if no owners are found or lookup fails. + * + * @param {Map} changedRanges + * @param {Set} affectedFiles + * @param {string} repoRoot + * @returns {{ owners: object, affectedOwners: Array, suggestedReviewers: Array } | null} + */ +function lookupOwnership(changedRanges, affectedFiles, repoRoot) { + try { + const allFilePaths = [...new Set([...changedRanges.keys(), ...affectedFiles])]; + const ownerResult = ownersForFiles(allFilePaths, repoRoot); + if (ownerResult.affectedOwners.length > 0) { + return { + owners: Object.fromEntries(ownerResult.owners), + affectedOwners: ownerResult.affectedOwners, + suggestedReviewers: ownerResult.suggestedReviewers, + }; + } + return null; + } catch (e) { + debug(`CODEOWNERS lookup skipped: ${e.message}`); + return null; + } +} + +/** + * Check manifesto boundary violations scoped to the changed files. + * Returns `{ boundaryViolations, boundaryViolationCount }`. 
+ * + * @param {import('better-sqlite3').Database} db + * @param {Map} changedRanges + * @param {boolean} noTests + * @param {object} opts — full diffImpactData opts (may contain `opts.config`) + * @param {string} repoRoot + * @returns {{ boundaryViolations: Array, boundaryViolationCount: number }} + */ +function checkBoundaryViolations(db, changedRanges, noTests, opts, repoRoot) { + try { + const cfg = opts.config || loadConfig(repoRoot); + const boundaryConfig = cfg.manifesto?.boundaries; + if (boundaryConfig) { + const result = evaluateBoundaries(db, boundaryConfig, { + scopeFiles: [...changedRanges.keys()], + noTests, + }); + return { + boundaryViolations: result.violations, + boundaryViolationCount: result.violationCount, + }; + } + } catch (e) { + debug(`boundary check skipped: ${e.message}`); + } + return { boundaryViolations: [], boundaryViolationCount: 0 }; +} + +// ─── diffImpactData ───────────────────────────────────────────────────── + /** * Fix #2: Shell injection vulnerability. * Uses execFileSync instead of execSync to prevent shell interpretation of user input. @@ -147,38 +392,14 @@ export function diffImpactData(customDbPath, opts = {}) { const dbPath = findDbPath(customDbPath); const repoRoot = path.resolve(path.dirname(dbPath), '..'); - // Verify we're in a git repository before running git diff - let checkDir = repoRoot; - let isGitRepo = false; - while (checkDir) { - if (fs.existsSync(path.join(checkDir, '.git'))) { - isGitRepo = true; - break; - } - const parent = path.dirname(checkDir); - if (parent === checkDir) break; - checkDir = parent; - } - if (!isGitRepo) { + if (!findGitRoot(repoRoot)) { return { error: `Not a git repository: ${repoRoot}` }; } - let diffOutput; - try { - const args = opts.staged - ? 
['diff', '--cached', '--unified=0', '--no-color'] - : ['diff', opts.ref || 'HEAD', '--unified=0', '--no-color']; - diffOutput = execFileSync('git', args, { - cwd: repoRoot, - encoding: 'utf-8', - maxBuffer: 10 * 1024 * 1024, - stdio: ['pipe', 'pipe', 'pipe'], - }); - } catch (e) { - return { error: `Failed to run git diff: ${e.message}` }; - } + const gitResult = runGitDiff(repoRoot, opts); + if (gitResult.error) return { error: gitResult.error }; - if (!diffOutput.trim()) { + if (!gitResult.output.trim()) { return { changedFiles: 0, newFiles: [], @@ -188,34 +409,7 @@ export function diffImpactData(customDbPath, opts = {}) { }; } - const changedRanges = new Map(); - const newFiles = new Set(); - let currentFile = null; - let prevIsDevNull = false; - for (const line of diffOutput.split('\n')) { - if (line.startsWith('--- /dev/null')) { - prevIsDevNull = true; - continue; - } - if (line.startsWith('--- ')) { - prevIsDevNull = false; - continue; - } - const fileMatch = line.match(/^\+\+\+ b\/(.+)/); - if (fileMatch) { - currentFile = fileMatch[1]; - if (!changedRanges.has(currentFile)) changedRanges.set(currentFile, []); - if (prevIsDevNull) newFiles.add(currentFile); - prevIsDevNull = false; - continue; - } - const hunkMatch = line.match(/^@@ .+ \+(\d+)(?:,(\d+))? @@/); - if (hunkMatch && currentFile) { - const start = parseInt(hunkMatch[1], 10); - const count = parseInt(hunkMatch[2] || '1', 10); - changedRanges.get(currentFile).push({ start, end: start + count - 1 }); - } - } + const { changedRanges, newFiles } = parseGitDiff(gitResult.output); if (changedRanges.size === 0) { return { @@ -227,106 +421,26 @@ export function diffImpactData(customDbPath, opts = {}) { }; } - const affectedFunctions = []; - for (const [file, ranges] of changedRanges) { - if (noTests && isTestFile(file)) continue; - const defs = db - .prepare( - `SELECT * FROM nodes WHERE file = ? 
AND kind IN ('function', 'method', 'class') ORDER BY line`, - ) - .all(file); - for (let i = 0; i < defs.length; i++) { - const def = defs[i]; - const endLine = def.end_line || (defs[i + 1] ? defs[i + 1].line - 1 : 999999); - for (const range of ranges) { - if (range.start <= endLine && range.end >= def.line) { - affectedFunctions.push(def); - break; - } - } - } - } - - const allAffected = new Set(); - const functionResults = affectedFunctions.map((fn) => { - const edges = []; - const idToKey = new Map(); - idToKey.set(fn.id, `${fn.file}::${fn.name}:${fn.line}`); - - const { levels, totalDependents } = bfsTransitiveCallers(db, fn.id, { - noTests, - maxDepth, - onVisit(c, parentId) { - allAffected.add(`${c.file}:${c.name}`); - const callerKey = `${c.file}::${c.name}:${c.line}`; - idToKey.set(c.id, callerKey); - edges.push({ from: idToKey.get(parentId), to: callerKey }); - }, - }); - - return { - name: fn.name, - kind: fn.kind, - file: fn.file, - line: fn.line, - transitiveCallers: totalDependents, - levels, - edges, - }; - }); + const affectedFunctions = findAffectedFunctions(db, changedRanges, noTests); + const { functionResults, allAffected } = buildFunctionImpactResults( + db, + affectedFunctions, + noTests, + maxDepth, + ); const affectedFiles = new Set(); for (const key of allAffected) affectedFiles.add(key.split(':')[0]); - // Look up historically coupled files from co-change data - let historicallyCoupled = []; - try { - db.prepare('SELECT 1 FROM co_changes LIMIT 1').get(); - const changedFilesList = [...changedRanges.keys()]; - const coResults = coChangeForFiles(changedFilesList, db, { - minJaccard: 0.3, - limit: 20, - noTests, - }); - // Exclude files already found via static analysis - historicallyCoupled = coResults.filter((r) => !affectedFiles.has(r.file)); - } catch (e) { - debug(`co_changes lookup skipped: ${e.message}`); - } - - // Look up CODEOWNERS for changed + affected files - let ownership = null; - try { - const allFilePaths = [...new 
Set([...changedRanges.keys(), ...affectedFiles])]; - const ownerResult = ownersForFiles(allFilePaths, repoRoot); - if (ownerResult.affectedOwners.length > 0) { - ownership = { - owners: Object.fromEntries(ownerResult.owners), - affectedOwners: ownerResult.affectedOwners, - suggestedReviewers: ownerResult.suggestedReviewers, - }; - } - } catch (e) { - debug(`CODEOWNERS lookup skipped: ${e.message}`); - } - - // Check boundary violations scoped to changed files - let boundaryViolations = []; - let boundaryViolationCount = 0; - try { - const cfg = opts.config || loadConfig(repoRoot); - const boundaryConfig = cfg.manifesto?.boundaries; - if (boundaryConfig) { - const result = evaluateBoundaries(db, boundaryConfig, { - scopeFiles: [...changedRanges.keys()], - noTests, - }); - boundaryViolations = result.violations; - boundaryViolationCount = result.violationCount; - } - } catch (e) { - debug(`boundary check skipped: ${e.message}`); - } + const historicallyCoupled = lookupCoChanges(db, changedRanges, affectedFiles, noTests); + const ownership = lookupOwnership(changedRanges, affectedFiles, repoRoot); + const { boundaryViolations, boundaryViolationCount } = checkBoundaryViolations( + db, + changedRanges, + noTests, + opts, + repoRoot, + ); const base = { changedFiles: changedRanges.size, diff --git a/src/domain/analysis/module-map.js b/src/domain/analysis/module-map.js index d2bc613b..daf09b33 100644 --- a/src/domain/analysis/module-map.js +++ b/src/domain/analysis/module-map.js @@ -37,6 +37,241 @@ export const FALSE_POSITIVE_NAMES = new Set([ ]); export const FALSE_POSITIVE_CALLER_THRESHOLD = 20; +// --------------------------------------------------------------------------- +// Section helpers +// --------------------------------------------------------------------------- + +function buildTestFileIds(db) { + const allFileNodes = db.prepare("SELECT id, file FROM nodes WHERE kind = 'file'").all(); + const testFileIds = new Set(); + const testFiles = new Set(); + for 
(const n of allFileNodes) { + if (isTestFile(n.file)) { + testFileIds.add(n.id); + testFiles.add(n.file); + } + } + const allNodes = db.prepare('SELECT id, file FROM nodes').all(); + for (const n of allNodes) { + if (testFiles.has(n.file)) testFileIds.add(n.id); + } + return testFileIds; +} + +function countNodesByKind(db, testFileIds) { + let nodeRows; + if (testFileIds) { + const allNodes = db.prepare('SELECT id, kind, file FROM nodes').all(); + const filtered = allNodes.filter((n) => !testFileIds.has(n.id)); + const counts = {}; + for (const n of filtered) counts[n.kind] = (counts[n.kind] || 0) + 1; + nodeRows = Object.entries(counts).map(([kind, c]) => ({ kind, c })); + } else { + nodeRows = db.prepare('SELECT kind, COUNT(*) as c FROM nodes GROUP BY kind').all(); + } + const byKind = {}; + let total = 0; + for (const r of nodeRows) { + byKind[r.kind] = r.c; + total += r.c; + } + return { total, byKind }; +} + +function countEdgesByKind(db, testFileIds) { + let edgeRows; + if (testFileIds) { + const allEdges = db.prepare('SELECT source_id, target_id, kind FROM edges').all(); + const filtered = allEdges.filter( + (e) => !testFileIds.has(e.source_id) && !testFileIds.has(e.target_id), + ); + const counts = {}; + for (const e of filtered) counts[e.kind] = (counts[e.kind] || 0) + 1; + edgeRows = Object.entries(counts).map(([kind, c]) => ({ kind, c })); + } else { + edgeRows = db.prepare('SELECT kind, COUNT(*) as c FROM edges GROUP BY kind').all(); + } + const byKind = {}; + let total = 0; + for (const r of edgeRows) { + byKind[r.kind] = r.c; + total += r.c; + } + return { total, byKind }; +} + +function countFilesByLanguage(db, noTests) { + const extToLang = new Map(); + for (const entry of LANGUAGE_REGISTRY) { + for (const ext of entry.extensions) { + extToLang.set(ext, entry.id); + } + } + let fileNodes = db.prepare("SELECT file FROM nodes WHERE kind = 'file'").all(); + if (noTests) fileNodes = fileNodes.filter((n) => !isTestFile(n.file)); + const byLanguage = {}; 
+ for (const row of fileNodes) { + const ext = path.extname(row.file).toLowerCase(); + const lang = extToLang.get(ext) || 'other'; + byLanguage[lang] = (byLanguage[lang] || 0) + 1; + } + return { total: fileNodes.length, languages: Object.keys(byLanguage).length, byLanguage }; +} + +function findHotspots(db, noTests, limit) { + const testFilter = testFilterSQL('n.file', noTests); + const hotspotRows = db + .prepare(` + SELECT n.file, + (SELECT COUNT(*) FROM edges WHERE target_id = n.id) as fan_in, + (SELECT COUNT(*) FROM edges WHERE source_id = n.id) as fan_out + FROM nodes n + WHERE n.kind = 'file' ${testFilter} + ORDER BY (SELECT COUNT(*) FROM edges WHERE target_id = n.id) + + (SELECT COUNT(*) FROM edges WHERE source_id = n.id) DESC + `) + .all(); + const filtered = noTests ? hotspotRows.filter((r) => !isTestFile(r.file)) : hotspotRows; + return filtered.slice(0, limit).map((r) => ({ + file: r.file, + fanIn: r.fan_in, + fanOut: r.fan_out, + })); +} + +function getEmbeddingsInfo(db) { + try { + const count = db.prepare('SELECT COUNT(*) as c FROM embeddings').get(); + if (count && count.c > 0) { + const meta = {}; + const metaRows = db.prepare('SELECT key, value FROM embedding_meta').all(); + for (const r of metaRows) meta[r.key] = r.value; + return { + count: count.c, + model: meta.model || null, + dim: meta.dim ? 
parseInt(meta.dim, 10) : null, + builtAt: meta.built_at || null, + }; + } + } catch (e) { + debug(`embeddings lookup skipped: ${e.message}`); + } + return null; +} + +function computeQualityMetrics(db, testFilter) { + const qualityTestFilter = testFilter.replace(/n\.file/g, 'file'); + + const totalCallable = db + .prepare( + `SELECT COUNT(*) as c FROM nodes WHERE kind IN ('function', 'method') ${qualityTestFilter}`, + ) + .get().c; + const callableWithCallers = db + .prepare(` + SELECT COUNT(DISTINCT e.target_id) as c FROM edges e + JOIN nodes n ON e.target_id = n.id + WHERE e.kind = 'calls' AND n.kind IN ('function', 'method') ${testFilter} + `) + .get().c; + const callerCoverage = totalCallable > 0 ? callableWithCallers / totalCallable : 0; + + const totalCallEdges = db.prepare("SELECT COUNT(*) as c FROM edges WHERE kind = 'calls'").get().c; + const highConfCallEdges = db + .prepare("SELECT COUNT(*) as c FROM edges WHERE kind = 'calls' AND confidence >= 0.7") + .get().c; + const callConfidence = totalCallEdges > 0 ? highConfCallEdges / totalCallEdges : 0; + + const fpRows = db + .prepare(` + SELECT n.name, n.file, n.line, COUNT(e.source_id) as caller_count + FROM nodes n + LEFT JOIN edges e ON n.id = e.target_id AND e.kind = 'calls' + WHERE n.kind IN ('function', 'method') + GROUP BY n.id + HAVING caller_count > ? + ORDER BY caller_count DESC + `) + .all(FALSE_POSITIVE_CALLER_THRESHOLD); + const falsePositiveWarnings = fpRows + .filter((r) => + FALSE_POSITIVE_NAMES.has(r.name.includes('.') ? r.name.split('.').pop() : r.name), + ) + .map((r) => ({ name: r.name, file: r.file, line: r.line, callerCount: r.caller_count })); + + let fpEdgeCount = 0; + for (const fp of falsePositiveWarnings) fpEdgeCount += fp.callerCount; + const falsePositiveRatio = totalCallEdges > 0 ? 
fpEdgeCount / totalCallEdges : 0; + + const score = Math.round( + callerCoverage * 40 + callConfidence * 40 + (1 - falsePositiveRatio) * 20, + ); + + return { + score, + callerCoverage: { + ratio: callerCoverage, + covered: callableWithCallers, + total: totalCallable, + }, + callConfidence: { + ratio: callConfidence, + highConf: highConfCallEdges, + total: totalCallEdges, + }, + falsePositiveWarnings, + }; +} + +function countRoles(db, noTests) { + let roleRows; + if (noTests) { + const allRoleNodes = db.prepare('SELECT role, file FROM nodes WHERE role IS NOT NULL').all(); + const filtered = allRoleNodes.filter((n) => !isTestFile(n.file)); + const counts = {}; + for (const n of filtered) counts[n.role] = (counts[n.role] || 0) + 1; + roleRows = Object.entries(counts).map(([role, c]) => ({ role, c })); + } else { + roleRows = db + .prepare('SELECT role, COUNT(*) as c FROM nodes WHERE role IS NOT NULL GROUP BY role') + .all(); + } + const roles = {}; + for (const r of roleRows) roles[r.role] = r.c; + return roles; +} + +function getComplexitySummary(db, testFilter) { + try { + const cRows = db + .prepare( + `SELECT fc.cognitive, fc.cyclomatic, fc.max_nesting, fc.maintainability_index + FROM function_complexity fc JOIN nodes n ON fc.node_id = n.id + WHERE n.kind IN ('function','method') ${testFilter}`, + ) + .all(); + if (cRows.length > 0) { + const miValues = cRows.map((r) => r.maintainability_index || 0); + return { + analyzed: cRows.length, + avgCognitive: +(cRows.reduce((s, r) => s + r.cognitive, 0) / cRows.length).toFixed(1), + avgCyclomatic: +(cRows.reduce((s, r) => s + r.cyclomatic, 0) / cRows.length).toFixed(1), + maxCognitive: Math.max(...cRows.map((r) => r.cognitive)), + maxCyclomatic: Math.max(...cRows.map((r) => r.cyclomatic)), + avgMI: +(miValues.reduce((s, v) => s + v, 0) / miValues.length).toFixed(1), + minMI: +Math.min(...miValues).toFixed(1), + }; + } + } catch (e) { + debug(`complexity summary skipped: ${e.message}`); + } + return null; +} + +// 
--------------------------------------------------------------------------- +// Public API +// --------------------------------------------------------------------------- + export function moduleMapData(customDbPath, limit = 20, opts = {}) { const db = openReadonlyOrFail(customDbPath); try { @@ -79,237 +314,27 @@ export function statsData(customDbPath, opts = {}) { const db = openReadonlyOrFail(customDbPath); try { const noTests = opts.noTests || false; + const testFilter = testFilterSQL('n.file', noTests); - // Build set of test file IDs for filtering nodes and edges - let testFileIds = null; - if (noTests) { - const allFileNodes = db.prepare("SELECT id, file FROM nodes WHERE kind = 'file'").all(); - testFileIds = new Set(); - const testFiles = new Set(); - for (const n of allFileNodes) { - if (isTestFile(n.file)) { - testFileIds.add(n.id); - testFiles.add(n.file); - } - } - - // Also collect non-file node IDs that belong to test files - const allNodes = db.prepare('SELECT id, file FROM nodes').all(); - for (const n of allNodes) { - if (testFiles.has(n.file)) testFileIds.add(n.id); - } - } - - // Node breakdown by kind - let nodeRows; - if (noTests) { - const allNodes = db.prepare('SELECT id, kind, file FROM nodes').all(); - const filtered = allNodes.filter((n) => !testFileIds.has(n.id)); - const counts = {}; - for (const n of filtered) counts[n.kind] = (counts[n.kind] || 0) + 1; - nodeRows = Object.entries(counts).map(([kind, c]) => ({ kind, c })); - } else { - nodeRows = db.prepare('SELECT kind, COUNT(*) as c FROM nodes GROUP BY kind').all(); - } - const nodesByKind = {}; - let totalNodes = 0; - for (const r of nodeRows) { - nodesByKind[r.kind] = r.c; - totalNodes += r.c; - } - - // Edge breakdown by kind - let edgeRows; - if (noTests) { - const allEdges = db.prepare('SELECT source_id, target_id, kind FROM edges').all(); - const filtered = allEdges.filter( - (e) => !testFileIds.has(e.source_id) && !testFileIds.has(e.target_id), - ); - const counts = {}; - for 
(const e of filtered) counts[e.kind] = (counts[e.kind] || 0) + 1; - edgeRows = Object.entries(counts).map(([kind, c]) => ({ kind, c })); - } else { - edgeRows = db.prepare('SELECT kind, COUNT(*) as c FROM edges GROUP BY kind').all(); - } - const edgesByKind = {}; - let totalEdges = 0; - for (const r of edgeRows) { - edgesByKind[r.kind] = r.c; - totalEdges += r.c; - } + const testFileIds = noTests ? buildTestFileIds(db) : null; - // File/language distribution — map extensions via LANGUAGE_REGISTRY - const extToLang = new Map(); - for (const entry of LANGUAGE_REGISTRY) { - for (const ext of entry.extensions) { - extToLang.set(ext, entry.id); - } - } - let fileNodes = db.prepare("SELECT file FROM nodes WHERE kind = 'file'").all(); - if (noTests) fileNodes = fileNodes.filter((n) => !isTestFile(n.file)); - const byLanguage = {}; - for (const row of fileNodes) { - const ext = path.extname(row.file).toLowerCase(); - const lang = extToLang.get(ext) || 'other'; - byLanguage[lang] = (byLanguage[lang] || 0) + 1; - } - const langCount = Object.keys(byLanguage).length; + const { total: totalNodes, byKind: nodesByKind } = countNodesByKind(db, testFileIds); + const { total: totalEdges, byKind: edgesByKind } = countEdgesByKind(db, testFileIds); + const files = countFilesByLanguage(db, noTests); - // Cycles const fileCycles = findCycles(db, { fileLevel: true, noTests }); const fnCycles = findCycles(db, { fileLevel: false, noTests }); - // Top 5 coupling hotspots (fan-in + fan-out, file nodes) - const testFilter = testFilterSQL('n.file', noTests); - const hotspotRows = db - .prepare(` - SELECT n.file, - (SELECT COUNT(*) FROM edges WHERE target_id = n.id) as fan_in, - (SELECT COUNT(*) FROM edges WHERE source_id = n.id) as fan_out - FROM nodes n - WHERE n.kind = 'file' ${testFilter} - ORDER BY (SELECT COUNT(*) FROM edges WHERE target_id = n.id) - + (SELECT COUNT(*) FROM edges WHERE source_id = n.id) DESC - `) - .all(); - const filteredHotspots = noTests ? 
hotspotRows.filter((r) => !isTestFile(r.file)) : hotspotRows; - const hotspots = filteredHotspots.slice(0, 5).map((r) => ({ - file: r.file, - fanIn: r.fan_in, - fanOut: r.fan_out, - })); - - // Embeddings metadata - let embeddings = null; - try { - const count = db.prepare('SELECT COUNT(*) as c FROM embeddings').get(); - if (count && count.c > 0) { - const meta = {}; - const metaRows = db.prepare('SELECT key, value FROM embedding_meta').all(); - for (const r of metaRows) meta[r.key] = r.value; - embeddings = { - count: count.c, - model: meta.model || null, - dim: meta.dim ? parseInt(meta.dim, 10) : null, - builtAt: meta.built_at || null, - }; - } - } catch (e) { - debug(`embeddings lookup skipped: ${e.message}`); - } - - // Graph quality metrics - const qualityTestFilter = testFilter.replace(/n\.file/g, 'file'); - const totalCallable = db - .prepare( - `SELECT COUNT(*) as c FROM nodes WHERE kind IN ('function', 'method') ${qualityTestFilter}`, - ) - .get().c; - const callableWithCallers = db - .prepare(` - SELECT COUNT(DISTINCT e.target_id) as c FROM edges e - JOIN nodes n ON e.target_id = n.id - WHERE e.kind = 'calls' AND n.kind IN ('function', 'method') ${testFilter} - `) - .get().c; - const callerCoverage = totalCallable > 0 ? callableWithCallers / totalCallable : 0; - - const totalCallEdges = db - .prepare("SELECT COUNT(*) as c FROM edges WHERE kind = 'calls'") - .get().c; - const highConfCallEdges = db - .prepare("SELECT COUNT(*) as c FROM edges WHERE kind = 'calls' AND confidence >= 0.7") - .get().c; - const callConfidence = totalCallEdges > 0 ? highConfCallEdges / totalCallEdges : 0; - - // False-positive warnings: generic names with > threshold callers - const fpRows = db - .prepare(` - SELECT n.name, n.file, n.line, COUNT(e.source_id) as caller_count - FROM nodes n - LEFT JOIN edges e ON n.id = e.target_id AND e.kind = 'calls' - WHERE n.kind IN ('function', 'method') - GROUP BY n.id - HAVING caller_count > ? 
- ORDER BY caller_count DESC - `) - .all(FALSE_POSITIVE_CALLER_THRESHOLD); - const falsePositiveWarnings = fpRows - .filter((r) => - FALSE_POSITIVE_NAMES.has(r.name.includes('.') ? r.name.split('.').pop() : r.name), - ) - .map((r) => ({ name: r.name, file: r.file, line: r.line, callerCount: r.caller_count })); - - // Edges from suspicious nodes - let fpEdgeCount = 0; - for (const fp of falsePositiveWarnings) fpEdgeCount += fp.callerCount; - const falsePositiveRatio = totalCallEdges > 0 ? fpEdgeCount / totalCallEdges : 0; - - const score = Math.round( - callerCoverage * 40 + callConfidence * 40 + (1 - falsePositiveRatio) * 20, - ); - - const quality = { - score, - callerCoverage: { - ratio: callerCoverage, - covered: callableWithCallers, - total: totalCallable, - }, - callConfidence: { - ratio: callConfidence, - highConf: highConfCallEdges, - total: totalCallEdges, - }, - falsePositiveWarnings, - }; - - // Role distribution - let roleRows; - if (noTests) { - const allRoleNodes = db.prepare('SELECT role, file FROM nodes WHERE role IS NOT NULL').all(); - const filtered = allRoleNodes.filter((n) => !isTestFile(n.file)); - const counts = {}; - for (const n of filtered) counts[n.role] = (counts[n.role] || 0) + 1; - roleRows = Object.entries(counts).map(([role, c]) => ({ role, c })); - } else { - roleRows = db - .prepare('SELECT role, COUNT(*) as c FROM nodes WHERE role IS NOT NULL GROUP BY role') - .all(); - } - const roles = {}; - for (const r of roleRows) roles[r.role] = r.c; - - // Complexity summary - let complexity = null; - try { - const cRows = db - .prepare( - `SELECT fc.cognitive, fc.cyclomatic, fc.max_nesting, fc.maintainability_index - FROM function_complexity fc JOIN nodes n ON fc.node_id = n.id - WHERE n.kind IN ('function','method') ${testFilter}`, - ) - .all(); - if (cRows.length > 0) { - const miValues = cRows.map((r) => r.maintainability_index || 0); - complexity = { - analyzed: cRows.length, - avgCognitive: +(cRows.reduce((s, r) => s + r.cognitive, 0) / 
cRows.length).toFixed(1), - avgCyclomatic: +(cRows.reduce((s, r) => s + r.cyclomatic, 0) / cRows.length).toFixed(1), - maxCognitive: Math.max(...cRows.map((r) => r.cognitive)), - maxCyclomatic: Math.max(...cRows.map((r) => r.cyclomatic)), - avgMI: +(miValues.reduce((s, v) => s + v, 0) / miValues.length).toFixed(1), - minMI: +Math.min(...miValues).toFixed(1), - }; - } - } catch (e) { - debug(`complexity summary skipped: ${e.message}`); - } + const hotspots = findHotspots(db, noTests, 5); + const embeddings = getEmbeddingsInfo(db); + const quality = computeQualityMetrics(db, testFilter); + const roles = countRoles(db, noTests); + const complexity = getComplexitySummary(db, testFilter); return { nodes: { total: totalNodes, byKind: nodesByKind }, edges: { total: totalEdges, byKind: edgesByKind }, - files: { total: fileNodes.length, languages: langCount, byLanguage }, + files, cycles: { fileLevel: fileCycles.length, functionLevel: fnCycles.length }, hotspots, embeddings, From 81cfbb9c6c6dbaca2f5655828c99a18599eb599b Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 03:35:59 -0600 Subject: [PATCH 04/22] refactor: decompose buildComplexityMetrics Impact: 5 functions changed, 3 affected --- src/features/complexity.js | 246 +++++++++++++++++++------------------ 1 file changed, 125 insertions(+), 121 deletions(-) diff --git a/src/features/complexity.js b/src/features/complexity.js index 12f5acf1..4f82e5ef 100644 --- a/src/features/complexity.js +++ b/src/features/complexity.js @@ -330,41 +330,138 @@ export function computeAllMetrics(functionNode, langId) { */ export { _findFunctionNode as findFunctionNode }; -/** - * Re-parse changed files with WASM tree-sitter, find function AST subtrees, - * compute complexity, and upsert into function_complexity table. 
- * - * @param {object} db - open better-sqlite3 database (read-write) - * @param {Map} fileSymbols - Map - * @param {string} rootDir - absolute project root path - * @param {object} [engineOpts] - engine options (unused; always uses WASM for AST) - */ -export async function buildComplexityMetrics(db, fileSymbols, rootDir, _engineOpts) { - // Only initialize WASM parsers if some files lack both a cached tree AND pre-computed complexity - let parsers = null; - let extToLang = null; - let needsFallback = false; +async function initWasmParsersIfNeeded(fileSymbols) { for (const [relPath, symbols] of fileSymbols) { if (!symbols._tree) { - // Only consider files whose language actually has complexity rules const ext = path.extname(relPath).toLowerCase(); if (!COMPLEXITY_EXTENSIONS.has(ext)) continue; - // Check if all function/method defs have pre-computed complexity (native engine) const hasPrecomputed = symbols.definitions.every( (d) => (d.kind !== 'function' && d.kind !== 'method') || d.complexity, ); if (!hasPrecomputed) { - needsFallback = true; - break; + const { createParsers } = await import('../domain/parser.js'); + const parsers = await createParsers(); + const extToLang = buildExtToLangMap(); + return { parsers, extToLang }; } } } - if (needsFallback) { - const { createParsers } = await import('../domain/parser.js'); - parsers = await createParsers(); - extToLang = buildExtToLangMap(); + return { parsers: null, extToLang: null }; +} + +function getTreeForFile(symbols, relPath, rootDir, parsers, extToLang, getParser) { + let tree = symbols._tree; + let langId = symbols._langId; + + const allPrecomputed = symbols.definitions.every( + (d) => (d.kind !== 'function' && d.kind !== 'method') || d.complexity, + ); + + if (!allPrecomputed && !tree) { + const ext = path.extname(relPath).toLowerCase(); + if (!COMPLEXITY_EXTENSIONS.has(ext)) return null; + if (!extToLang) return null; + langId = extToLang.get(ext); + if (!langId) return null; + + const absPath = 
path.join(rootDir, relPath); + let code; + try { + code = fs.readFileSync(absPath, 'utf-8'); + } catch (e) { + debug(`complexity: cannot read ${relPath}: ${e.message}`); + return null; + } + + const parser = getParser(parsers, absPath); + if (!parser) return null; + + try { + tree = parser.parse(code); + } catch (e) { + debug(`complexity: parse failed for ${relPath}: ${e.message}`); + return null; + } } + return { tree, langId }; +} + +function upsertPrecomputedComplexity(db, upsert, def, relPath) { + const nodeId = getFunctionNodeId(db, def.name, relPath, def.line); + if (!nodeId) return 0; + const ch = def.complexity.halstead; + const cl = def.complexity.loc; + upsert.run( + nodeId, + def.complexity.cognitive, + def.complexity.cyclomatic, + def.complexity.maxNesting ?? 0, + cl ? cl.loc : 0, + cl ? cl.sloc : 0, + cl ? cl.commentLines : 0, + ch ? ch.n1 : 0, + ch ? ch.n2 : 0, + ch ? ch.bigN1 : 0, + ch ? ch.bigN2 : 0, + ch ? ch.vocabulary : 0, + ch ? ch.length : 0, + ch ? ch.volume : 0, + ch ? ch.difficulty : 0, + ch ? ch.effort : 0, + ch ? ch.bugs : 0, + def.complexity.maintainabilityIndex ?? 0, + ); + return 1; +} + +function upsertAstComplexity(db, upsert, def, relPath, tree, langId, rules) { + if (!tree || !rules) return 0; + + const funcNode = _findFunctionNode(tree.rootNode, def.line, def.endLine, rules); + if (!funcNode) return 0; + + const metrics = computeAllMetrics(funcNode, langId); + if (!metrics) return 0; + + const nodeId = getFunctionNodeId(db, def.name, relPath, def.line); + if (!nodeId) return 0; + + const h = metrics.halstead; + upsert.run( + nodeId, + metrics.cognitive, + metrics.cyclomatic, + metrics.maxNesting, + metrics.loc.loc, + metrics.loc.sloc, + metrics.loc.commentLines, + h ? h.n1 : 0, + h ? h.n2 : 0, + h ? h.bigN1 : 0, + h ? h.bigN2 : 0, + h ? h.vocabulary : 0, + h ? h.length : 0, + h ? h.volume : 0, + h ? h.difficulty : 0, + h ? h.effort : 0, + h ? 
h.bugs : 0, + metrics.mi, + ); + return 1; +} + +/** + * Re-parse changed files with WASM tree-sitter, find function AST subtrees, + * compute complexity, and upsert into function_complexity table. + * + * @param {object} db - open better-sqlite3 database (read-write) + * @param {Map} fileSymbols - Map + * @param {string} rootDir - absolute project root path + * @param {object} [engineOpts] - engine options (unused; always uses WASM for AST) + */ +export async function buildComplexityMetrics(db, fileSymbols, rootDir, _engineOpts) { + const { parsers, extToLang } = await initWasmParsersIfNeeded(fileSymbols); const { getParser } = await import('../domain/parser.js'); const upsert = db.prepare( @@ -381,113 +478,20 @@ export async function buildComplexityMetrics(db, fileSymbols, rootDir, _engineOp const tx = db.transaction(() => { for (const [relPath, symbols] of fileSymbols) { - // Check if all function/method defs have pre-computed complexity - const allPrecomputed = symbols.definitions.every( - (d) => (d.kind !== 'function' && d.kind !== 'method') || d.complexity, - ); - - let tree = symbols._tree; - let langId = symbols._langId; - - // Only attempt WASM fallback if we actually need AST-based computation - if (!allPrecomputed && !tree) { - const ext = path.extname(relPath).toLowerCase(); - if (!COMPLEXITY_EXTENSIONS.has(ext)) continue; // Language has no complexity rules - if (!extToLang) continue; // No WASM parsers available - langId = extToLang.get(ext); - if (!langId) continue; - - const absPath = path.join(rootDir, relPath); - let code; - try { - code = fs.readFileSync(absPath, 'utf-8'); - } catch (e) { - debug(`complexity: cannot read ${relPath}: ${e.message}`); - continue; - } - - const parser = getParser(parsers, absPath); - if (!parser) continue; - - try { - tree = parser.parse(code); - } catch (e) { - debug(`complexity: parse failed for ${relPath}: ${e.message}`); - continue; - } - } - + const result = getTreeForFile(symbols, relPath, rootDir, parsers, 
extToLang, getParser); + const tree = result ? result.tree : null; + const langId = result ? result.langId : null; const rules = langId ? COMPLEXITY_RULES.get(langId) : null; for (const def of symbols.definitions) { if (def.kind !== 'function' && def.kind !== 'method') continue; if (!def.line) continue; - // Use pre-computed complexity from native engine if available if (def.complexity) { - const nodeId = getFunctionNodeId(db, def.name, relPath, def.line); - if (!nodeId) continue; - const ch = def.complexity.halstead; - const cl = def.complexity.loc; - upsert.run( - nodeId, - def.complexity.cognitive, - def.complexity.cyclomatic, - def.complexity.maxNesting ?? 0, - cl ? cl.loc : 0, - cl ? cl.sloc : 0, - cl ? cl.commentLines : 0, - ch ? ch.n1 : 0, - ch ? ch.n2 : 0, - ch ? ch.bigN1 : 0, - ch ? ch.bigN2 : 0, - ch ? ch.vocabulary : 0, - ch ? ch.length : 0, - ch ? ch.volume : 0, - ch ? ch.difficulty : 0, - ch ? ch.effort : 0, - ch ? ch.bugs : 0, - def.complexity.maintainabilityIndex ?? 0, - ); - analyzed++; - continue; + analyzed += upsertPrecomputedComplexity(db, upsert, def, relPath); + } else { + analyzed += upsertAstComplexity(db, upsert, def, relPath, tree, langId, rules); } - - // Fallback: compute from AST tree - if (!tree || !rules) continue; - - const funcNode = _findFunctionNode(tree.rootNode, def.line, def.endLine, rules); - if (!funcNode) continue; - - // Single-pass: complexity + Halstead + LOC + MI in one DFS walk - const metrics = computeAllMetrics(funcNode, langId); - if (!metrics) continue; - - const nodeId = getFunctionNodeId(db, def.name, relPath, def.line); - if (!nodeId) continue; - - const h = metrics.halstead; - upsert.run( - nodeId, - metrics.cognitive, - metrics.cyclomatic, - metrics.maxNesting, - metrics.loc.loc, - metrics.loc.sloc, - metrics.loc.commentLines, - h ? h.n1 : 0, - h ? h.n2 : 0, - h ? h.bigN1 : 0, - h ? h.bigN2 : 0, - h ? h.vocabulary : 0, - h ? h.length : 0, - h ? h.volume : 0, - h ? h.difficulty : 0, - h ? h.effort : 0, - h ? 
h.bugs : 0, - metrics.mi, - ); - analyzed++; } } }); From 7a0d00699069c90526c68a275cac4c1be2cb2c8f Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 03:36:05 -0600 Subject: [PATCH 05/22] refactor: decompose buildStructure into traversal, cohesion, and classification Impact: 8 functions changed, 3 affected --- src/features/structure.js | 208 ++++++++++++++++++++------------------ 1 file changed, 111 insertions(+), 97 deletions(-) diff --git a/src/features/structure.js b/src/features/structure.js index 4ba9ee0a..7f582076 100644 --- a/src/features/structure.js +++ b/src/features/structure.js @@ -5,73 +5,41 @@ import { isTestFile } from '../infrastructure/test-filter.js'; import { normalizePath } from '../shared/constants.js'; import { paginateResult } from '../shared/paginate.js'; -// ─── Build-time: insert directory nodes, contains edges, and metrics ──── +// ─── Build-time helpers ─────────────────────────────────────────────── -/** - * Build directory structure nodes, containment edges, and compute metrics. - * Called from builder.js after edge building. - * - * @param {import('better-sqlite3').Database} db - Open read-write database - * @param {Map} fileSymbols - Map of relPath → { definitions, imports, exports, calls } - * @param {string} rootDir - Absolute root directory - * @param {Map} lineCountMap - Map of relPath → line count - * @param {Set} directories - Set of relative directory paths - */ -export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, directories, changedFiles) { - const insertNode = db.prepare( - 'INSERT OR IGNORE INTO nodes (name, kind, file, line, end_line) VALUES (?, ?, ?, ?, ?)', - ); - const getNodeIdStmt = { - get: (name, kind, file, line) => { - const id = getNodeId(db, name, kind, file, line); - return id != null ? 
{ id } : undefined; - }, - }; - const insertEdge = db.prepare( - 'INSERT INTO edges (source_id, target_id, kind, confidence, dynamic) VALUES (?, ?, ?, ?, ?)', - ); - const upsertMetric = db.prepare(` - INSERT OR REPLACE INTO node_metrics - (node_id, line_count, symbol_count, import_count, export_count, fan_in, fan_out, cohesion, file_count) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) - `); - - const isIncremental = changedFiles != null && changedFiles.length > 0; +function getAncestorDirs(filePaths) { + const dirs = new Set(); + for (const f of filePaths) { + let d = normalizePath(path.dirname(f)); + while (d && d !== '.') { + dirs.add(d); + d = normalizePath(path.dirname(d)); + } + } + return dirs; +} +function cleanupPreviousData(db, getNodeIdStmt, isIncremental, changedFiles) { if (isIncremental) { - // Incremental: only clean up data for changed files and their ancestor directories - const affectedDirs = new Set(); - for (const f of changedFiles) { - let d = normalizePath(path.dirname(f)); - while (d && d !== '.') { - affectedDirs.add(d); - d = normalizePath(path.dirname(d)); - } - } + const affectedDirs = getAncestorDirs(changedFiles); const deleteContainsForDir = db.prepare( "DELETE FROM edges WHERE kind = 'contains' AND source_id IN (SELECT id FROM nodes WHERE name = ? 
AND kind = 'directory')", ); const deleteMetricForNode = db.prepare('DELETE FROM node_metrics WHERE node_id = ?'); db.transaction(() => { - // Delete contains edges only from affected directories for (const dir of affectedDirs) { deleteContainsForDir.run(dir); } - // Delete metrics for changed files for (const f of changedFiles) { const fileRow = getNodeIdStmt.get(f, 'file', f, 0); if (fileRow) deleteMetricForNode.run(fileRow.id); } - // Delete metrics for affected directories for (const dir of affectedDirs) { const dirRow = getNodeIdStmt.get(dir, 'directory', dir, 0); if (dirRow) deleteMetricForNode.run(dirRow.id); } })(); } else { - // Full rebuild: clean previous directory nodes/edges (idempotent) - // Scope contains-edge delete to directory-sourced edges only, - // preserving symbol-level contains edges (file→def, class→method, etc.) db.exec(` DELETE FROM edges WHERE kind = 'contains' AND source_id IN (SELECT id FROM nodes WHERE kind = 'directory'); @@ -79,8 +47,9 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director DELETE FROM nodes WHERE kind = 'directory'; `); } +} - // Step 1: Ensure all directories are represented (including intermediate parents) +function collectAllDirectories(directories, fileSymbols) { const allDirs = new Set(); for (const dir of directories) { let d = dir; @@ -89,7 +58,6 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director d = normalizePath(path.dirname(d)); } } - // Also add dirs derived from file paths for (const relPath of fileSymbols.keys()) { let d = normalizePath(path.dirname(relPath)); while (d && d !== '.') { @@ -97,37 +65,17 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director d = normalizePath(path.dirname(d)); } } + return allDirs; +} - // Step 2: Insert directory nodes (INSERT OR IGNORE — safe for incremental) - const insertDirs = db.transaction(() => { - for (const dir of allDirs) { - insertNode.run(dir, 'directory', dir, 0, null); 
- } - }); - insertDirs(); - - // Step 3: Insert 'contains' edges (dir → file, dir → subdirectory) - // On incremental, only re-insert for affected directories (others are intact) - const affectedDirs = isIncremental - ? (() => { - const dirs = new Set(); - for (const f of changedFiles) { - let d = normalizePath(path.dirname(f)); - while (d && d !== '.') { - dirs.add(d); - d = normalizePath(path.dirname(d)); - } - } - return dirs; - })() - : null; +function insertContainsEdges(db, insertEdge, getNodeIdStmt, fileSymbols, allDirs, changedFiles) { + const isIncremental = changedFiles != null && changedFiles.length > 0; + const affectedDirs = isIncremental ? getAncestorDirs(changedFiles) : null; - const insertContains = db.transaction(() => { - // dir → file + db.transaction(() => { for (const relPath of fileSymbols.keys()) { const dir = normalizePath(path.dirname(relPath)); if (!dir || dir === '.') continue; - // On incremental, skip dirs whose contains edges are intact if (affectedDirs && !affectedDirs.has(dir)) continue; const dirRow = getNodeIdStmt.get(dir, 'directory', dir, 0); const fileRow = getNodeIdStmt.get(relPath, 'file', relPath, 0); @@ -135,11 +83,9 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director insertEdge.run(dirRow.id, fileRow.id, 'contains', 1.0, 0); } } - // dir → subdirectory for (const dir of allDirs) { const parent = normalizePath(path.dirname(dir)); if (!parent || parent === '.' 
|| parent === dir) continue; - // On incremental, skip parent dirs whose contains edges are intact if (affectedDirs && !affectedDirs.has(parent)) continue; const parentRow = getNodeIdStmt.get(parent, 'directory', parent, 0); const childRow = getNodeIdStmt.get(dir, 'directory', dir, 0); @@ -147,11 +93,10 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director insertEdge.run(parentRow.id, childRow.id, 'contains', 1.0, 0); } } - }); - insertContains(); + })(); +} - // Step 4: Compute per-file metrics - // Pre-compute fan-in/fan-out per file from import edges +function computeImportEdgeMaps(db) { const fanInMap = new Map(); const fanOutMap = new Map(); const importEdges = db @@ -169,14 +114,24 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director fanOutMap.set(source_file, (fanOutMap.get(source_file) || 0) + 1); fanInMap.set(target_file, (fanInMap.get(target_file) || 0) + 1); } + return { fanInMap, fanOutMap, importEdges }; +} - const computeFileMetrics = db.transaction(() => { +function computeFileMetrics( + db, + upsertMetric, + getNodeIdStmt, + fileSymbols, + lineCountMap, + fanInMap, + fanOutMap, +) { + db.transaction(() => { for (const [relPath, symbols] of fileSymbols) { const fileRow = getNodeIdStmt.get(relPath, 'file', relPath, 0); if (!fileRow) continue; const lineCount = lineCountMap.get(relPath) || 0; - // Deduplicate definitions by name+kind+line const seen = new Set(); let symbolCount = 0; for (const d of symbols.definitions) { @@ -203,11 +158,17 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director null, ); } - }); - computeFileMetrics(); + })(); +} - // Step 5: Compute per-directory metrics - // Build a map of dir → descendant files +function computeDirectoryMetrics( + db, + upsertMetric, + getNodeIdStmt, + fileSymbols, + allDirs, + importEdges, +) { const dirFiles = new Map(); for (const dir of allDirs) { dirFiles.set(dir, []); @@ -222,7 +183,6 @@ export function 
buildStructure(db, fileSymbols, _rootDir, lineCountMap, director } } - // Build reverse index: file → set of ancestor directories (O(files × depth)) const fileToAncestorDirs = new Map(); for (const [dir, files] of dirFiles) { for (const f of files) { @@ -231,7 +191,6 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director } } - // Single O(E) pass: pre-aggregate edge counts per directory const dirEdgeCounts = new Map(); for (const dir of allDirs) { dirEdgeCounts.set(dir, { intra: 0, fanIn: 0, fanOut: 0 }); @@ -241,7 +200,6 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director const tgtDirs = fileToAncestorDirs.get(target_file); if (!srcDirs && !tgtDirs) continue; - // For each directory that contains the source file if (srcDirs) { for (const dir of srcDirs) { const counts = dirEdgeCounts.get(dir); @@ -253,10 +211,9 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director } } } - // For each directory that contains the target but NOT the source if (tgtDirs) { for (const dir of tgtDirs) { - if (srcDirs?.has(dir)) continue; // already counted as intra + if (srcDirs?.has(dir)) continue; const counts = dirEdgeCounts.get(dir); if (!counts) continue; counts.fanIn++; @@ -264,7 +221,7 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director } } - const computeDirMetrics = db.transaction(() => { + db.transaction(() => { for (const [dir, files] of dirFiles) { const dirRow = getNodeIdStmt.get(dir, 'directory', dir, 0); if (!dirRow) continue; @@ -286,7 +243,6 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director } } - // O(1) lookup from pre-aggregated edge counts const counts = dirEdgeCounts.get(dir) || { intra: 0, fanIn: 0, fanOut: 0 }; const totalEdges = counts.intra + counts.fanIn + counts.fanOut; const cohesion = totalEdges > 0 ? 
counts.intra / totalEdges : null; @@ -303,11 +259,69 @@ export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, director fileCount, ); } - }); - computeDirMetrics(); + })(); +} + +// ─── Build-time: insert directory nodes, contains edges, and metrics ──── + +/** + * Build directory structure nodes, containment edges, and compute metrics. + * Called from builder.js after edge building. + * + * @param {import('better-sqlite3').Database} db - Open read-write database + * @param {Map} fileSymbols - Map of relPath → { definitions, imports, exports, calls } + * @param {string} rootDir - Absolute root directory + * @param {Map} lineCountMap - Map of relPath → line count + * @param {Set} directories - Set of relative directory paths + */ +export function buildStructure(db, fileSymbols, _rootDir, lineCountMap, directories, changedFiles) { + const insertNode = db.prepare( + 'INSERT OR IGNORE INTO nodes (name, kind, file, line, end_line) VALUES (?, ?, ?, ?, ?)', + ); + const getNodeIdStmt = { + get: (name, kind, file, line) => { + const id = getNodeId(db, name, kind, file, line); + return id != null ? { id } : undefined; + }, + }; + const insertEdge = db.prepare( + 'INSERT INTO edges (source_id, target_id, kind, confidence, dynamic) VALUES (?, ?, ?, ?, ?)', + ); + const upsertMetric = db.prepare(` + INSERT OR REPLACE INTO node_metrics + (node_id, line_count, symbol_count, import_count, export_count, fan_in, fan_out, cohesion, file_count) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ `); + + const isIncremental = changedFiles != null && changedFiles.length > 0; + + cleanupPreviousData(db, getNodeIdStmt, isIncremental, changedFiles); + + const allDirs = collectAllDirectories(directories, fileSymbols); + + db.transaction(() => { + for (const dir of allDirs) { + insertNode.run(dir, 'directory', dir, 0, null); + } + })(); + + insertContainsEdges(db, insertEdge, getNodeIdStmt, fileSymbols, allDirs, changedFiles); + + const { fanInMap, fanOutMap, importEdges } = computeImportEdgeMaps(db); + + computeFileMetrics( + db, + upsertMetric, + getNodeIdStmt, + fileSymbols, + lineCountMap, + fanInMap, + fanOutMap, + ); + + computeDirectoryMetrics(db, upsertMetric, getNodeIdStmt, fileSymbols, allDirs, importEdges); - const dirCount = allDirs.size; - debug(`Structure: ${dirCount} directories, ${fileSymbols.size} files with metrics`); + debug(`Structure: ${allDirs.size} directories, ${fileSymbols.size} files with metrics`); } // ─── Node role classification ───────────────────────────────────────── From 39b065a0955fa9c8dbf053c31c681cbe4b3dbb28 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 03:36:16 -0600 Subject: [PATCH 06/22] refactor: decompose buildCFGData and buildDataflowEdges Impact: 10 functions changed, 5 affected --- src/features/cfg.js | 246 ++++++++++++++++++++------------------- src/features/dataflow.js | 240 +++++++++++++++++++------------------- 2 files changed, 244 insertions(+), 242 deletions(-) diff --git a/src/features/cfg.js b/src/features/cfg.js index ae1b8564..3f029274 100644 --- a/src/features/cfg.js +++ b/src/features/cfg.js @@ -68,30 +68,15 @@ export function buildFunctionCFG(functionNode, langId) { return { blocks: r.blocks, edges: r.edges, cyclomatic: r.cyclomatic }; } -// ─── Build-Time: Compute CFG for Changed Files ───────────────────────── +// ─── Build-Time Helpers ───────────────────────────────────────────────── -/** - * Build CFG data for all function/method 
definitions and persist to DB. - * - * @param {object} db - open better-sqlite3 database (read-write) - * @param {Map} fileSymbols - Map - * @param {string} rootDir - absolute project root path - * @param {object} [_engineOpts] - engine options (unused; always uses WASM for AST) - */ -export async function buildCFGData(db, fileSymbols, rootDir, _engineOpts) { - // Lazily init WASM parsers if needed - let parsers = null; +async function initCfgParsers(fileSymbols) { let needsFallback = false; - // Always build ext→langId map so native-only builds (where _langId is unset) - // can still derive the language from the file extension. - const extToLang = buildExtToLangMap(); - for (const [relPath, symbols] of fileSymbols) { if (!symbols._tree) { const ext = path.extname(relPath).toLowerCase(); if (CFG_EXTENSIONS.has(ext)) { - // Check if all function/method defs already have native CFG data const hasNativeCfg = symbols.definitions .filter((d) => (d.kind === 'function' || d.kind === 'method') && d.line) .every((d) => d.cfg === null || d.cfg?.blocks?.length); @@ -103,18 +88,131 @@ export async function buildCFGData(db, fileSymbols, rootDir, _engineOpts) { } } + let parsers = null; + let getParserFn = null; + if (needsFallback) { const { createParsers } = await import('../domain/parser.js'); parsers = await createParsers(); - } - - let getParserFn = null; - if (parsers) { const mod = await import('../domain/parser.js'); getParserFn = mod.getParser; } - // findFunctionNode imported from ./ast-analysis/shared.js at module level + return { parsers, getParserFn }; +} + +function getTreeAndLang(symbols, relPath, rootDir, extToLang, parsers, getParserFn) { + const ext = path.extname(relPath).toLowerCase(); + let tree = symbols._tree; + let langId = symbols._langId; + + const allNative = symbols.definitions + .filter((d) => (d.kind === 'function' || d.kind === 'method') && d.line) + .every((d) => d.cfg === null || d.cfg?.blocks?.length); + + if (!tree && !allNative) { + if 
(!getParserFn) return null; + langId = extToLang.get(ext); + if (!langId || !CFG_RULES.has(langId)) return null; + + const absPath = path.join(rootDir, relPath); + let code; + try { + code = fs.readFileSync(absPath, 'utf-8'); + } catch (e) { + debug(`cfg: cannot read ${relPath}: ${e.message}`); + return null; + } + + const parser = getParserFn(parsers, absPath); + if (!parser) return null; + + try { + tree = parser.parse(code); + } catch (e) { + debug(`cfg: parse failed for ${relPath}: ${e.message}`); + return null; + } + } + + if (!langId) { + langId = extToLang.get(ext); + if (!langId) return null; + } + + return { tree, langId }; +} + +function buildVisitorCfgMap(tree, cfgRules, symbols, langId) { + const needsVisitor = + tree && + symbols.definitions.some( + (d) => + (d.kind === 'function' || d.kind === 'method') && + d.line && + d.cfg !== null && + !d.cfg?.blocks?.length, + ); + if (!needsVisitor) return null; + + const visitor = createCfgVisitor(cfgRules); + const walkerOpts = { + functionNodeTypes: new Set(cfgRules.functionNodes), + nestingNodeTypes: new Set(), + getFunctionName: (node) => { + const nameNode = node.childForFieldName('name'); + return nameNode ? 
nameNode.text : null; + }, + }; + const walkResults = walkWithVisitors(tree.rootNode, [visitor], langId, walkerOpts); + const cfgResults = walkResults.cfg || []; + const visitorCfgByLine = new Map(); + for (const r of cfgResults) { + if (r.funcNode) { + const line = r.funcNode.startPosition.row + 1; + if (!visitorCfgByLine.has(line)) visitorCfgByLine.set(line, []); + visitorCfgByLine.get(line).push(r); + } + } + return visitorCfgByLine; +} + +function persistCfg(cfg, nodeId, insertBlock, insertEdge) { + const blockDbIds = new Map(); + for (const block of cfg.blocks) { + const result = insertBlock.run( + nodeId, + block.index, + block.type, + block.startLine, + block.endLine, + block.label, + ); + blockDbIds.set(block.index, result.lastInsertRowid); + } + + for (const edge of cfg.edges) { + const sourceDbId = blockDbIds.get(edge.sourceIndex); + const targetDbId = blockDbIds.get(edge.targetIndex); + if (sourceDbId && targetDbId) { + insertEdge.run(nodeId, sourceDbId, targetDbId, edge.kind); + } + } +} + +// ─── Build-Time: Compute CFG for Changed Files ───────────────────────── + +/** + * Build CFG data for all function/method definitions and persist to DB. 
+ * + * @param {object} db - open better-sqlite3 database (read-write) + * @param {Map} fileSymbols - Map + * @param {string} rootDir - absolute project root path + * @param {object} [_engineOpts] - engine options (unused; always uses WASM for AST) + */ +export async function buildCFGData(db, fileSymbols, rootDir, _engineOpts) { + const extToLang = buildExtToLangMap(); + const { parsers, getParserFn } = await initCfgParsers(fileSymbols); const insertBlock = db.prepare( `INSERT INTO cfg_blocks (function_node_id, block_index, block_type, start_line, end_line, label) @@ -131,81 +229,14 @@ export async function buildCFGData(db, fileSymbols, rootDir, _engineOpts) { const ext = path.extname(relPath).toLowerCase(); if (!CFG_EXTENSIONS.has(ext)) continue; - let tree = symbols._tree; - let langId = symbols._langId; - - // Check if all defs already have native CFG — skip WASM parse if so - const allNative = symbols.definitions - .filter((d) => (d.kind === 'function' || d.kind === 'method') && d.line) - .every((d) => d.cfg === null || d.cfg?.blocks?.length); - - // WASM fallback if no cached tree and not all native - if (!tree && !allNative) { - if (!getParserFn) continue; - langId = extToLang.get(ext); - if (!langId || !CFG_RULES.has(langId)) continue; - - const absPath = path.join(rootDir, relPath); - let code; - try { - code = fs.readFileSync(absPath, 'utf-8'); - } catch (e) { - debug(`cfg: cannot read ${relPath}: ${e.message}`); - continue; - } - - const parser = getParserFn(parsers, absPath); - if (!parser) continue; - - try { - tree = parser.parse(code); - } catch (e) { - debug(`cfg: parse failed for ${relPath}: ${e.message}`); - continue; - } - } - - if (!langId) { - langId = extToLang.get(ext); - if (!langId) continue; - } + const treeLang = getTreeAndLang(symbols, relPath, rootDir, extToLang, parsers, getParserFn); + if (!treeLang) continue; + const { tree, langId } = treeLang; const cfgRules = CFG_RULES.get(langId); if (!cfgRules) continue; - // WASM fallback: run 
file-level visitor walk to compute CFG for all functions - // that don't already have pre-computed data (from native engine or unified walk) - let visitorCfgByLine = null; - const needsVisitor = - tree && - symbols.definitions.some( - (d) => - (d.kind === 'function' || d.kind === 'method') && - d.line && - d.cfg !== null && - !d.cfg?.blocks?.length, - ); - if (needsVisitor) { - const visitor = createCfgVisitor(cfgRules); - const walkerOpts = { - functionNodeTypes: new Set(cfgRules.functionNodes), - nestingNodeTypes: new Set(), - getFunctionName: (node) => { - const nameNode = node.childForFieldName('name'); - return nameNode ? nameNode.text : null; - }, - }; - const walkResults = walkWithVisitors(tree.rootNode, [visitor], langId, walkerOpts); - const cfgResults = walkResults.cfg || []; - visitorCfgByLine = new Map(); - for (const r of cfgResults) { - if (r.funcNode) { - const line = r.funcNode.startPosition.row + 1; - if (!visitorCfgByLine.has(line)) visitorCfgByLine.set(line, []); - visitorCfgByLine.get(line).push(r); - } - } - } + const visitorCfgByLine = buildVisitorCfgMap(tree, cfgRules, symbols, langId); for (const def of symbols.definitions) { if (def.kind !== 'function' && def.kind !== 'method') continue; @@ -214,7 +245,6 @@ export async function buildCFGData(db, fileSymbols, rootDir, _engineOpts) { const nodeId = getFunctionNodeId(db, def.name, relPath, def.line); if (!nodeId) continue; - // Use pre-computed CFG (native engine or unified walk), then visitor fallback let cfg = null; if (def.cfg?.blocks?.length) { cfg = def.cfg; @@ -233,36 +263,10 @@ export async function buildCFGData(db, fileSymbols, rootDir, _engineOpts) { if (!cfg || cfg.blocks.length === 0) continue; - // Clear old CFG data for this function deleteCfgForNode(db, nodeId); - - // Insert blocks and build index→dbId mapping - const blockDbIds = new Map(); - for (const block of cfg.blocks) { - const result = insertBlock.run( - nodeId, - block.index, - block.type, - block.startLine, - 
block.endLine, - block.label, - ); - blockDbIds.set(block.index, result.lastInsertRowid); - } - - // Insert edges - for (const edge of cfg.edges) { - const sourceDbId = blockDbIds.get(edge.sourceIndex); - const targetDbId = blockDbIds.get(edge.targetIndex); - if (sourceDbId && targetDbId) { - insertEdge.run(nodeId, sourceDbId, targetDbId, edge.kind); - } - } - + persistCfg(cfg, nodeId, insertBlock, insertEdge); analyzed++; } - - // Don't release _tree here — complexity/dataflow may still need it } }); diff --git a/src/features/dataflow.js b/src/features/dataflow.js index 695afa95..2dee25b6 100644 --- a/src/features/dataflow.js +++ b/src/features/dataflow.js @@ -58,26 +58,11 @@ export function extractDataflow(tree, _filePath, _definitions, langId = 'javascr return results.dataflow; } -// ── buildDataflowEdges ────────────────────────────────────────────────────── +// ── Build-Time Helpers ────────────────────────────────────────────────────── -/** - * Build dataflow edges and insert them into the database. - * Called during graph build when --dataflow is enabled. - * - * @param {object} db - better-sqlite3 database instance - * @param {Map} fileSymbols - map of relPath → symbols - * @param {string} rootDir - absolute root directory - * @param {object} engineOpts - engine options - */ -export async function buildDataflowEdges(db, fileSymbols, rootDir, _engineOpts) { - // Lazily init WASM parsers if needed - let parsers = null; +async function initDataflowParsers(fileSymbols) { let needsFallback = false; - // Always build ext→langId map so native-only builds (where _langId is unset) - // can still derive the language from the file extension. 
- const extToLang = buildExtToLangMap(); - for (const [relPath, symbols] of fileSymbols) { if (!symbols._tree && !symbols.dataflow) { const ext = path.extname(relPath).toLowerCase(); @@ -88,25 +73,130 @@ export async function buildDataflowEdges(db, fileSymbols, rootDir, _engineOpts) } } + let parsers = null; + let getParserFn = null; + if (needsFallback) { const { createParsers } = await import('../domain/parser.js'); parsers = await createParsers(); - } - - let getParserFn = null; - if (parsers) { const mod = await import('../domain/parser.js'); getParserFn = mod.getParser; } + return { parsers, getParserFn }; +} + +function getDataflowForFile(symbols, relPath, rootDir, extToLang, parsers, getParserFn) { + if (symbols.dataflow) return symbols.dataflow; + + let tree = symbols._tree; + let langId = symbols._langId; + + if (!tree) { + if (!getParserFn) return null; + const ext = path.extname(relPath).toLowerCase(); + langId = extToLang.get(ext); + if (!langId || !DATAFLOW_RULES.has(langId)) return null; + + const absPath = path.join(rootDir, relPath); + let code; + try { + code = fs.readFileSync(absPath, 'utf-8'); + } catch (e) { + debug(`dataflow: cannot read ${relPath}: ${e.message}`); + return null; + } + + const parser = getParserFn(parsers, absPath); + if (!parser) return null; + + try { + tree = parser.parse(code); + } catch (e) { + debug(`dataflow: parse failed for ${relPath}: ${e.message}`); + return null; + } + } + + if (!langId) { + const ext = path.extname(relPath).toLowerCase(); + langId = extToLang.get(ext); + if (!langId) return null; + } + + if (!DATAFLOW_RULES.has(langId)) return null; + + return extractDataflow(tree, relPath, symbols.definitions, langId); +} + +function insertDataflowEdges(insert, data, resolveNode) { + let edgeCount = 0; + + for (const flow of data.argFlows) { + const sourceNode = resolveNode(flow.callerFunc); + const targetNode = resolveNode(flow.calleeName); + if (sourceNode && targetNode) { + insert.run( + sourceNode.id, + 
targetNode.id, + 'flows_to', + flow.argIndex, + flow.expression, + flow.line, + flow.confidence, + ); + edgeCount++; + } + } + + for (const assignment of data.assignments) { + const producerNode = resolveNode(assignment.sourceCallName); + const consumerNode = resolveNode(assignment.callerFunc); + if (producerNode && consumerNode) { + insert.run( + producerNode.id, + consumerNode.id, + 'returns', + null, + assignment.expression, + assignment.line, + 1.0, + ); + edgeCount++; + } + } + + for (const mut of data.mutations) { + const mutatorNode = resolveNode(mut.funcName); + if (mutatorNode && mut.binding?.type === 'param') { + insert.run(mutatorNode.id, mutatorNode.id, 'mutates', null, mut.mutatingExpr, mut.line, 1.0); + edgeCount++; + } + } + + return edgeCount; +} + +// ── buildDataflowEdges ────────────────────────────────────────────────────── + +/** + * Build dataflow edges and insert them into the database. + * Called during graph build when --dataflow is enabled. + * + * @param {object} db - better-sqlite3 database instance + * @param {Map} fileSymbols - map of relPath → symbols + * @param {string} rootDir - absolute root directory + * @param {object} engineOpts - engine options + */ +export async function buildDataflowEdges(db, fileSymbols, rootDir, _engineOpts) { + const extToLang = buildExtToLangMap(); + const { parsers, getParserFn } = await initDataflowParsers(fileSymbols); + const insert = db.prepare( `INSERT INTO dataflow (source_id, target_id, kind, param_index, expression, line, confidence) VALUES (?, ?, ?, ?, ?, ?, ?)`, ); - // MVP scope: only resolve function/method nodes for dataflow edges. - // Future expansion: add 'parameter', 'property', 'constant' kinds to track - // data flow through property accessors or constant references. const getNodeByNameAndFile = db.prepare( `SELECT id, name, kind, file, line FROM nodes WHERE name = ? AND file = ? 
AND kind IN ('function', 'method')`, @@ -125,109 +215,17 @@ export async function buildDataflowEdges(db, fileSymbols, rootDir, _engineOpts) const ext = path.extname(relPath).toLowerCase(); if (!DATAFLOW_EXTENSIONS.has(ext)) continue; - // Use native dataflow data if available — skip WASM extraction - let data = symbols.dataflow; - if (!data) { - let tree = symbols._tree; - let langId = symbols._langId; - - // WASM fallback if no cached tree - if (!tree) { - if (!getParserFn) continue; - langId = extToLang.get(ext); - if (!langId || !DATAFLOW_RULES.has(langId)) continue; - - const absPath = path.join(rootDir, relPath); - let code; - try { - code = fs.readFileSync(absPath, 'utf-8'); - } catch (e) { - debug(`dataflow: cannot read ${relPath}: ${e.message}`); - continue; - } - - const parser = getParserFn(parsers, absPath); - if (!parser) continue; - - try { - tree = parser.parse(code); - } catch (e) { - debug(`dataflow: parse failed for ${relPath}: ${e.message}`); - continue; - } - } - - if (!langId) { - langId = extToLang.get(ext); - if (!langId) continue; - } - - if (!DATAFLOW_RULES.has(langId)) continue; - - data = extractDataflow(tree, relPath, symbols.definitions, langId); - } + const data = getDataflowForFile(symbols, relPath, rootDir, extToLang, parsers, getParserFn); + if (!data) continue; - // Resolve function names to node IDs in this file first, then globally - function resolveNode(funcName) { + const resolveNode = (funcName) => { const local = getNodeByNameAndFile.all(funcName, relPath); if (local.length > 0) return local[0]; const global = getNodeByName.all(funcName); return global.length > 0 ? 
global[0] : null; - } - - // flows_to: parameter/variable passed as argument to another function - for (const flow of data.argFlows) { - const sourceNode = resolveNode(flow.callerFunc); - const targetNode = resolveNode(flow.calleeName); - if (sourceNode && targetNode) { - insert.run( - sourceNode.id, - targetNode.id, - 'flows_to', - flow.argIndex, - flow.expression, - flow.line, - flow.confidence, - ); - totalEdges++; - } - } - - // returns: call return value captured in caller - for (const assignment of data.assignments) { - const producerNode = resolveNode(assignment.sourceCallName); - const consumerNode = resolveNode(assignment.callerFunc); - if (producerNode && consumerNode) { - insert.run( - producerNode.id, - consumerNode.id, - 'returns', - null, - assignment.expression, - assignment.line, - 1.0, - ); - totalEdges++; - } - } + }; - // mutates: parameter-derived value is mutated - for (const mut of data.mutations) { - const mutatorNode = resolveNode(mut.funcName); - if (mutatorNode && mut.binding?.type === 'param') { - // The mutation in this function affects the parameter source - insert.run( - mutatorNode.id, - mutatorNode.id, - 'mutates', - null, - mut.mutatingExpr, - mut.line, - 1.0, - ); - totalEdges++; - } - } + totalEdges += insertDataflowEdges(insert, data, resolveNode); } }); From e935eac77d8408009e250cb92df87efb447cc4a2 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 03:36:21 -0600 Subject: [PATCH 07/22] refactor: decompose sequenceData into BFS and message construction Impact: 5 functions changed, 2 affected --- src/features/sequence.js | 293 ++++++++++++++++++++------------------- 1 file changed, 152 insertions(+), 141 deletions(-) diff --git a/src/features/sequence.js b/src/features/sequence.js index 271d2ea2..cf59ddc3 100644 --- a/src/features/sequence.js +++ b/src/features/sequence.js @@ -68,6 +68,148 @@ function buildAliases(files) { return aliases; } +// ─── Helpers 
───────────────────────────────────────────────────────── + +function findEntryNode(repo, name, opts) { + let matchNode = findMatchingNodes(repo, name, opts)[0] ?? null; + if (!matchNode) { + for (const prefix of FRAMEWORK_ENTRY_PREFIXES) { + matchNode = findMatchingNodes(repo, `${prefix}${name}`, opts)[0] ?? null; + if (matchNode) break; + } + } + return matchNode; +} + +function bfsCallees(repo, matchNode, maxDepth, noTests) { + const visited = new Set([matchNode.id]); + let frontier = [matchNode.id]; + const messages = []; + const fileSet = new Set([matchNode.file]); + const idToNode = new Map(); + idToNode.set(matchNode.id, matchNode); + let truncated = false; + + for (let d = 1; d <= maxDepth; d++) { + const nextFrontier = []; + + for (const fid of frontier) { + const callees = repo.findCallees(fid); + const caller = idToNode.get(fid); + + for (const c of callees) { + if (noTests && isTestFile(c.file)) continue; + + fileSet.add(c.file); + messages.push({ + from: caller.file, + to: c.file, + label: c.name, + type: 'call', + depth: d, + }); + + if (visited.has(c.id)) continue; + + visited.add(c.id); + nextFrontier.push(c.id); + idToNode.set(c.id, c); + } + } + + frontier = nextFrontier; + if (frontier.length === 0) break; + + if (d === maxDepth && frontier.length > 0) { + const hasMoreCalls = frontier.some((fid) => repo.findCallees(fid).length > 0); + if (hasMoreCalls) truncated = true; + } + } + + return { messages, fileSet, idToNode, truncated }; +} + +function annotateDataflow(repo, messages, idToNode) { + const hasTable = repo.hasDataflowTable(); + + if (!hasTable || !(repo instanceof SqliteRepository)) return; + + const db = repo.db; + const nodeByNameFile = new Map(); + for (const n of idToNode.values()) { + nodeByNameFile.set(`${n.name}|${n.file}`, n); + } + + const getReturns = db.prepare( + `SELECT d.expression FROM dataflow d + WHERE d.source_id = ? 
AND d.kind = 'returns'`, + ); + const getFlowsTo = db.prepare( + `SELECT d.expression FROM dataflow d + WHERE d.target_id = ? AND d.kind = 'flows_to' + ORDER BY d.param_index`, + ); + + const seenReturns = new Set(); + for (const msg of [...messages]) { + if (msg.type !== 'call') continue; + const targetNode = nodeByNameFile.get(`${msg.label}|${msg.to}`); + if (!targetNode) continue; + + const returnKey = `${msg.to}->${msg.from}:${msg.label}`; + if (seenReturns.has(returnKey)) continue; + + const returns = getReturns.all(targetNode.id); + + if (returns.length > 0) { + seenReturns.add(returnKey); + const expr = returns[0].expression || 'result'; + messages.push({ + from: msg.to, + to: msg.from, + label: expr, + type: 'return', + depth: msg.depth, + }); + } + } + + for (const msg of messages) { + if (msg.type !== 'call') continue; + const targetNode = nodeByNameFile.get(`${msg.label}|${msg.to}`); + if (!targetNode) continue; + + const params = getFlowsTo.all(targetNode.id); + + if (params.length > 0) { + const paramNames = params + .map((p) => p.expression) + .filter(Boolean) + .slice(0, 3); + if (paramNames.length > 0) { + msg.label = `${msg.label}(${paramNames.join(', ')})`; + } + } + } +} + +function buildParticipants(fileSet, entryFile) { + const aliases = buildAliases([...fileSet]); + const participants = [...fileSet].map((file) => ({ + id: aliases.get(file), + label: file.split('/').pop(), + file, + })); + + participants.sort((a, b) => { + if (a.file === entryFile) return -1; + if (b.file === entryFile) return 1; + return a.file.localeCompare(b.file); + }); + + return { participants, aliases }; +} + // ─── Core data function ────────────────────────────────────────────── /** @@ -90,19 +232,8 @@ export function sequenceData(name, dbPath, opts = {}) { try { const maxDepth = opts.depth || 10; const noTests = opts.noTests || false; - const withDataflow = opts.dataflow || false; - - // Phase 1: Direct LIKE match - let matchNode = findMatchingNodes(repo, name, 
opts)[0] ?? null; - - // Phase 2: Prefix-stripped matching - if (!matchNode) { - for (const prefix of FRAMEWORK_ENTRY_PREFIXES) { - matchNode = findMatchingNodes(repo, `${prefix}${name}`, opts)[0] ?? null; - if (matchNode) break; - } - } + const matchNode = findEntryNode(repo, name, opts); if (!matchNode) { return { entry: null, @@ -121,123 +252,17 @@ export function sequenceData(name, dbPath, opts = {}) { line: matchNode.line, }; - // BFS forward — track edges, not just nodes - const visited = new Set([matchNode.id]); - let frontier = [matchNode.id]; - const messages = []; - const fileSet = new Set([matchNode.file]); - const idToNode = new Map(); - idToNode.set(matchNode.id, matchNode); - let truncated = false; - - for (let d = 1; d <= maxDepth; d++) { - const nextFrontier = []; - - for (const fid of frontier) { - const callees = repo.findCallees(fid); - - const caller = idToNode.get(fid); - - for (const c of callees) { - if (noTests && isTestFile(c.file)) continue; - - // Always record the message (even for visited nodes — different caller path) - fileSet.add(c.file); - messages.push({ - from: caller.file, - to: c.file, - label: c.name, - type: 'call', - depth: d, - }); - - if (visited.has(c.id)) continue; - - visited.add(c.id); - nextFrontier.push(c.id); - idToNode.set(c.id, c); - } - } - - frontier = nextFrontier; - if (frontier.length === 0) break; - - if (d === maxDepth && frontier.length > 0) { - // Only mark truncated if at least one frontier node has further callees - const hasMoreCalls = frontier.some((fid) => repo.findCallees(fid).length > 0); - if (hasMoreCalls) truncated = true; - } - } - - // Dataflow annotations: add return arrows - if (withDataflow && messages.length > 0) { - const hasTable = repo.hasDataflowTable(); - - if (hasTable && repo instanceof SqliteRepository) { - const db = repo.db; - // Build name|file lookup for O(1) target node access - const nodeByNameFile = new Map(); - for (const n of idToNode.values()) { - 
nodeByNameFile.set(`${n.name}|${n.file}`, n); - } - - const getReturns = db.prepare( - `SELECT d.expression FROM dataflow d - WHERE d.source_id = ? AND d.kind = 'returns'`, - ); - const getFlowsTo = db.prepare( - `SELECT d.expression FROM dataflow d - WHERE d.target_id = ? AND d.kind = 'flows_to' - ORDER BY d.param_index`, - ); - - // For each called function, check if it has return edges - const seenReturns = new Set(); - for (const msg of [...messages]) { - if (msg.type !== 'call') continue; - const targetNode = nodeByNameFile.get(`${msg.label}|${msg.to}`); - if (!targetNode) continue; - - const returnKey = `${msg.to}->${msg.from}:${msg.label}`; - if (seenReturns.has(returnKey)) continue; - - const returns = getReturns.all(targetNode.id); - - if (returns.length > 0) { - seenReturns.add(returnKey); - const expr = returns[0].expression || 'result'; - messages.push({ - from: msg.to, - to: msg.from, - label: expr, - type: 'return', - depth: msg.depth, - }); - } - } + const { messages, fileSet, idToNode, truncated } = bfsCallees( + repo, + matchNode, + maxDepth, + noTests, + ); - // Annotate call messages with parameter names - for (const msg of messages) { - if (msg.type !== 'call') continue; - const targetNode = nodeByNameFile.get(`${msg.label}|${msg.to}`); - if (!targetNode) continue; - - const params = getFlowsTo.all(targetNode.id); - - if (params.length > 0) { - const paramNames = params - .map((p) => p.expression) - .filter(Boolean) - .slice(0, 3); - if (paramNames.length > 0) { - msg.label = `${msg.label}(${paramNames.join(', ')})`; - } - } - } - } + if (opts.dataflow && messages.length > 0) { + annotateDataflow(repo, messages, idToNode); } - // Sort messages by depth, then call before return messages.sort((a, b) => { if (a.depth !== b.depth) return a.depth - b.depth; if (a.type === 'call' && b.type === 'return') return -1; @@ -245,22 +270,8 @@ export function sequenceData(name, dbPath, opts = {}) { return 0; }); - // Build participant list from files - const 
aliases = buildAliases([...fileSet]); - const participants = [...fileSet].map((file) => ({ - id: aliases.get(file), - label: file.split('/').pop(), - file, - })); - - // Sort participants: entry file first, then alphabetically - participants.sort((a, b) => { - if (a.file === entry.file) return -1; - if (b.file === entry.file) return 1; - return a.file.localeCompare(b.file); - }); + const { participants, aliases } = buildParticipants(fileSet, entry.file); - // Replace file paths with alias IDs in messages for (const msg of messages) { msg.from = aliases.get(msg.from); msg.to = aliases.get(msg.to); From a00cf6675327bb55e11e4cd3e11398c812bd2737 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 03:39:38 -0600 Subject: [PATCH 08/22] refactor: decompose explain() into section renderers Impact: 5 functions changed, 2 affected --- src/presentation/queries-cli/inspect.js | 373 ++++++++++++------------ 1 file changed, 184 insertions(+), 189 deletions(-) diff --git a/src/presentation/queries-cli/inspect.js b/src/presentation/queries-cli/inspect.js index 5a3ddcb7..59b85d63 100644 --- a/src/presentation/queries-cli/inspect.js +++ b/src/presentation/queries-cli/inspect.js @@ -96,96 +96,7 @@ export function context(name, customDbPath, opts = {}) { } for (const r of data.results) { - const lineRange = r.endLine ? `${r.line}-${r.endLine}` : `${r.line}`; - const roleTag = r.role ? 
` [${r.role}]` : ''; - console.log(`\n# ${r.name} (${r.kind})${roleTag} — ${r.file}:${lineRange}\n`); - - // Signature - if (r.signature) { - console.log('## Type/Shape Info'); - if (r.signature.params != null) console.log(` Parameters: (${r.signature.params})`); - if (r.signature.returnType) console.log(` Returns: ${r.signature.returnType}`); - console.log(); - } - - // Children - if (r.children && r.children.length > 0) { - console.log(`## Children (${r.children.length})`); - for (const c of r.children) { - console.log(` ${kindIcon(c.kind)} ${c.name} :${c.line}`); - } - console.log(); - } - - // Complexity - if (r.complexity) { - const cx = r.complexity; - const miPart = cx.maintainabilityIndex ? ` | MI: ${cx.maintainabilityIndex}` : ''; - console.log('## Complexity'); - console.log( - ` Cognitive: ${cx.cognitive} | Cyclomatic: ${cx.cyclomatic} | Max Nesting: ${cx.maxNesting}${miPart}`, - ); - console.log(); - } - - // Source - if (r.source) { - console.log('## Source'); - for (const line of r.source.split('\n')) { - console.log(` ${line}`); - } - console.log(); - } - - // Callees - if (r.callees.length > 0) { - console.log(`## Direct Dependencies (${r.callees.length})`); - for (const c of r.callees) { - const summary = c.summary ? ` — ${c.summary}` : ''; - console.log(` ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}${summary}`); - if (c.source) { - for (const line of c.source.split('\n').slice(0, 10)) { - console.log(` | ${line}`); - } - } - } - console.log(); - } - - // Callers - if (r.callers.length > 0) { - console.log(`## Callers (${r.callers.length})`); - for (const c of r.callers) { - const via = c.viaHierarchy ? 
` (via ${c.viaHierarchy})` : ''; - console.log(` ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}${via}`); - } - console.log(); - } - - // Related tests - if (r.relatedTests.length > 0) { - console.log('## Related Tests'); - for (const t of r.relatedTests) { - console.log(` ${t.file} — ${t.testCount} tests`); - for (const tn of t.testNames) { - console.log(` - ${tn}`); - } - if (t.source) { - console.log(' Source:'); - for (const line of t.source.split('\n').slice(0, 20)) { - console.log(` | ${line}`); - } - } - } - console.log(); - } - - if (r.callees.length === 0 && r.callers.length === 0 && r.relatedTests.length === 0) { - console.log( - ' (no call edges or tests found — may be invoked dynamically or via re-exports)', - ); - console.log(); - } + renderContextResult(r); } } @@ -209,126 +120,210 @@ export function children(name, customDbPath, opts = {}) { } } -export function explain(target, customDbPath, opts = {}) { - const data = explainData(target, customDbPath, opts); - if (outputResult(data, 'results', opts)) return; +function renderContextResult(r) { + const lineRange = r.endLine ? `${r.line}-${r.endLine}` : `${r.line}`; + const roleTag = r.role ? ` [${r.role}]` : ''; + console.log(`\n# ${r.name} (${r.kind})${roleTag} — ${r.file}:${lineRange}\n`); - if (data.results.length === 0) { - console.log(`No ${data.kind === 'file' ? 'file' : 'function/symbol'} matching "${target}"`); - return; + if (r.signature) { + console.log('## Type/Shape Info'); + if (r.signature.params != null) console.log(` Parameters: (${r.signature.params})`); + if (r.signature.returnType) console.log(` Returns: ${r.signature.returnType}`); + console.log(); } - if (data.kind === 'file') { - for (const r of data.results) { - const publicCount = r.publicApi.length; - const internalCount = r.internal.length; - const lineInfo = r.lineCount ? 
`${r.lineCount} lines, ` : ''; - console.log(`\n# ${r.file}`); - console.log( - ` ${lineInfo}${r.symbolCount} symbols (${publicCount} exported, ${internalCount} internal)`, - ); + if (r.children && r.children.length > 0) { + console.log(`## Children (${r.children.length})`); + for (const c of r.children) { + console.log(` ${kindIcon(c.kind)} ${c.name} :${c.line}`); + } + console.log(); + } - if (r.imports.length > 0) { - console.log(` Imports: ${r.imports.map((i) => i.file).join(', ')}`); - } - if (r.importedBy.length > 0) { - console.log(` Imported by: ${r.importedBy.map((i) => i.file).join(', ')}`); - } + if (r.complexity) { + const cx = r.complexity; + const miPart = cx.maintainabilityIndex ? ` | MI: ${cx.maintainabilityIndex}` : ''; + console.log('## Complexity'); + console.log( + ` Cognitive: ${cx.cognitive} | Cyclomatic: ${cx.cyclomatic} | Max Nesting: ${cx.maxNesting}${miPart}`, + ); + console.log(); + } - if (r.publicApi.length > 0) { - console.log(`\n## Exported`); - for (const s of r.publicApi) { - const sig = s.signature?.params != null ? `(${s.signature.params})` : ''; - const roleTag = s.role ? ` [${s.role}]` : ''; - const summary = s.summary ? ` -- ${s.summary}` : ''; - console.log(` ${kindIcon(s.kind)} ${s.name}${sig}${roleTag} :${s.line}${summary}`); - } - } + if (r.source) { + console.log('## Source'); + for (const line of r.source.split('\n')) { + console.log(` ${line}`); + } + console.log(); + } - if (r.internal.length > 0) { - console.log(`\n## Internal`); - for (const s of r.internal) { - const sig = s.signature?.params != null ? `(${s.signature.params})` : ''; - const roleTag = s.role ? ` [${s.role}]` : ''; - const summary = s.summary ? ` -- ${s.summary}` : ''; - console.log(` ${kindIcon(s.kind)} ${s.name}${sig}${roleTag} :${s.line}${summary}`); + if (r.callees.length > 0) { + console.log(`## Direct Dependencies (${r.callees.length})`); + for (const c of r.callees) { + const summary = c.summary ? 
` — ${c.summary}` : ''; + console.log(` ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}${summary}`); + if (c.source) { + for (const line of c.source.split('\n').slice(0, 10)) { + console.log(` | ${line}`); } } + } + console.log(); + } - if (r.dataFlow.length > 0) { - console.log(`\n## Data Flow`); - for (const df of r.dataFlow) { - console.log(` ${df.caller} -> ${df.callees.join(', ')}`); - } - } - console.log(); + if (r.callers.length > 0) { + console.log(`## Callers (${r.callers.length})`); + for (const c of r.callers) { + const via = c.viaHierarchy ? ` (via ${c.viaHierarchy})` : ''; + console.log(` ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}${via}`); } - } else { - function printFunctionExplain(r, indent = '') { - const lineRange = r.endLine ? `${r.line}-${r.endLine}` : `${r.line}`; - const lineInfo = r.lineCount ? `${r.lineCount} lines` : ''; - const summaryPart = r.summary ? ` | ${r.summary}` : ''; - const roleTag = r.role ? ` [${r.role}]` : ''; - const depthLevel = r._depth || 0; - const heading = depthLevel === 0 ? '#' : '##'.padEnd(depthLevel + 2, '#'); - console.log(`\n${indent}${heading} ${r.name} (${r.kind})${roleTag} ${r.file}:${lineRange}`); - if (lineInfo || r.summary) { - console.log(`${indent} ${lineInfo}${summaryPart}`); - } - if (r.signature) { - if (r.signature.params != null) - console.log(`${indent} Parameters: (${r.signature.params})`); - if (r.signature.returnType) console.log(`${indent} Returns: ${r.signature.returnType}`); - } + console.log(); + } - if (r.complexity) { - const cx = r.complexity; - const miPart = cx.maintainabilityIndex ? 
` MI=${cx.maintainabilityIndex}` : ''; - console.log( - `${indent} Complexity: cognitive=${cx.cognitive} cyclomatic=${cx.cyclomatic} nesting=${cx.maxNesting}${miPart}`, - ); + if (r.relatedTests.length > 0) { + console.log('## Related Tests'); + for (const t of r.relatedTests) { + console.log(` ${t.file} — ${t.testCount} tests`); + for (const tn of t.testNames) { + console.log(` - ${tn}`); } - - if (r.callees.length > 0) { - console.log(`\n${indent} Calls (${r.callees.length}):`); - for (const c of r.callees) { - console.log(`${indent} ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}`); + if (t.source) { + console.log(' Source:'); + for (const line of t.source.split('\n').slice(0, 20)) { + console.log(` | ${line}`); } } + } + console.log(); + } - if (r.callers.length > 0) { - console.log(`\n${indent} Called by (${r.callers.length}):`); - for (const c of r.callers) { - console.log(`${indent} ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}`); - } - } + if (r.callees.length === 0 && r.callers.length === 0 && r.relatedTests.length === 0) { + console.log(' (no call edges or tests found — may be invoked dynamically or via re-exports)'); + console.log(); + } +} - if (r.relatedTests.length > 0) { - const label = r.relatedTests.length === 1 ? 'file' : 'files'; - console.log(`\n${indent} Tests (${r.relatedTests.length} ${label}):`); - for (const t of r.relatedTests) { - console.log(`${indent} ${t.file}`); - } - } +function renderFileExplain(r) { + const publicCount = r.publicApi.length; + const internalCount = r.internal.length; + const lineInfo = r.lineCount ? 
`${r.lineCount} lines, ` : ''; + console.log(`\n# ${r.file}`); + console.log( + ` ${lineInfo}${r.symbolCount} symbols (${publicCount} exported, ${internalCount} internal)`, + ); + + if (r.imports.length > 0) { + console.log(` Imports: ${r.imports.map((i) => i.file).join(', ')}`); + } + if (r.importedBy.length > 0) { + console.log(` Imported by: ${r.importedBy.map((i) => i.file).join(', ')}`); + } - if (r.callees.length === 0 && r.callers.length === 0) { - console.log( - `${indent} (no call edges found -- may be invoked dynamically or via re-exports)`, - ); - } + if (r.publicApi.length > 0) { + console.log(`\n## Exported`); + for (const s of r.publicApi) { + const sig = s.signature?.params != null ? `(${s.signature.params})` : ''; + const roleTag = s.role ? ` [${s.role}]` : ''; + const summary = s.summary ? ` -- ${s.summary}` : ''; + console.log(` ${kindIcon(s.kind)} ${s.name}${sig}${roleTag} :${s.line}${summary}`); + } + } - // Render recursive dependency details - if (r.depDetails && r.depDetails.length > 0) { - console.log(`\n${indent} --- Dependencies (depth ${depthLevel + 1}) ---`); - for (const dep of r.depDetails) { - printFunctionExplain(dep, `${indent} `); - } - } - console.log(); + if (r.internal.length > 0) { + console.log(`\n## Internal`); + for (const s of r.internal) { + const sig = s.signature?.params != null ? `(${s.signature.params})` : ''; + const roleTag = s.role ? ` [${s.role}]` : ''; + const summary = s.summary ? ` -- ${s.summary}` : ''; + console.log(` ${kindIcon(s.kind)} ${s.name}${sig}${roleTag} :${s.line}${summary}`); + } + } + + if (r.dataFlow.length > 0) { + console.log(`\n## Data Flow`); + for (const df of r.dataFlow) { + console.log(` ${df.caller} -> ${df.callees.join(', ')}`); + } + } + console.log(); +} + +function renderFunctionExplain(r, indent = '') { + const lineRange = r.endLine ? `${r.line}-${r.endLine}` : `${r.line}`; + const lineInfo = r.lineCount ? `${r.lineCount} lines` : ''; + const summaryPart = r.summary ? 
` | ${r.summary}` : ''; + const roleTag = r.role ? ` [${r.role}]` : ''; + const depthLevel = r._depth || 0; + const heading = depthLevel === 0 ? '#' : '##'.padEnd(depthLevel + 2, '#'); + console.log(`\n${indent}${heading} ${r.name} (${r.kind})${roleTag} ${r.file}:${lineRange}`); + if (lineInfo || r.summary) { + console.log(`${indent} ${lineInfo}${summaryPart}`); + } + if (r.signature) { + if (r.signature.params != null) console.log(`${indent} Parameters: (${r.signature.params})`); + if (r.signature.returnType) console.log(`${indent} Returns: ${r.signature.returnType}`); + } + + if (r.complexity) { + const cx = r.complexity; + const miPart = cx.maintainabilityIndex ? ` MI=${cx.maintainabilityIndex}` : ''; + console.log( + `${indent} Complexity: cognitive=${cx.cognitive} cyclomatic=${cx.cyclomatic} nesting=${cx.maxNesting}${miPart}`, + ); + } + + if (r.callees.length > 0) { + console.log(`\n${indent} Calls (${r.callees.length}):`); + for (const c of r.callees) { + console.log(`${indent} ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}`); + } + } + + if (r.callers.length > 0) { + console.log(`\n${indent} Called by (${r.callers.length}):`); + for (const c of r.callers) { + console.log(`${indent} ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}`); + } + } + + if (r.relatedTests.length > 0) { + const label = r.relatedTests.length === 1 ? 
'file' : 'files'; + console.log(`\n${indent} Tests (${r.relatedTests.length} ${label}):`); + for (const t of r.relatedTests) { + console.log(`${indent} ${t.file}`); } + } + + if (r.callees.length === 0 && r.callers.length === 0) { + console.log(`${indent} (no call edges found -- may be invoked dynamically or via re-exports)`); + } + if (r.depDetails && r.depDetails.length > 0) { + console.log(`\n${indent} --- Dependencies (depth ${depthLevel + 1}) ---`); + for (const dep of r.depDetails) { + renderFunctionExplain(dep, `${indent} `); + } + } + console.log(); +} + +export function explain(target, customDbPath, opts = {}) { + const data = explainData(target, customDbPath, opts); + if (outputResult(data, 'results', opts)) return; + + if (data.results.length === 0) { + console.log(`No ${data.kind === 'file' ? 'file' : 'function/symbol'} matching "${target}"`); + return; + } + + if (data.kind === 'file') { + for (const r of data.results) { + renderFileExplain(r); + } + } else { for (const r of data.results) { - printFunctionExplain(r); + renderFunctionExplain(r); } } } From 975a46b6476a1111bad74cd6c701ee8499d775b3 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 03:39:44 -0600 Subject: [PATCH 09/22] refactor: decompose stats() into section printers Impact: 12 functions changed, 6 affected --- src/presentation/queries-cli/overview.js | 115 +++++++++++------------ 1 file changed, 57 insertions(+), 58 deletions(-) diff --git a/src/presentation/queries-cli/overview.js b/src/presentation/queries-cli/overview.js index 88409da2..29a4f6e9 100644 --- a/src/presentation/queries-cli/overview.js +++ b/src/presentation/queries-cli/overview.js @@ -2,64 +2,42 @@ import path from 'node:path'; import { kindIcon, moduleMapData, rolesData, statsData } from '../../domain/queries.js'; import { outputResult } from '../../infrastructure/result-formatter.js'; -export async function stats(customDbPath, opts = {}) { - const data = 
statsData(customDbPath, { noTests: opts.noTests }); - - // Community detection summary (async import for lazy-loading) - try { - const { communitySummaryForStats } = await import('../../features/communities.js'); - data.communities = communitySummaryForStats(customDbPath, { noTests: opts.noTests }); - } catch { - /* graphology may not be available */ - } - - if (outputResult(data, null, opts)) return; - - // Human-readable output - console.log('\n# Codegraph Stats\n'); - - // Nodes - console.log(`Nodes: ${data.nodes.total} total`); - const kindEntries = Object.entries(data.nodes.byKind).sort((a, b) => b[1] - a[1]); - const kindParts = kindEntries.map(([k, v]) => `${k} ${v}`); - for (let i = 0; i < kindParts.length; i += 3) { - const row = kindParts +function printCountGrid(entries, padWidth) { + const parts = entries.map(([k, v]) => `${k} ${v}`); + for (let i = 0; i < parts.length; i += 3) { + const row = parts .slice(i, i + 3) - .map((p) => p.padEnd(18)) + .map((p) => p.padEnd(padWidth)) .join(''); console.log(` ${row}`); } +} - // Edges +function printNodes(data) { + console.log(`Nodes: ${data.nodes.total} total`); + const kindEntries = Object.entries(data.nodes.byKind).sort((a, b) => b[1] - a[1]); + printCountGrid(kindEntries, 18); +} + +function printEdges(data) { console.log(`\nEdges: ${data.edges.total} total`); const edgeEntries = Object.entries(data.edges.byKind).sort((a, b) => b[1] - a[1]); - const edgeParts = edgeEntries.map(([k, v]) => `${k} ${v}`); - for (let i = 0; i < edgeParts.length; i += 3) { - const row = edgeParts - .slice(i, i + 3) - .map((p) => p.padEnd(18)) - .join(''); - console.log(` ${row}`); - } + printCountGrid(edgeEntries, 18); +} - // Files +function printFiles(data) { console.log(`\nFiles: ${data.files.total} (${data.files.languages} languages)`); const langEntries = Object.entries(data.files.byLanguage).sort((a, b) => b[1] - a[1]); - const langParts = langEntries.map(([k, v]) => `${k} ${v}`); - for (let i = 0; i < langParts.length; i 
+= 3) { - const row = langParts - .slice(i, i + 3) - .map((p) => p.padEnd(18)) - .join(''); - console.log(` ${row}`); - } + printCountGrid(langEntries, 18); +} - // Cycles +function printCycles(data) { console.log( `\nCycles: ${data.cycles.fileLevel} file-level, ${data.cycles.functionLevel} function-level`, ); +} - // Hotspots +function printHotspots(data) { if (data.hotspots.length > 0) { console.log(`\nTop ${data.hotspots.length} coupling hotspots:`); for (let i = 0; i < data.hotspots.length; i++) { @@ -69,8 +47,9 @@ export async function stats(customDbPath, opts = {}) { ); } } +} - // Embeddings +function printEmbeddings(data) { if (data.embeddings) { const e = data.embeddings; console.log( @@ -79,8 +58,9 @@ export async function stats(customDbPath, opts = {}) { } else { console.log('\nEmbeddings: not built'); } +} - // Quality +function printQuality(data) { if (data.quality) { const q = data.quality; const cc = q.callerCoverage; @@ -99,24 +79,18 @@ export async function stats(customDbPath, opts = {}) { } } } +} - // Roles +function printRoles(data) { if (data.roles && Object.keys(data.roles).length > 0) { const total = Object.values(data.roles).reduce((a, b) => a + b, 0); console.log(`\nRoles: ${total} classified symbols`); - const roleParts = Object.entries(data.roles) - .sort((a, b) => b[1] - a[1]) - .map(([k, v]) => `${k} ${v}`); - for (let i = 0; i < roleParts.length; i += 3) { - const row = roleParts - .slice(i, i + 3) - .map((p) => p.padEnd(18)) - .join(''); - console.log(` ${row}`); - } + const roleEntries = Object.entries(data.roles).sort((a, b) => b[1] - a[1]); + printCountGrid(roleEntries, 18); } +} - // Complexity +function printComplexity(data) { if (data.complexity) { const cx = data.complexity; const miPart = cx.avgMI != null ? 
` | avg MI: ${cx.avgMI} | min MI: ${cx.minMI}` : ''; @@ -124,15 +98,40 @@ export async function stats(customDbPath, opts = {}) { `\nComplexity: ${cx.analyzed} functions | avg cognitive: ${cx.avgCognitive} | avg cyclomatic: ${cx.avgCyclomatic} | max cognitive: ${cx.maxCognitive}${miPart}`, ); } +} - // Communities +function printCommunities(data) { if (data.communities) { const cm = data.communities; console.log( `\nCommunities: ${cm.communityCount} detected | modularity: ${cm.modularity} | drift: ${cm.driftScore}%`, ); } +} + +export async function stats(customDbPath, opts = {}) { + const data = statsData(customDbPath, { noTests: opts.noTests }); + + try { + const { communitySummaryForStats } = await import('../../features/communities.js'); + data.communities = communitySummaryForStats(customDbPath, { noTests: opts.noTests }); + } catch { + /* graphology may not be available */ + } + if (outputResult(data, null, opts)) return; + + console.log('\n# Codegraph Stats\n'); + printNodes(data); + printEdges(data); + printFiles(data); + printCycles(data); + printHotspots(data); + printEmbeddings(data); + printQuality(data); + printRoles(data); + printComplexity(data); + printCommunities(data); console.log(); } From fa450ee60a3a8b3df11f63e7dcaaeacee23c7097 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 03:53:11 -0600 Subject: [PATCH 10/22] fix: address quality issues in features (boundaries, communities, triage) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract per-section validators from validateBoundaryConfig (cog 101→2). Extract buildCommunityObjects and analyzeDrift from communitiesData (cog 32→4). Extract buildTriageItems and computeTriageSummary from triageData (bugs 1.4→0.48). 
Impact: 13 functions changed, 11 affected --- src/features/boundaries.js | 181 +++++++++++++++++++----------------- src/features/communities.js | 121 +++++++++++++----------- src/features/triage.js | 151 ++++++++++++++++-------------- 3 files changed, 249 insertions(+), 204 deletions(-) diff --git a/src/features/boundaries.js b/src/features/boundaries.js index 7a357ebd..536dbafa 100644 --- a/src/features/boundaries.js +++ b/src/features/boundaries.js @@ -94,104 +94,119 @@ export function resolveModules(boundaryConfig) { // ─── Validation ────────────────────────────────────────────────────── /** - * Validate a boundary configuration object. - * @param {object} config - The `manifesto.boundaries` config - * @returns {{ valid: boolean, errors: string[] }} + * Validate the `modules` section of a boundary config. + * @param {object} modules + * @param {string[]} errors - Mutated: push any validation errors */ -export function validateBoundaryConfig(config) { - const errors = []; +function validateModules(modules, errors) { + if (!modules || typeof modules !== 'object' || Object.keys(modules).length === 0) { + errors.push('boundaries.modules must be a non-empty object'); + return; + } + for (const [name, value] of Object.entries(modules)) { + if (typeof value === 'string') continue; + if (value && typeof value === 'object' && typeof value.match === 'string') continue; + errors.push(`boundaries.modules.${name}: must be a glob string or { match: "" }`); + } +} - if (!config || typeof config !== 'object') { - return { valid: false, errors: ['boundaries config must be an object'] }; +/** + * Validate the `preset` field of a boundary config. 
+ * @param {string|null|undefined} preset + * @param {string[]} errors - Mutated: push any validation errors + */ +function validatePreset(preset, errors) { + if (preset == null) return; + if (typeof preset !== 'string' || !PRESETS[preset]) { + errors.push( + `boundaries.preset: must be one of ${Object.keys(PRESETS).join(', ')} (got "${preset}")`, + ); } +} - // Validate modules - if ( - !config.modules || - typeof config.modules !== 'object' || - Object.keys(config.modules).length === 0 - ) { - errors.push('boundaries.modules must be a non-empty object'); - } else { - for (const [name, value] of Object.entries(config.modules)) { - if (typeof value === 'string') continue; - if (value && typeof value === 'object' && typeof value.match === 'string') continue; - errors.push(`boundaries.modules.${name}: must be a glob string or { match: "" }`); +/** + * Validate a single rule's target list (`notTo` or `onlyTo`). + * @param {*} list - The target list value + * @param {string} field - "notTo" or "onlyTo" + * @param {number} idx - Rule index for error messages + * @param {Set} moduleNames + * @param {string[]} errors - Mutated + */ +function validateTargetList(list, field, idx, moduleNames, errors) { + if (!Array.isArray(list)) { + errors.push(`boundaries.rules[${idx}]: "${field}" must be an array`); + return; + } + for (const target of list) { + if (!moduleNames.has(target)) { + errors.push(`boundaries.rules[${idx}]: "${field}" references unknown module "${target}"`); } } +} - // Validate preset - if (config.preset != null) { - if (typeof config.preset !== 'string' || !PRESETS[config.preset]) { - errors.push( - `boundaries.preset: must be one of ${Object.keys(PRESETS).join(', ')} (got "${config.preset}")`, - ); +/** + * Validate the `rules` array of a boundary config. 
+ * @param {Array} rules + * @param {object|undefined} modules - The modules config (for cross-referencing names) + * @param {string[]} errors - Mutated + */ +function validateRules(rules, modules, errors) { + if (!rules) return; + if (!Array.isArray(rules)) { + errors.push('boundaries.rules must be an array'); + return; + } + const moduleNames = modules ? new Set(Object.keys(modules)) : new Set(); + for (let i = 0; i < rules.length; i++) { + const rule = rules[i]; + if (!rule.from) { + errors.push(`boundaries.rules[${i}]: missing "from" field`); + } else if (!moduleNames.has(rule.from)) { + errors.push(`boundaries.rules[${i}]: "from" references unknown module "${rule.from}"`); + } + if (rule.notTo && rule.onlyTo) { + errors.push(`boundaries.rules[${i}]: cannot have both "notTo" and "onlyTo"`); + } + if (!rule.notTo && !rule.onlyTo) { + errors.push(`boundaries.rules[${i}]: must have either "notTo" or "onlyTo"`); } + if (rule.notTo) validateTargetList(rule.notTo, 'notTo', i, moduleNames, errors); + if (rule.onlyTo) validateTargetList(rule.onlyTo, 'onlyTo', i, moduleNames, errors); } +} - // Validate rules - if (config.rules) { - if (!Array.isArray(config.rules)) { - errors.push('boundaries.rules must be an array'); - } else { - const moduleNames = config.modules ? 
new Set(Object.keys(config.modules)) : new Set(); - for (let i = 0; i < config.rules.length; i++) { - const rule = config.rules[i]; - if (!rule.from) { - errors.push(`boundaries.rules[${i}]: missing "from" field`); - } else if (!moduleNames.has(rule.from)) { - errors.push(`boundaries.rules[${i}]: "from" references unknown module "${rule.from}"`); - } - if (rule.notTo && rule.onlyTo) { - errors.push(`boundaries.rules[${i}]: cannot have both "notTo" and "onlyTo"`); - } - if (!rule.notTo && !rule.onlyTo) { - errors.push(`boundaries.rules[${i}]: must have either "notTo" or "onlyTo"`); - } - if (rule.notTo) { - if (!Array.isArray(rule.notTo)) { - errors.push(`boundaries.rules[${i}]: "notTo" must be an array`); - } else { - for (const target of rule.notTo) { - if (!moduleNames.has(target)) { - errors.push( - `boundaries.rules[${i}]: "notTo" references unknown module "${target}"`, - ); - } - } - } - } - if (rule.onlyTo) { - if (!Array.isArray(rule.onlyTo)) { - errors.push(`boundaries.rules[${i}]: "onlyTo" must be an array`); - } else { - for (const target of rule.onlyTo) { - if (!moduleNames.has(target)) { - errors.push( - `boundaries.rules[${i}]: "onlyTo" references unknown module "${target}"`, - ); - } - } - } - } - } +/** + * Validate that module layer assignments match preset layers. 
+ * @param {object} config + * @param {string[]} errors - Mutated + */ +function validateLayerAssignments(config, errors) { + if (!config.preset || !PRESETS[config.preset] || !config.modules) return; + const presetLayers = new Set(PRESETS[config.preset].layers); + for (const [name, value] of Object.entries(config.modules)) { + if (typeof value === 'object' && value.layer && !presetLayers.has(value.layer)) { + errors.push( + `boundaries.modules.${name}: layer "${value.layer}" not in preset "${config.preset}" (valid: ${[...presetLayers].join(', ')})`, + ); } } +} - // Validate preset + layer assignments - if (config.preset && PRESETS[config.preset] && config.modules) { - const presetLayers = new Set(PRESETS[config.preset].layers); - for (const [name, value] of Object.entries(config.modules)) { - if (typeof value === 'object' && value.layer) { - if (!presetLayers.has(value.layer)) { - errors.push( - `boundaries.modules.${name}: layer "${value.layer}" not in preset "${config.preset}" (valid: ${[...presetLayers].join(', ')})`, - ); - } - } - } +/** + * Validate a boundary configuration object. + * @param {object} config - The `manifesto.boundaries` config + * @returns {{ valid: boolean, errors: string[] }} + */ +export function validateBoundaryConfig(config) { + if (!config || typeof config !== 'object') { + return { valid: false, errors: ['boundaries config must be an object'] }; } + const errors = []; + validateModules(config.modules, errors); + validatePreset(config.preset, errors); + validateRules(config.rules, config.modules, errors); + validateLayerAssignments(config, errors); return { valid: errors.length === 0, errors }; } diff --git a/src/features/communities.js b/src/features/communities.js index 062a89b5..f850dc8d 100644 --- a/src/features/communities.js +++ b/src/features/communities.js @@ -11,48 +11,18 @@ function getDirectory(filePath) { return dir === '.' ? 
'(root)' : dir; } -// ─── Core Analysis ──────────────────────────────────────────────────── +// ─── Community Building ────────────────────────────────────────────── /** - * Run Louvain community detection and return structured data. - * - * @param {string} [customDbPath] - Path to graph.db - * @param {object} [opts] - * @param {boolean} [opts.functions] - Function-level instead of file-level - * @param {number} [opts.resolution] - Louvain resolution (default 1.0) - * @param {boolean} [opts.noTests] - Exclude test files - * @param {boolean} [opts.drift] - Drift-only mode (omit community member lists) - * @param {boolean} [opts.json] - JSON output (used by CLI wrapper only) - * @returns {{ communities: object[], modularity: number, drift: object, summary: object }} + * Group graph nodes by Louvain community assignment and build structured objects. + * @param {object} graph - The dependency graph + * @param {Map} assignments - Node key → community ID + * @param {object} opts + * @param {boolean} [opts.drift] - If true, omit member lists + * @returns {{ communities: object[], communityDirs: Map> }} */ -export function communitiesData(customDbPath, opts = {}) { - const { repo, close } = openRepo(customDbPath, opts); - let graph; - try { - graph = buildDependencyGraph(repo, { - fileLevel: !opts.functions, - noTests: opts.noTests, - }); - } finally { - close(); - } - - // Handle empty or trivial graphs - if (graph.nodeCount === 0 || graph.edgeCount === 0) { - return { - communities: [], - modularity: 0, - drift: { splitCandidates: [], mergeCandidates: [] }, - summary: { communityCount: 0, modularity: 0, nodeCount: graph.nodeCount, driftScore: 0 }, - }; - } - - // Run Louvain - const resolution = opts.resolution ?? 
1.0; - const { assignments, modularity } = louvainCommunities(graph, { resolution }); - - // Group nodes by community - const communityMap = new Map(); // community id → node keys[] +function buildCommunityObjects(graph, assignments, opts) { + const communityMap = new Map(); for (const [key] of graph.nodes()) { const cid = assignments.get(key); if (cid == null) continue; @@ -60,9 +30,8 @@ export function communitiesData(customDbPath, opts = {}) { communityMap.get(cid).push(key); } - // Build community objects const communities = []; - const communityDirs = new Map(); // community id → Set + const communityDirs = new Map(); for (const [cid, members] of communityMap) { const dirCounts = {}; @@ -88,19 +57,27 @@ export function communitiesData(customDbPath, opts = {}) { }); } - // Sort by size descending communities.sort((a, b) => b.size - a.size); + return { communities, communityDirs }; +} - // ─── Drift Analysis ───────────────────────────────────────────── +// ─── Drift Analysis ────────────────────────────────────────────────── - // Split candidates: directories with members in 2+ communities - const dirToCommunities = new Map(); // dir → Set +/** + * Compute split/merge candidates and drift score from community directory data. 
+ * @param {object[]} communities - Community objects with `directories` + * @param {Map>} communityDirs - Community ID → directory set + * @returns {{ splitCandidates: object[], mergeCandidates: object[], driftScore: number }} + */ +function analyzeDrift(communities, communityDirs) { + const dirToCommunities = new Map(); for (const [cid, dirs] of communityDirs) { for (const dir of dirs) { if (!dirToCommunities.has(dir)) dirToCommunities.set(dir, new Set()); dirToCommunities.get(dir).add(cid); } } + const splitCandidates = []; for (const [dir, cids] of dirToCommunities) { if (cids.size >= 2) { @@ -109,7 +86,6 @@ export function communitiesData(customDbPath, opts = {}) { } splitCandidates.sort((a, b) => b.communityCount - a.communityCount); - // Merge candidates: communities spanning 2+ directories const mergeCandidates = []; for (const c of communities) { const dirCount = Object.keys(c.directories).length; @@ -124,17 +100,56 @@ export function communitiesData(customDbPath, opts = {}) { } mergeCandidates.sort((a, b) => b.directoryCount - a.directoryCount); - // Drift score: 0-100 based on how much directory structure diverges from communities const totalDirs = dirToCommunities.size; - const splitDirs = splitCandidates.length; - const splitRatio = totalDirs > 0 ? splitDirs / totalDirs : 0; - + const splitRatio = totalDirs > 0 ? splitCandidates.length / totalDirs : 0; const totalComms = communities.length; - const mergeComms = mergeCandidates.length; - const mergeRatio = totalComms > 0 ? mergeComms / totalComms : 0; - + const mergeRatio = totalComms > 0 ? mergeCandidates.length / totalComms : 0; const driftScore = Math.round(((splitRatio + mergeRatio) / 2) * 100); + return { splitCandidates, mergeCandidates, driftScore }; +} + +// ─── Core Analysis ──────────────────────────────────────────────────── + +/** + * Run Louvain community detection and return structured data. 
+ * + * @param {string} [customDbPath] - Path to graph.db + * @param {object} [opts] + * @param {boolean} [opts.functions] - Function-level instead of file-level + * @param {number} [opts.resolution] - Louvain resolution (default 1.0) + * @param {boolean} [opts.noTests] - Exclude test files + * @param {boolean} [opts.drift] - Drift-only mode (omit community member lists) + * @param {boolean} [opts.json] - JSON output (used by CLI wrapper only) + * @returns {{ communities: object[], modularity: number, drift: object, summary: object }} + */ +export function communitiesData(customDbPath, opts = {}) { + const { repo, close } = openRepo(customDbPath, opts); + let graph; + try { + graph = buildDependencyGraph(repo, { + fileLevel: !opts.functions, + noTests: opts.noTests, + }); + } finally { + close(); + } + + if (graph.nodeCount === 0 || graph.edgeCount === 0) { + return { + communities: [], + modularity: 0, + drift: { splitCandidates: [], mergeCandidates: [] }, + summary: { communityCount: 0, modularity: 0, nodeCount: graph.nodeCount, driftScore: 0 }, + }; + } + + const resolution = opts.resolution ?? 1.0; + const { assignments, modularity } = louvainCommunities(graph, { resolution }); + + const { communities, communityDirs } = buildCommunityObjects(graph, assignments, opts); + const { splitCandidates, mergeCandidates, driftScore } = analyzeDrift(communities, communityDirs); + const base = { communities: opts.drift ? 
[] : communities, modularity: +modularity.toFixed(4), diff --git a/src/features/triage.js b/src/features/triage.js index 00b35ccd..8c23875a 100644 --- a/src/features/triage.js +++ b/src/features/triage.js @@ -4,8 +4,83 @@ import { warn } from '../infrastructure/logger.js'; import { isTestFile } from '../infrastructure/test-filter.js'; import { paginateResult } from '../shared/paginate.js'; +// ─── Scoring ───────────────────────────────────────────────────────── + +const SORT_FNS = { + risk: (a, b) => b.riskScore - a.riskScore, + complexity: (a, b) => b.cognitive - a.cognitive, + churn: (a, b) => b.churn - a.churn, + 'fan-in': (a, b) => b.fanIn - a.fanIn, + mi: (a, b) => a.maintainabilityIndex - b.maintainabilityIndex, +}; + +/** + * Build scored triage items from raw rows and risk metrics. + * @param {object[]} rows - Raw DB rows + * @param {object[]} riskMetrics - Per-row risk metric objects from scoreRisk + * @returns {object[]} + */ +function buildTriageItems(rows, riskMetrics) { + return rows.map((r, i) => ({ + name: r.name, + kind: r.kind, + file: r.file, + line: r.line, + role: r.role || null, + fanIn: r.fan_in, + cognitive: r.cognitive, + churn: r.churn, + maintainabilityIndex: r.mi, + normFanIn: riskMetrics[i].normFanIn, + normComplexity: riskMetrics[i].normComplexity, + normChurn: riskMetrics[i].normChurn, + normMI: riskMetrics[i].normMI, + roleWeight: riskMetrics[i].roleWeight, + riskScore: riskMetrics[i].riskScore, + })); +} + +/** + * Compute signal coverage and summary statistics. 
+ * @param {object[]} filtered - All filtered rows + * @param {object[]} scored - Scored and filtered items + * @param {object} weights - Active weights + * @returns {object} + */ +function computeTriageSummary(filtered, scored, weights) { + const signalCoverage = { + complexity: round4(filtered.filter((r) => r.cognitive > 0).length / filtered.length), + churn: round4(filtered.filter((r) => r.churn > 0).length / filtered.length), + fanIn: round4(filtered.filter((r) => r.fan_in > 0).length / filtered.length), + mi: round4(filtered.filter((r) => r.mi > 0).length / filtered.length), + }; + + const scores = scored.map((it) => it.riskScore); + const avgScore = + scores.length > 0 ? round4(scores.reduce((a, b) => a + b, 0) / scores.length) : 0; + const maxScore = scores.length > 0 ? round4(Math.max(...scores)) : 0; + + return { + total: filtered.length, + analyzed: scored.length, + avgScore, + maxScore, + weights, + signalCoverage, + }; +} + // ─── Data Function ──────────────────────────────────────────────────── +const EMPTY_SUMMARY = (weights) => ({ + total: 0, + analyzed: 0, + avgScore: 0, + maxScore: 0, + weights, + signalCoverage: {}, +}); + /** * Compute composite risk scores for all symbols. * @@ -17,9 +92,6 @@ export function triageData(customDbPath, opts = {}) { const { repo, close } = openRepo(customDbPath, opts); try { const noTests = opts.noTests || false; - const fileFilter = opts.file || null; - const kindFilter = opts.kind || null; - const roleFilter = opts.role || null; const minScore = opts.minScore != null ? 
Number(opts.minScore) : null; const sort = opts.sort || 'risk'; const weights = { ...DEFAULT_WEIGHTS, ...(opts.weights || {}) }; @@ -28,86 +100,29 @@ export function triageData(customDbPath, opts = {}) { try { rows = repo.findNodesForTriage({ noTests, - file: fileFilter, - kind: kindFilter, - role: roleFilter, + file: opts.file || null, + kind: opts.kind || null, + role: opts.role || null, }); } catch (err) { warn(`triage query failed: ${err.message}`); - return { - items: [], - summary: { total: 0, analyzed: 0, avgScore: 0, maxScore: 0, weights, signalCoverage: {} }, - }; + return { items: [], summary: EMPTY_SUMMARY(weights) }; } - // Post-filter test files (belt-and-suspenders) const filtered = noTests ? rows.filter((r) => !isTestFile(r.file)) : rows; - if (filtered.length === 0) { - return { - items: [], - summary: { total: 0, analyzed: 0, avgScore: 0, maxScore: 0, weights, signalCoverage: {} }, - }; + return { items: [], summary: EMPTY_SUMMARY(weights) }; } - // Delegate scoring to classifier const riskMetrics = scoreRisk(filtered, weights); + const items = buildTriageItems(filtered, riskMetrics); - // Compute risk scores - const items = filtered.map((r, i) => ({ - name: r.name, - kind: r.kind, - file: r.file, - line: r.line, - role: r.role || null, - fanIn: r.fan_in, - cognitive: r.cognitive, - churn: r.churn, - maintainabilityIndex: r.mi, - normFanIn: riskMetrics[i].normFanIn, - normComplexity: riskMetrics[i].normComplexity, - normChurn: riskMetrics[i].normChurn, - normMI: riskMetrics[i].normMI, - roleWeight: riskMetrics[i].roleWeight, - riskScore: riskMetrics[i].riskScore, - })); - - // Apply minScore filter const scored = minScore != null ? 
items.filter((it) => it.riskScore >= minScore) : items; - - // Sort - const sortFns = { - risk: (a, b) => b.riskScore - a.riskScore, - complexity: (a, b) => b.cognitive - a.cognitive, - churn: (a, b) => b.churn - a.churn, - 'fan-in': (a, b) => b.fanIn - a.fanIn, - mi: (a, b) => a.maintainabilityIndex - b.maintainabilityIndex, - }; - scored.sort(sortFns[sort] || sortFns.risk); - - // Signal coverage: % of items with non-zero signal - const signalCoverage = { - complexity: round4(filtered.filter((r) => r.cognitive > 0).length / filtered.length), - churn: round4(filtered.filter((r) => r.churn > 0).length / filtered.length), - fanIn: round4(filtered.filter((r) => r.fan_in > 0).length / filtered.length), - mi: round4(filtered.filter((r) => r.mi > 0).length / filtered.length), - }; - - const scores = scored.map((it) => it.riskScore); - const avgScore = - scores.length > 0 ? round4(scores.reduce((a, b) => a + b, 0) / scores.length) : 0; - const maxScore = scores.length > 0 ? round4(Math.max(...scores)) : 0; + scored.sort(SORT_FNS[sort] || SORT_FNS.risk); const result = { items: scored, - summary: { - total: filtered.length, - analyzed: scored.length, - avgScore, - maxScore, - weights, - signalCoverage, - }, + summary: computeTriageSummary(filtered, scored, weights), }; return paginateResult(result, 'items', { From fed3b7cc240c20f947d9f5744b6ac3321701e349 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 03:56:08 -0600 Subject: [PATCH 11/22] fix: split data fetching from formatting in presentation queries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract printDiffFunctions/Coupled/Ownership/Boundaries/Summary from diffImpact (cog 28→6, cyc 21→7). Extract printExportHeader/Symbols from fileExports. Extract printNotFound/PathSteps from symbolPath. 
Impact: 12 functions changed, 7 affected --- src/presentation/queries-cli/exports.js | 35 +++++---- src/presentation/queries-cli/impact.js | 94 +++++++++++++++---------- src/presentation/queries-cli/path.js | 65 +++++++++-------- 3 files changed, 112 insertions(+), 82 deletions(-) diff --git a/src/presentation/queries-cli/exports.js b/src/presentation/queries-cli/exports.js index ea7dcade..fe06f731 100644 --- a/src/presentation/queries-cli/exports.js +++ b/src/presentation/queries-cli/exports.js @@ -1,19 +1,7 @@ import { exportsData, kindIcon } from '../../domain/queries.js'; import { outputResult } from '../../infrastructure/result-formatter.js'; -export function fileExports(file, customDbPath, opts = {}) { - const data = exportsData(file, customDbPath, opts); - if (outputResult(data, 'results', opts)) return; - - if (data.results.length === 0) { - if (opts.unused) { - console.log(`No unused exports found for "${file}".`); - } else { - console.log(`No exported symbols found for "${file}". Run "codegraph build" first.`); - } - return; - } - +function printExportHeader(data, opts) { if (opts.unused) { console.log( `\n# ${data.file} — ${data.totalUnused} unused export${data.totalUnused !== 1 ? 's' : ''} (of ${data.totalExported} exported)\n`, @@ -24,8 +12,10 @@ export function fileExports(file, customDbPath, opts = {}) { `\n# ${data.file} — ${data.totalExported} exported${unusedNote}, ${data.totalInternal} internal\n`, ); } +} - for (const sym of data.results) { +function printExportSymbols(results) { + for (const sym of results) { const icon = kindIcon(sym.kind); const sig = sym.signature?.params ? `(${sym.signature.params})` : ''; const role = sym.role ? 
` [${sym.role}]` : ''; @@ -38,6 +28,23 @@ export function fileExports(file, customDbPath, opts = {}) { } } } +} + +export function fileExports(file, customDbPath, opts = {}) { + const data = exportsData(file, customDbPath, opts); + if (outputResult(data, 'results', opts)) return; + + if (data.results.length === 0) { + if (opts.unused) { + console.log(`No unused exports found for "${file}".`); + } else { + console.log(`No exported symbols found for "${file}". Run "codegraph build" first.`); + } + return; + } + + printExportHeader(data, opts); + printExportSymbols(data.results); if (data.reexports.length > 0) { console.log(`\n Re-exports: ${data.reexports.map((r) => r.file).join(', ')}`); diff --git a/src/presentation/queries-cli/impact.js b/src/presentation/queries-cli/impact.js index 176172be..511cb42e 100644 --- a/src/presentation/queries-cli/impact.js +++ b/src/presentation/queries-cli/impact.js @@ -132,6 +132,56 @@ export function fnImpact(name, customDbPath, opts = {}) { } } +function printDiffFunctions(data) { + console.log(`\ndiff-impact: ${data.changedFiles} files changed\n`); + console.log(` ${data.affectedFunctions.length} functions changed:\n`); + for (const fn of data.affectedFunctions) { + console.log(` ${kindIcon(fn.kind)} ${fn.name} -- ${fn.file}:${fn.line}`); + if (fn.transitiveCallers > 0) console.log(` ^ ${fn.transitiveCallers} transitive callers`); + } +} + +function printDiffCoupled(data) { + if (!data.historicallyCoupled?.length) return; + console.log('\n Historically coupled (not in static graph):\n'); + for (const c of data.historicallyCoupled) { + const pct = `${(c.jaccard * 100).toFixed(0)}%`; + console.log( + ` ${c.file} <- coupled with ${c.coupledWith} (${pct}, ${c.commitCount} commits)`, + ); + } +} + +function printDiffOwnership(data) { + if (!data.ownership) return; + console.log(`\n Affected owners: ${data.ownership.affectedOwners.join(', ')}`); + console.log(` Suggested reviewers: ${data.ownership.suggestedReviewers.join(', ')}`); +} 
+ +function printDiffBoundaries(data) { + if (!data.boundaryViolations?.length) return; + console.log(`\n Boundary violations (${data.boundaryViolationCount}):\n`); + for (const v of data.boundaryViolations) { + console.log(` [${v.name}] ${v.file} -> ${v.targetFile}`); + if (v.message) console.log(` ${v.message}`); + } +} + +function printDiffSummary(summary) { + if (!summary) return; + let line = `\n Summary: ${summary.functionsChanged} functions changed -> ${summary.callersAffected} callers affected across ${summary.filesAffected} files`; + if (summary.historicallyCoupledCount > 0) { + line += `, ${summary.historicallyCoupledCount} historically coupled`; + } + if (summary.ownersAffected > 0) { + line += `, ${summary.ownersAffected} owners affected`; + } + if (summary.boundaryViolationCount > 0) { + line += `, ${summary.boundaryViolationCount} boundary violations`; + } + console.log(`${line}\n`); +} + export function diffImpact(customDbPath, opts = {}) { if (opts.format === 'mermaid') { console.log(diffImpactMermaid(customDbPath, opts)); @@ -156,43 +206,9 @@ export function diffImpact(customDbPath, opts = {}) { return; } - console.log(`\ndiff-impact: ${data.changedFiles} files changed\n`); - console.log(` ${data.affectedFunctions.length} functions changed:\n`); - for (const fn of data.affectedFunctions) { - console.log(` ${kindIcon(fn.kind)} ${fn.name} -- ${fn.file}:${fn.line}`); - if (fn.transitiveCallers > 0) console.log(` ^ ${fn.transitiveCallers} transitive callers`); - } - if (data.historicallyCoupled && data.historicallyCoupled.length > 0) { - console.log('\n Historically coupled (not in static graph):\n'); - for (const c of data.historicallyCoupled) { - const pct = `${(c.jaccard * 100).toFixed(0)}%`; - console.log( - ` ${c.file} <- coupled with ${c.coupledWith} (${pct}, ${c.commitCount} commits)`, - ); - } - } - if (data.ownership) { - console.log(`\n Affected owners: ${data.ownership.affectedOwners.join(', ')}`); - console.log(` Suggested reviewers: 
${data.ownership.suggestedReviewers.join(', ')}`); - } - if (data.boundaryViolations && data.boundaryViolations.length > 0) { - console.log(`\n Boundary violations (${data.boundaryViolationCount}):\n`); - for (const v of data.boundaryViolations) { - console.log(` [${v.name}] ${v.file} -> ${v.targetFile}`); - if (v.message) console.log(` ${v.message}`); - } - } - if (data.summary) { - let summaryLine = `\n Summary: ${data.summary.functionsChanged} functions changed -> ${data.summary.callersAffected} callers affected across ${data.summary.filesAffected} files`; - if (data.summary.historicallyCoupledCount > 0) { - summaryLine += `, ${data.summary.historicallyCoupledCount} historically coupled`; - } - if (data.summary.ownersAffected > 0) { - summaryLine += `, ${data.summary.ownersAffected} owners affected`; - } - if (data.summary.boundaryViolationCount > 0) { - summaryLine += `, ${data.summary.boundaryViolationCount} boundary violations`; - } - console.log(`${summaryLine}\n`); - } + printDiffFunctions(data); + printDiffCoupled(data); + printDiffOwnership(data); + printDiffBoundaries(data); + printDiffSummary(data.summary); } diff --git a/src/presentation/queries-cli/path.js b/src/presentation/queries-cli/path.js index fbdaafa5..9d61b1a6 100644 --- a/src/presentation/queries-cli/path.js +++ b/src/presentation/queries-cli/path.js @@ -1,6 +1,40 @@ import { kindIcon, pathData } from '../../domain/queries.js'; import { outputResult } from '../../infrastructure/result-formatter.js'; +function printNotFound(from, to, data) { + const dir = data.reverse ? 
'reverse ' : ''; + console.log(`No ${dir}path from "${from}" to "${to}" within ${data.maxDepth} hops.`); + if (data.fromCandidates.length > 1) { + console.log( + `\n "${from}" matched ${data.fromCandidates.length} symbols — using top match: ${data.fromCandidates[0].name} (${data.fromCandidates[0].file}:${data.fromCandidates[0].line})`, + ); + } + if (data.toCandidates.length > 1) { + console.log( + ` "${to}" matched ${data.toCandidates.length} symbols — using top match: ${data.toCandidates[0].name} (${data.toCandidates[0].file}:${data.toCandidates[0].line})`, + ); + } +} + +function printPathSteps(data) { + for (let i = 0; i < data.path.length; i++) { + const n = data.path[i]; + const indent = ' '.repeat(i + 1); + if (i === 0) { + console.log(`${indent}${kindIcon(n.kind)} ${n.name} (${n.kind}) -- ${n.file}:${n.line}`); + } else { + console.log( + `${indent}--[${n.edgeKind}]--> ${kindIcon(n.kind)} ${n.name} (${n.kind}) -- ${n.file}:${n.line}`, + ); + } + } + if (data.alternateCount > 0) { + console.log( + `\n (${data.alternateCount} alternate shortest ${data.alternateCount === 1 ? 'path' : 'paths'} at same depth)`, + ); + } +} + export function symbolPath(from, to, customDbPath, opts = {}) { const data = pathData(from, to, customDbPath, opts); if (outputResult(data, null, opts)) return; @@ -11,18 +45,7 @@ export function symbolPath(from, to, customDbPath, opts = {}) { } if (!data.found) { - const dir = data.reverse ? 
'reverse ' : ''; - console.log(`No ${dir}path from "${from}" to "${to}" within ${data.maxDepth} hops.`); - if (data.fromCandidates.length > 1) { - console.log( - `\n "${from}" matched ${data.fromCandidates.length} symbols — using top match: ${data.fromCandidates[0].name} (${data.fromCandidates[0].file}:${data.fromCandidates[0].line})`, - ); - } - if (data.toCandidates.length > 1) { - console.log( - ` "${to}" matched ${data.toCandidates.length} symbols — using top match: ${data.toCandidates[0].name} (${data.toCandidates[0].file}:${data.toCandidates[0].line})`, - ); - } + printNotFound(from, to, data); return; } @@ -37,22 +60,6 @@ export function symbolPath(from, to, customDbPath, opts = {}) { console.log( `\nPath from ${from} to ${to} (${data.hops} ${data.hops === 1 ? 'hop' : 'hops'})${dir}:\n`, ); - for (let i = 0; i < data.path.length; i++) { - const n = data.path[i]; - const indent = ' '.repeat(i + 1); - if (i === 0) { - console.log(`${indent}${kindIcon(n.kind)} ${n.name} (${n.kind}) -- ${n.file}:${n.line}`); - } else { - console.log( - `${indent}--[${n.edgeKind}]--> ${kindIcon(n.kind)} ${n.name} (${n.kind}) -- ${n.file}:${n.line}`, - ); - } - } - - if (data.alternateCount > 0) { - console.log( - `\n (${data.alternateCount} alternate shortest ${data.alternateCount === 1 ? 'path' : 'paths'} at same depth)`, - ); - } + printPathSteps(data); console.log(); } From 8680a252e1e1d965803e96ea15265e3fdd569666 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 03:59:35 -0600 Subject: [PATCH 12/22] fix: extract subcommand dispatch in check, triage CLI and MCP server MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract runManifesto/validateKind from check execute (cyc 14→10). Extract runHotspots/validateFilters/parseWeights from triage execute (cyc 13→4). 
Extract loadMCPSdk/createLazyLoaders/resolveDbPath/validateMultiRepoAccess from startMCPServer (cog 34→13, cyc 19→7). Impact: 14 functions changed, 4 affected --- src/cli/commands/check.js | 40 ++++++------ src/cli/commands/triage.js | 62 +++++++++++-------- src/mcp/server.js | 121 ++++++++++++++++++++----------------- 3 files changed, 119 insertions(+), 104 deletions(-) diff --git a/src/cli/commands/check.js b/src/cli/commands/check.js index 24cd9a63..501e4aa4 100644 --- a/src/cli/commands/check.js +++ b/src/cli/commands/check.js @@ -2,6 +2,22 @@ import { EVERY_SYMBOL_KIND } from '../../domain/queries.js'; import { ConfigError } from '../../shared/errors.js'; import { config } from '../shared/options.js'; +function validateKind(kind) { + if (kind && !EVERY_SYMBOL_KIND.includes(kind)) { + throw new ConfigError(`Invalid kind "${kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); + } +} + +async function runManifesto(opts, qOpts) { + validateKind(opts.kind); + const { manifesto } = await import('../../presentation/manifesto.js'); + manifesto(opts.db, { + file: opts.file, + kind: opts.kind, + ...qOpts, + }); +} + export const command = { name: 'check [ref]', description: @@ -29,17 +45,7 @@ export const command = { const qOpts = ctx.resolveQueryOpts(opts); if (!isDiffMode && !opts.rules) { - if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { - throw new ConfigError( - `Invalid kind "${opts.kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`, - ); - } - const { manifesto } = await import('../../presentation/manifesto.js'); - manifesto(opts.db, { - file: opts.file, - kind: opts.kind, - ...qOpts, - }); + await runManifesto(opts, qOpts); return; } @@ -58,17 +64,7 @@ export const command = { }); if (opts.rules) { - if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { - throw new ConfigError( - `Invalid kind "${opts.kind}". 
Valid: ${EVERY_SYMBOL_KIND.join(', ')}`, - ); - } - const { manifesto } = await import('../../presentation/manifesto.js'); - manifesto(opts.db, { - file: opts.file, - kind: opts.kind, - ...qOpts, - }); + await runManifesto(opts, qOpts); } }, }; diff --git a/src/cli/commands/triage.js b/src/cli/commands/triage.js index 5a8a570f..828b5623 100644 --- a/src/cli/commands/triage.js +++ b/src/cli/commands/triage.js @@ -1,6 +1,39 @@ import { EVERY_SYMBOL_KIND, VALID_ROLES } from '../../domain/queries.js'; import { ConfigError } from '../../shared/errors.js'; +function validateFilters(opts) { + if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { + throw new ConfigError(`Invalid kind "${opts.kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); + } + if (opts.role && !VALID_ROLES.includes(opts.role)) { + throw new ConfigError(`Invalid role "${opts.role}". Valid: ${VALID_ROLES.join(', ')}`); + } +} + +function parseWeights(raw) { + if (!raw) return undefined; + try { + return JSON.parse(raw); + } catch (err) { + throw new ConfigError('Invalid --weights JSON', { cause: err }); + } +} + +async function runHotspots(opts, ctx) { + const { hotspotsData, formatHotspots } = await import('../../presentation/structure.js'); + const metric = opts.sort === 'risk' ? 'fan-in' : opts.sort; + const data = hotspotsData(opts.db, { + metric, + level: opts.level, + limit: parseInt(opts.limit, 10), + offset: opts.offset ? parseInt(opts.offset, 10) : undefined, + noTests: ctx.resolveNoTests(opts), + }); + if (!ctx.outputResult(data, 'hotspots', opts)) { + console.log(formatHotspots(data)); + } +} + export const command = { name: 'triage', description: @@ -31,35 +64,12 @@ export const command = { ], async execute(_args, opts, ctx) { if (opts.level === 'file' || opts.level === 'directory') { - const { hotspotsData, formatHotspots } = await import('../../presentation/structure.js'); - const metric = opts.sort === 'risk' ? 
'fan-in' : opts.sort; - const data = hotspotsData(opts.db, { - metric, - level: opts.level, - limit: parseInt(opts.limit, 10), - offset: opts.offset ? parseInt(opts.offset, 10) : undefined, - noTests: ctx.resolveNoTests(opts), - }); - if (!ctx.outputResult(data, 'hotspots', opts)) { - console.log(formatHotspots(data)); - } + await runHotspots(opts, ctx); return; } - if (opts.kind && !EVERY_SYMBOL_KIND.includes(opts.kind)) { - throw new ConfigError(`Invalid kind "${opts.kind}". Valid: ${EVERY_SYMBOL_KIND.join(', ')}`); - } - if (opts.role && !VALID_ROLES.includes(opts.role)) { - throw new ConfigError(`Invalid role "${opts.role}". Valid: ${VALID_ROLES.join(', ')}`); - } - let weights; - if (opts.weights) { - try { - weights = JSON.parse(opts.weights); - } catch (err) { - throw new ConfigError('Invalid --weights JSON', { cause: err }); - } - } + validateFilters(opts); + const weights = parseWeights(opts.weights); const { triage } = await import('../../presentation/triage.js'); triage(opts.db, { limit: parseInt(opts.limit, 10), diff --git a/src/mcp/server.js b/src/mcp/server.js index 464fafaf..bae0bbbf 100644 --- a/src/mcp/server.js +++ b/src/mcp/server.js @@ -21,45 +21,84 @@ import { TOOL_HANDLERS } from './tools/index.js'; * @param {boolean} [options.multiRepo] - Enable multi-repo access (default: false) * @param {string[]} [options.allowedRepos] - Restrict access to these repo names only */ -export async function startMCPServer(customDbPath, options = {}) { - const { allowedRepos } = options; - const multiRepo = options.multiRepo || !!allowedRepos; - let Server, StdioServerTransport, ListToolsRequestSchema, CallToolRequestSchema; +async function loadMCPSdk() { try { const sdk = await import('@modelcontextprotocol/sdk/server/index.js'); - Server = sdk.Server; const transport = await import('@modelcontextprotocol/sdk/server/stdio.js'); - StdioServerTransport = transport.StdioServerTransport; const types = await import('@modelcontextprotocol/sdk/types.js'); - 
ListToolsRequestSchema = types.ListToolsRequestSchema; - CallToolRequestSchema = types.CallToolRequestSchema; + return { + Server: sdk.Server, + StdioServerTransport: transport.StdioServerTransport, + ListToolsRequestSchema: types.ListToolsRequestSchema, + CallToolRequestSchema: types.CallToolRequestSchema, + }; } catch { throw new ConfigError( 'MCP server requires @modelcontextprotocol/sdk.\nInstall it with: npm install @modelcontextprotocol/sdk', ); } +} - // Connect transport FIRST so the server can receive the client's - // `initialize` request while heavy modules (queries, better-sqlite3) - // are still loading. These are lazy-loaded on the first tool call - // and cached for subsequent calls. +function createLazyLoaders() { let _queries; let _Database; + return { + async getQueries() { + if (!_queries) _queries = await import('../domain/queries.js'); + return _queries; + }, + getDatabase() { + if (!_Database) { + const require = createRequire(import.meta.url); + _Database = require('better-sqlite3'); + } + return _Database; + }, + }; +} - async function getQueries() { - if (!_queries) { - _queries = await import('../domain/queries.js'); +async function resolveDbPath(customDbPath, args, allowedRepos) { + let dbPath = customDbPath || undefined; + if (args.repo) { + if (allowedRepos && !allowedRepos.includes(args.repo)) { + throw new ConfigError(`Repository "${args.repo}" is not in the allowed repos list.`); } - return _queries; + const { resolveRepoDbPath } = await import('../infrastructure/registry.js'); + const resolved = resolveRepoDbPath(args.repo); + if (!resolved) + throw new ConfigError( + `Repository "${args.repo}" not found in registry or its database is missing.`, + ); + dbPath = resolved; } + return dbPath; +} - function getDatabase() { - if (!_Database) { - const require = createRequire(import.meta.url); - _Database = require('better-sqlite3'); - } - return _Database; +function validateMultiRepoAccess(multiRepo, name, args) { + if (!multiRepo && 
args.repo) { + throw new ConfigError( + 'Multi-repo access is disabled. Restart with `codegraph mcp --multi-repo` to access other repositories.', + ); + } + if (!multiRepo && name === 'list_repos') { + throw new ConfigError( + 'Multi-repo access is disabled. Restart with `codegraph mcp --multi-repo` to list repositories.', + ); } +} + +export async function startMCPServer(customDbPath, options = {}) { + const { allowedRepos } = options; + const multiRepo = options.multiRepo || !!allowedRepos; + + const { Server, StdioServerTransport, ListToolsRequestSchema, CallToolRequestSchema } = + await loadMCPSdk(); + + // Connect transport FIRST so the server can receive the client's + // `initialize` request while heavy modules (queries, better-sqlite3) + // are still loading. These are lazy-loaded on the first tool call + // and cached for subsequent calls. + const { getQueries, getDatabase } = createLazyLoaders(); const server = new Server( { name: 'codegraph', version: '1.0.0' }, @@ -73,47 +112,17 @@ export async function startMCPServer(customDbPath, options = {}) { server.setRequestHandler(CallToolRequestSchema, async (request) => { const { name, arguments: args } = request.params; try { - if (!multiRepo && args.repo) { - throw new ConfigError( - 'Multi-repo access is disabled. Restart with `codegraph mcp --multi-repo` to access other repositories.', - ); - } - if (!multiRepo && name === 'list_repos') { - throw new ConfigError( - 'Multi-repo access is disabled. 
Restart with `codegraph mcp --multi-repo` to list repositories.', - ); - } - - let dbPath = customDbPath || undefined; - if (args.repo) { - if (allowedRepos && !allowedRepos.includes(args.repo)) { - throw new ConfigError(`Repository "${args.repo}" is not in the allowed repos list.`); - } - const { resolveRepoDbPath } = await import('../infrastructure/registry.js'); - const resolved = resolveRepoDbPath(args.repo); - if (!resolved) - throw new ConfigError( - `Repository "${args.repo}" not found in registry or its database is missing.`, - ); - dbPath = resolved; - } + validateMultiRepoAccess(multiRepo, name, args); + const dbPath = await resolveDbPath(customDbPath, args, allowedRepos); const toolEntry = TOOL_HANDLERS.get(name); if (!toolEntry) { return { content: [{ type: 'text', text: `Unknown tool: ${name}` }], isError: true }; } - const ctx = { - dbPath, - getQueries, - getDatabase, - findDbPath, - allowedRepos, - MCP_MAX_LIMIT, - }; - + const ctx = { dbPath, getQueries, getDatabase, findDbPath, allowedRepos, MCP_MAX_LIMIT }; const result = await toolEntry.handler(args, ctx); - if (result?.content) return result; // pass-through MCP responses + if (result?.content) return result; return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] }; } catch (err) { const code = err instanceof CodegraphError ? err.code : 'UNKNOWN_ERROR'; From b032fa152016cdf181fe4cded6aa24211dc51a47 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 04:31:52 -0600 Subject: [PATCH 13/22] fix: add test-only role for symbols called exclusively from test files (#488) Symbols whose only callers are in test files were inflated as core/utility based on test fan-in. Compute production fan-in at build time and classify these as test-only so roles reflect production architecture accurately. 
Impact: 2 functions changed, 1 affected --- src/features/structure.js | 25 +++++++++++++++++-- src/graph/classifiers/roles.js | 9 ++++++- src/shared/kinds.js | 2 +- tests/graph/classifiers/roles.test.js | 35 +++++++++++++++++++++++++++ tests/unit/roles.test.js | 10 +++++++- 5 files changed, 76 insertions(+), 5 deletions(-) diff --git a/src/features/structure.js b/src/features/structure.js index 7f582076..e5acf7b5 100644 --- a/src/features/structure.js +++ b/src/features/structure.js @@ -349,7 +349,7 @@ export function classifyNodeRoles(db) { .all(); if (rows.length === 0) { - return { entry: 0, core: 0, utility: 0, adapter: 0, dead: 0, leaf: 0 }; + return { entry: 0, core: 0, utility: 0, adapter: 0, dead: 0, leaf: 0, 'test-only': 0 }; } const exportedIds = new Set( @@ -365,6 +365,26 @@ export function classifyNodeRoles(db) { .map((r) => r.target_id), ); + // Compute production fan-in (excluding callers in test files) + const prodFanInMap = new Map(); + const prodRows = db + .prepare( + `SELECT e.target_id, COUNT(*) AS cnt + FROM edges e + JOIN nodes caller ON e.source_id = caller.id + WHERE e.kind = 'calls' + AND caller.file NOT LIKE '%.test.%' + AND caller.file NOT LIKE '%.spec.%' + AND caller.file NOT LIKE '%__test__%' + AND caller.file NOT LIKE '%__tests__%' + AND caller.file NOT LIKE '%.stories.%' + GROUP BY e.target_id`, + ) + .all(); + for (const r of prodRows) { + prodFanInMap.set(r.target_id, r.cnt); + } + // Delegate classification to the pure-logic classifier const classifierInput = rows.map((r) => ({ id: String(r.id), @@ -372,12 +392,13 @@ export function classifyNodeRoles(db) { fanIn: r.fan_in, fanOut: r.fan_out, isExported: exportedIds.has(r.id), + productionFanIn: prodFanInMap.get(r.id) || 0, })); const roleMap = classifyRoles(classifierInput); // Build summary and updates - const summary = { entry: 0, core: 0, utility: 0, adapter: 0, dead: 0, leaf: 0 }; + const summary = { entry: 0, core: 0, utility: 0, adapter: 0, dead: 0, leaf: 0, 'test-only': 0 }; 
const updates = []; for (const row of rows) { const role = roleMap.get(String(row.id)) || 'leaf'; diff --git a/src/graph/classifiers/roles.js b/src/graph/classifiers/roles.js index 394197d3..964b19a7 100644 --- a/src/graph/classifiers/roles.js +++ b/src/graph/classifiers/roles.js @@ -15,7 +15,11 @@ function median(sorted) { /** * Classify nodes into architectural roles based on fan-in/fan-out metrics. * - * @param {{ id: string, name: string, fanIn: number, fanOut: number, isExported: boolean }[]} nodes + * When `productionFanIn` is provided on a node (number >= 0), symbols with + * callers exclusively in test files (fanIn > 0, productionFanIn === 0) are + * classified as `test-only` instead of inflating production role metrics. + * + * @param {{ id: string, name: string, fanIn: number, fanOut: number, isExported: boolean, productionFanIn?: number }[]} nodes * @returns {Map} nodeId → role */ export function classifyRoles(nodes) { @@ -38,6 +42,7 @@ export function classifyRoles(nodes) { for (const node of nodes) { const highIn = node.fanIn >= medFanIn && node.fanIn > 0; const highOut = node.fanOut >= medFanOut && node.fanOut > 0; + const hasProdFanIn = typeof node.productionFanIn === 'number'; let role; const isFrameworkEntry = FRAMEWORK_ENTRY_PREFIXES.some((p) => node.name.startsWith(p)); @@ -47,6 +52,8 @@ export function classifyRoles(nodes) { role = 'dead'; } else if (node.fanIn === 0 && node.isExported) { role = 'entry'; + } else if (hasProdFanIn && node.productionFanIn === 0) { + role = 'test-only'; } else if (highIn && !highOut) { role = 'core'; } else if (highIn && highOut) { diff --git a/src/shared/kinds.js b/src/shared/kinds.js index 3f469c43..3a046889 100644 --- a/src/shared/kinds.js +++ b/src/shared/kinds.js @@ -47,4 +47,4 @@ export const STRUCTURAL_EDGE_KINDS = ['parameter_of', 'receiver']; // Full set for MCP enum and validation export const EVERY_EDGE_KIND = [...CORE_EDGE_KINDS, ...STRUCTURAL_EDGE_KINDS]; -export const VALID_ROLES = ['entry', 'core', 
'utility', 'adapter', 'dead', 'leaf']; +export const VALID_ROLES = ['entry', 'core', 'utility', 'adapter', 'dead', 'leaf', 'test-only']; diff --git a/tests/graph/classifiers/roles.test.js b/tests/graph/classifiers/roles.test.js index b790996c..b5c65f82 100644 --- a/tests/graph/classifiers/roles.test.js +++ b/tests/graph/classifiers/roles.test.js @@ -60,4 +60,39 @@ describe('classifyRoles', () => { const roles = classifyRoles(nodes); expect(roles.get('1')).toBe('leaf'); }); + + it('classifies test-only (has callers but all in test files)', () => { + const nodes = [ + { id: '1', name: 'helper', fanIn: 5, fanOut: 2, isExported: false, productionFanIn: 0 }, + { id: '2', name: 'coreLib', fanIn: 10, fanOut: 1, isExported: true, productionFanIn: 10 }, + ]; + const roles = classifyRoles(nodes); + expect(roles.get('1')).toBe('test-only'); + expect(roles.get('2')).toBe('core'); + }); + + it('does not classify test-only when productionFanIn is not provided', () => { + // Backward compat: without productionFanIn, classification is unchanged + const nodes = [ + { id: '1', name: 'helper', fanIn: 5, fanOut: 2, isExported: false }, + { id: '2', name: 'other', fanIn: 1, fanOut: 1, isExported: true }, + ]; + const roles = classifyRoles(nodes); + expect(roles.get('1')).not.toBe('test-only'); + }); + + it('framework entry takes precedence over test-only', () => { + const nodes = [ + { + id: '1', + name: 'route:/health', + fanIn: 3, + fanOut: 1, + isExported: false, + productionFanIn: 0, + }, + ]; + const roles = classifyRoles(nodes); + expect(roles.get('1')).toBe('entry'); + }); }); diff --git a/tests/unit/roles.test.js b/tests/unit/roles.test.js index b703f899..5b531f38 100644 --- a/tests/unit/roles.test.js +++ b/tests/unit/roles.test.js @@ -151,7 +151,15 @@ describe('classifyNodeRoles', () => { it('handles empty graph without crashing', () => { const summary = classifyNodeRoles(db); - expect(summary).toEqual({ entry: 0, core: 0, utility: 0, adapter: 0, dead: 0, leaf: 0 }); + 
expect(summary).toEqual({ + entry: 0, + core: 0, + utility: 0, + adapter: 0, + dead: 0, + leaf: 0, + 'test-only': 0, + }); }); it('adapts median thresholds to data', () => { From 19d3d399fa3433bb079d1193d16fc5a209b01855 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Tue, 17 Mar 2026 06:01:17 -0600 Subject: [PATCH 14/22] feat: add titan-close skill with drift detection and issue tracking --- .claude/skills/titan-close/SKILL.md | 558 +++++++++++++++++++++++++ .claude/skills/titan-gate/SKILL.md | 107 ++++- .claude/skills/titan-gauntlet/SKILL.md | 161 +++++-- .claude/skills/titan-recon/SKILL.md | 28 +- .claude/skills/titan-reset/SKILL.md | 3 + .claude/skills/titan-sync/SKILL.md | 111 ++++- .gitignore | 1 + 7 files changed, 915 insertions(+), 54 deletions(-) create mode 100644 .claude/skills/titan-close/SKILL.md diff --git a/.claude/skills/titan-close/SKILL.md b/.claude/skills/titan-close/SKILL.md new file mode 100644 index 00000000..b9727328 --- /dev/null +++ b/.claude/skills/titan-close/SKILL.md @@ -0,0 +1,558 @@ +--- +name: titan-close +description: Split branch commits into focused PRs, compile issue tracker, generate final report with before/after metrics (Titan Paradigm Phase 5) +argument-hint: <--dry-run to preview without creating PRs> +allowed-tools: Bash, Read, Write, Glob, Grep, Edit +--- + +# Titan CLOSE — PR Splitting & Final Report + +You are running the **CLOSE** phase of the Titan Paradigm. + +Your goal: analyze all commits on the current branch, split them into focused PRs for easier review, compile the issue tracker from all phases, capture final metrics, and generate a comprehensive audit report. + +> **Context budget:** This phase reads artifacts and git history. Keep codegraph queries targeted — only for final metrics comparison. + +**Dry-run mode:** If `$ARGUMENTS` contains `--dry-run`, preview the PR split plan and report without creating PRs or pushing branches. 
+ +--- + +## Step 0 — Pre-flight: find and consolidate the Titan session + +1. **Locate the Titan session.** All prior phases (RECON → GAUNTLET → SYNC → GATE) may have run across different worktrees or branches. You need to consolidate their work. + + ```bash + git worktree list + ``` + + For each worktree, check for Titan artifacts: + ```bash + ls <worktree-path>/.codegraph/titan/titan-state.json 2>/dev/null + ``` + + Also check branches (including remote): + ```bash + git branch -a --list '*titan*' + git branch -a --list '*refactor/*' + ``` + + **Decision logic:** + - **Found exactly one worktree/branch with `titan-state.json`:** Read its `currentPhase`. If it's `"sync"` or later, this is the right session. Merge its branch into your worktree. + - **Found a worktree but `currentPhase` is earlier than expected (e.g., `"recon"` or `"gauntlet"`):** The pipeline may not be complete. Keep searching — there may be a more advanced worktree. If nothing better is found, ask the user: "Found Titan state at `<path>` with phase `<phase>`. The pipeline appears incomplete. Continue anyway, or should I look elsewhere?" + - **Found multiple worktrees with `titan-state.json`:** List them all with `currentPhase`, `lastUpdated`, and branch name. The Titan pipeline may have been split across worktrees (RECON in one, GAUNTLET in another). Merge them in phase order into your worktree. If there's ambiguity (e.g., two worktrees at the same phase), ask the user. + - **Found branches but no worktrees:** Merge the titan branch(es) in phase order: `git merge <branch> --no-edit` + - **Found nothing:** Stop: "No Titan session found in any worktree or branch. Run `/titan-recon` first." + +2. **Ensure worktree isolation:** + ```bash + git rev-parse --show-toplevel && git worktree list + ``` + If not in a worktree, stop: "Run `/worktree` first." + +3. **Sync with main:** + ```bash + git fetch origin main && git merge origin/main --no-edit + ``` + If there are merge conflicts, stop: "Merge conflict detected. 
Resolve conflicts and re-run `/titan-close`." + +4. **Load artifacts.** Read: + - `.codegraph/titan/titan-state.json` — session state, baseline metrics, progress + - `.codegraph/titan/GLOBAL_ARCH.md` — architecture document + - `.codegraph/titan/gauntlet-summary.json` — audit results + - `.codegraph/titan/sync.json` — execution plan (commit grouping) + - `.codegraph/titan/gate-log.ndjson` — validation history + - `.codegraph/titan/issues.ndjson` — issue tracker from all phases + + If `titan-state.json` is missing after the search, stop: "No Titan session found. Run `/titan-recon` first." + +5. **Detect version.** Extract from `package.json`: + ```bash + node -e "console.log(require('./package.json').version)" + ``` + +--- + +## Step 1 — Drift detection: final staleness assessment + +CLOSE is the last phase — it must assess the full pipeline's freshness before generating the report. + +1. **Compare main SHA:** + ```bash + git rev-parse origin/main + ``` + Compare against `titan-state.json → mainSHA`. + +2. **If main has advanced**, calculate full drift: + ```bash + git rev-list --count <mainSHA>..origin/main + git diff --name-only <mainSHA>..origin/main + ``` + +3. **Read all prior drift reports** from `.codegraph/titan/drift-report.json`. This shows the cumulative drift across the pipeline. + +4. **Assess overall pipeline freshness:** + + | Level | Condition | Action | + |-------|-----------|--------| + | **fresh** | mainSHA matches current main, no drift reports with severity > low | Generate report normally | + | **acceptable** | Some drift detected but phases handled it (re-audited stale targets) | Generate report — note drift in Executive Summary | + | **stale** | Significant unaddressed drift: >10 commits behind, >20% of audited targets changed on main since audit | **Warn user:** "Pipeline results are partially stale. N targets were modified on main after being audited. The report will flag these. Consider re-running `/titan-gauntlet` for affected targets before finalizing." 
| + | **expired** | >50 commits behind OR >50% of targets changed OR architecture-level changes (new directories in src/) | **Stop:** "Pipeline results are too stale to produce a reliable report. Run `/titan-recon` for a fresh baseline." | + +5. **Write final drift assessment** to the drift report (same schema, `"detectedBy": "close"`). + +6. **Include drift summary in the report.** The final report's Executive Summary and Recommendations sections must reflect any staleness. Stale targets should be called out in a "Staleness Warnings" subsection. + +--- + +## Step 2 — Collect branch commit history + +```bash +git log main..HEAD --oneline --no-merges +git log main..HEAD --format="%H %s" --no-merges +``` + +Extract: total commit count, commit messages, SHAs. If zero commits, stop: "No commits on this branch. Nothing to close." + +For each commit, get the files changed: +```bash +git diff-tree --no-commit-id --name-only -r <sha> +``` + +--- + +## Step 3 — Classify commits into PR groups + +Analyze commit messages and changed files to group commits into **focused PRs**. Each PR should address a single concern for easier review. + +### Grouping strategy (in priority order) + +Use `sync.json` execution phases as the primary guide if available: + +1. **Dead code cleanup** — commits removing dead symbols + - PR title: `chore: remove dead code identified by Titan audit` +2. **Shared abstractions** — commits extracting interfaces/utilities + - PR title: `refactor: extract <abstraction> from <domain>` +3. **Cycle breaks** — commits resolving circular dependencies + - PR title: `refactor: break circular dependency in <domain>` +4. **Decompositions** — commits splitting complex functions/files + - PR title: `refactor: decompose <target> in <domain>` +5. **Quality fixes** — commits addressing fail-level violations + - Group by domain: `fix: address quality issues in <domain>` +6. 
**Warning improvements** — commits addressing warn-level issues + - Group by domain: `refactor: improve code quality in <domain>` + +### Fallback grouping (if no sync.json) + +Group by changed file paths — commits touching the same directory/domain go together. Use commit message prefixes (`fix:`, `refactor:`, `chore:`) as secondary signals. + +### Rules for grouping +- A PR should touch **one domain** where possible +- A PR should address **one concern** (don't mix dead code removal with refactors) +- Order PRs so dependencies come first (if PR B depends on PR A's changes, A merges first) +- Each PR must be independently reviewable — no PR should break the build alone +- If a commit touches files across multiple concerns, assign it to the primary concern and note the cross-cutting nature in the PR description + +Record the grouping plan: +```json +[ + { + "pr": 1, + "title": "...", + "concern": "dead_code|abstraction|cycle_break|decomposition|quality_fix|warning", + "domain": "<domain>", + "commits": ["<sha1>", "<sha2>"], + "files": ["<file1>", "<file2>"], + "dependsOn": [], + "description": "..." + } +] +``` + +--- + +## Step 4 — Capture final metrics + +Rebuild the graph and collect current metrics: + +```bash +codegraph build +codegraph stats --json +codegraph complexity --health --above-threshold -T --json --limit 50 +codegraph roles --role dead -T --json +codegraph roles --role core -T --json +codegraph cycles --json +``` + +Extract: `totalNodes`, `totalEdges`, `totalFiles`, `qualityScore`, functions above threshold, dead symbol count, core symbol count, cycle count. 
+ +Also get the worst offenders for comparison: +```bash +codegraph complexity --health --sort effort -T --json --limit 10 +codegraph complexity --health --sort bugs -T --json --limit 10 +codegraph complexity --health --sort mi -T --json --limit 10 +``` + +### Compute deltas + +Compare final metrics against `titan-state.json` baseline: + +| Metric | Baseline | Final | Delta | +|--------|----------|-------|-------| +| Quality Score | from state | from stats | +/- | +| Functions above threshold | from state | from complexity | +/- | +| Dead symbols | from state | from roles | +/- | +| Cycles | from state | from cycles | +/- | +| Total nodes | from state | from stats | +/- | + +--- + +## Step 5 — Compile the issue tracker + +Read `.codegraph/titan/issues.ndjson`. Each line is a JSON object: + +```json +{"phase": "recon|gauntlet|sync|gate", "timestamp": "ISO 8601", "severity": "bug|limitation|suggestion", "category": "codegraph|tooling|process|codebase", "description": "...", "context": "optional detail"} +``` + +Group issues by category and severity. Summarize: +- **Codegraph bugs:** issues with codegraph itself (wrong output, crashes, missing features) +- **Tooling issues:** problems with the Titan pipeline or other tools +- **Process notes:** suggestions for improving the Titan workflow +- **Codebase observations:** structural concerns beyond what the audit covered + +--- + +## Step 6 — Compile the gate log + +Read `.codegraph/titan/gate-log.ndjson`. Summarize: +- Total gate runs +- Pass / Warn / Fail counts +- Rollbacks triggered +- Most common failure reasons + +--- + +## Step 7 — Generate the report + +### Report path + +``` +generated/titan/titan-report-v-T