diff --git a/crates/codegraph-core/src/extractors/r_lang.rs b/crates/codegraph-core/src/extractors/r_lang.rs index 256500ef..d5d89bdb 100644 --- a/crates/codegraph-core/src/extractors/r_lang.rs +++ b/crates/codegraph-core/src/extractors/r_lang.rs @@ -163,10 +163,14 @@ fn handle_call(node: &Node, source: &[u8], symbols: &mut FileSymbols) { handle_set_class(node, source, symbols); return; } - "setGeneric" | "setMethod" => { + "setGeneric" => { handle_set_generic(node, source, symbols); return; } + "setMethod" => { + handle_set_method(node, source, symbols); + return; + } _ => {} } } @@ -332,6 +336,23 @@ fn handle_set_generic(node: &Node, source: &[u8], symbols: &mut FileSymbols) { } } +// `setMethod("greet", "Person", function(x) ...)` registers an implementation +// of the generic `greet` — it is not a new top-level definition. Emitting a +// definition here produced two `function` nodes with the same name (one from +// setGeneric, one from setMethod) and broke resolution. Emit a call edge to +// the generic instead; the method body's calls are still picked up by the +// recursive walk of the anonymous function argument. +fn handle_set_method(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(name) = first_argument_value(node, source, false) { + symbols.calls.push(Call { + name, + line: start_line(node), + dynamic: None, + receiver: None, + }); + } +} + #[cfg(test)] mod tests { use super::*; @@ -439,6 +460,51 @@ mod tests { assert_eq!(d.kind, "function"); } + #[test] + fn set_method_does_not_duplicate_generic_definition() { + // Idiomatic S4: a setGeneric followed by setMethod implementations. + // Only the setGeneric should emit a definition — setMethod registers + // an implementation of the generic, which we model as a call edge. + let code = r#" +setGeneric("greet", function(x) standardGeneric("greet")) +setMethod("greet", "Person", function(x) paste("Hello", x@name)) +setMethod("greet", "Animal", function(x) paste("Hi", x@species)) +"#; + let s = parse_r(code); + let greet_defs: Vec<&Definition> = + s.definitions.iter().filter(|d| d.name == "greet").collect(); + assert_eq!( + greet_defs.len(), + 1, + "expected exactly one `greet` definition, got {greet_defs:#?}", + ); + assert_eq!(greet_defs[0].kind, "function"); + } + + #[test] + fn set_method_emits_call_to_generic() { + // setMethod registers an implementation of the generic. The fix emits + // a call edge to the generic so the dispatch relationship is visible + // in the graph. + let s = parse_r( + "setMethod(\"greet\", \"Person\", function(x) paste(\"Hello\", x@name))\n", + ); + let calls: Vec<&Call> = s.calls.iter().filter(|c| c.name == "greet").collect(); + assert_eq!(calls.len(), 1, "expected setMethod to emit one call to `greet`"); + } + + #[test] + fn set_method_body_calls_are_still_captured() { + // The recursive walk visits the anonymous function passed to + // setMethod, so calls inside the method body must still appear. + let s = parse_r( + "setMethod(\"greet\", \"Person\", function(x) { helper(x); validate(x) })\n", + ); + let names: Vec<&str> = s.calls.iter().map(|c| c.name.as_str()).collect(); + assert!(names.contains(&"helper"), "method body call `helper` not captured"); + assert!(names.contains(&"validate"), "method body call `validate` not captured"); + } + #[test] fn function_with_double_arrow_assignment() { // `<<-` is super-assignment in R; the JS extractor accepts it too. diff --git a/src/extractors/r.ts b/src/extractors/r.ts index 15e4be61..a6edac13 100644 --- a/src/extractors/r.ts +++ b/src/extractors/r.ts @@ -125,11 +125,16 @@ function handleCall(node: TreeSitterNode, ctx: ExtractorOutput): void { return; } - if (funcName === 'setGeneric' || funcName === 'setMethod') { + if (funcName === 'setGeneric') { handleSetGeneric(node, ctx); return; } + if (funcName === 'setMethod') { + handleSetMethod(node, ctx); + return; + } + // Regular call if (funcNode.type === 'identifier') { ctx.calls.push({ name: funcName, line: node.startPosition.row + 1 }); @@ -212,47 +217,55 @@ function handleLibraryCall(node: TreeSitterNode, ctx: ExtractorOutput): void { } function handleSourceCall(node: TreeSitterNode, ctx: ExtractorOutput): void { - for (let i = 0; i < node.childCount; i++) { - const child = node.child(i); - if (!child || child.type !== 'arguments') continue; - for (let j = 0; j < child.childCount; j++) { - const arg = child.child(j); - if (!arg) continue; - if (arg.type === 'string') { - const text = arg.text.replace(/^["']|["']$/g, ''); - ctx.imports.push({ - source: text, - names: ['source'], - line: node.startPosition.row + 1, - }); - return; - } - } - } + // source() only accepts string literals — `source(varname)` is not an import. + const path = firstStringArgument(node); + if (path === null) return; + ctx.imports.push({ + source: path, + names: ['source'], + line: node.startPosition.row + 1, + }); } function handleSetClass(node: TreeSitterNode, ctx: ExtractorOutput): void { - for (let i = 0; i < node.childCount; i++) { - const child = node.child(i); - if (!child || child.type !== 'arguments') continue; - for (let j = 0; j < child.childCount; j++) { - const arg = child.child(j); - if (!arg) continue; - if (arg.type === 'string') { - const name = arg.text.replace(/^["']|["']$/g, ''); - ctx.definitions.push({ - name, - kind: 'class', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - return; - } - } - } + const name = firstStringArgument(node); + if (name === null) return; + ctx.definitions.push({ + name, + kind: 'class', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); } function handleSetGeneric(node: TreeSitterNode, ctx: ExtractorOutput): void { + const name = firstStringArgument(node); + if (name === null) return; + ctx.definitions.push({ + name, + kind: 'function', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); +} + +// setMethod("greet", "Person", function(x) ...) registers an implementation of +// the generic `greet` — it is not a new top-level definition. Emitting a +// definition here produced two `function` nodes with the same name (one from +// setGeneric, one from setMethod) and broke resolution. Emit a call edge to +// the generic instead; the method body's calls are still picked up by the +// recursive walk of the anonymous function argument. +function handleSetMethod(node: TreeSitterNode, ctx: ExtractorOutput): void { + const name = firstStringArgument(node); + if (name === null) return; + ctx.calls.push({ name, line: node.startPosition.row + 1 }); +} + +// tree-sitter-r wraps each positional argument in an `argument` node that +// contains the actual `string` (or `identifier`) child, so the inner string +// must be unwrapped — checking `child.type === 'string'` directly misses it. +// Mirrors `first_argument_value` in the Rust extractor for parity. +function firstStringArgument(node: TreeSitterNode): string | null { for (let i = 0; i < node.childCount; i++) { const child = node.child(i); if (!child || child.type !== 'arguments') continue; @@ -260,15 +273,21 @@ function handleSetGeneric(node: TreeSitterNode, ctx: ExtractorOutput): void { const arg = child.child(j); if (!arg) continue; if (arg.type === 'string') { - const name = arg.text.replace(/^["']|["']$/g, ''); - ctx.definitions.push({ - name, - kind: 'function', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - return; + return stripQuotes(arg.text); + } + if (arg.type === 'argument') { + const valueNode = arg.childForFieldName('value'); + if (valueNode && valueNode.type === 'string') return stripQuotes(valueNode.text); + for (let k = 0; k < arg.childCount; k++) { + const inner = arg.child(k); + if (inner && inner.type === 'string') return stripQuotes(inner.text); + } } } } + return null; +} + +function stripQuotes(text: string): string { + return text.replace(/^["']|["']$/g, ''); } diff --git a/tests/parsers/r.test.ts b/tests/parsers/r.test.ts index 878a179b..57f142eb 100644 --- a/tests/parsers/r.test.ts +++ b/tests/parsers/r.test.ts @@ -53,4 +53,61 @@ mean(c(1, 2, 3))`); expect(symbols.imports).toContainEqual(expect.objectContaining({ source: 'dplyr' })); expect(symbols.imports.some((i) => i.source === 'package')).toBe(false); }); + + it('extracts source() imports', () => { + // Parity guard: native produces an import with `source: "service.R"`. + // The WASM extractor previously failed silently for the same reason as + // setClass/setGeneric — it didn't unwrap the `argument` node. + const symbols = parseR(`source("service.R")`); + expect(symbols.imports).toContainEqual( + expect.objectContaining({ source: 'service.R', names: ['source'] }), + ); + }); + + it('extracts a class definition from setClass', () => { + // Parity guard: the native extractor produces a `class` definition for + // `setClass(...)`; the WASM extractor previously failed silently because + // it did not unwrap the `argument` node around the string literal. + const symbols = parseR(`setClass("Person", representation(name = "character"))`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'Person', kind: 'class' }), + ); + }); + + it('extracts a function definition from setGeneric', () => { + // Same parity guard for setGeneric — was silently broken in WASM. + const symbols = parseR(`setGeneric("doIt", function(x) standardGeneric("doIt"))`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'doIt', kind: 'function' }), + ); + }); + + it('does not duplicate the generic definition when setMethod is present', () => { + // Idiomatic S4: a setGeneric followed by setMethod implementations. + // Only setGeneric should emit a function definition — setMethod registers + // an implementation, which we model as a call edge to the generic. + const symbols = parseR(` +setGeneric("greet", function(x) standardGeneric("greet")) +setMethod("greet", "Person", function(x) paste("Hello", x@name)) +setMethod("greet", "Animal", function(x) paste("Hi", x@species)) +`); + const greetDefs = symbols.definitions.filter((d) => d.name === 'greet'); + expect(greetDefs).toHaveLength(1); + expect(greetDefs[0]).toMatchObject({ kind: 'function' }); + }); + + it('emits a call to the generic for setMethod', () => { + const symbols = parseR(`setMethod("greet", "Person", function(x) paste("Hello", x@name))`); + const greetCalls = symbols.calls.filter((c) => c.name === 'greet'); + expect(greetCalls).toHaveLength(1); + }); + + it('still captures calls from inside the setMethod body', () => { + // The recursive walk visits the anonymous function passed to setMethod, + // so calls inside the method body must still appear in ctx.calls. + const symbols = parseR(`setMethod("greet", "Person", function(x) { helper(x); validate(x) })`); + const names = symbols.calls.map((c) => c.name); + expect(names).toContain('helper'); + expect(names).toContain('validate'); + }); });