From cca457017547b5b19fff8ebe3d742b3511bf7038 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 14 May 2026 17:49:49 -0600 Subject: [PATCH] fix(r): setMethod emits call edge, not duplicate definition (#1109) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In idiomatic S4 code, `setGeneric("greet", ...)` followed by `setMethod("greet", "Person", ...)` produced two `function` definition nodes for the same name. Split the handler: `setGeneric` still emits a function definition (the generic), while `setMethod` emits a call edge to the generic — modelling the implementation registration without duplicating the symbol. The method body's calls are still picked up by the recursive walk of the anonymous function argument. While auditing the WASM extractor, found three sibling handlers that silently produced no output because they didn't unwrap the `argument` node that tree-sitter-r places around each positional string literal: setClass, setGeneric, and source(). The native (Rust) extractor handled this correctly via `first_argument_value`. Introduced a parity helper `firstStringArgument` mirroring the Rust logic and routed all three handlers through it. Closes #1109. --- .../codegraph-core/src/extractors/r_lang.rs | 68 ++++++++++- src/extractors/r.ts | 107 +++++++++++------- tests/parsers/r.test.ts | 57 ++++++++++ 3 files changed, 187 insertions(+), 45 deletions(-) diff --git a/crates/codegraph-core/src/extractors/r_lang.rs b/crates/codegraph-core/src/extractors/r_lang.rs index 256500ef..d5d89bdb 100644 --- a/crates/codegraph-core/src/extractors/r_lang.rs +++ b/crates/codegraph-core/src/extractors/r_lang.rs @@ -163,10 +163,14 @@ fn handle_call(node: &Node, source: &[u8], symbols: &mut FileSymbols) { handle_set_class(node, source, symbols); return; } - "setGeneric" | "setMethod" => { + "setGeneric" => { handle_set_generic(node, source, symbols); return; } + "setMethod" => { + handle_set_method(node, source, symbols); + return; + } _ => {} } } @@ -332,6 +336,23 @@ fn handle_set_generic(node: &Node, source: &[u8], symbols: &mut FileSymbols) { } } +// `setMethod("greet", "Person", function(x) ...)` registers an implementation +// of the generic `greet` — it is not a new top-level definition. Emitting a +// definition here produced two `function` nodes with the same name (one from +// setGeneric, one from setMethod) and broke resolution. Emit a call edge to +// the generic instead; the method body's calls are still picked up by the +// recursive walk of the anonymous function argument. +fn handle_set_method(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(name) = first_argument_value(node, source, false) { + symbols.calls.push(Call { + name, + line: start_line(node), + dynamic: None, + receiver: None, + }); + } +} + #[cfg(test)] mod tests { use super::*; @@ -439,6 +460,51 @@ mod tests { assert_eq!(d.kind, "function"); } + #[test] + fn set_method_does_not_duplicate_generic_definition() { + // Idiomatic S4: a setGeneric followed by setMethod implementations. + // Only the setGeneric should emit a definition — setMethod registers + // an implementation of the generic, which we model as a call edge. + let code = r#" +setGeneric("greet", function(x) standardGeneric("greet")) +setMethod("greet", "Person", function(x) paste("Hello", x@name)) +setMethod("greet", "Animal", function(x) paste("Hi", x@species)) +"#; + let s = parse_r(code); + let greet_defs: Vec<&Definition> = + s.definitions.iter().filter(|d| d.name == "greet").collect(); + assert_eq!( + greet_defs.len(), + 1, + "expected exactly one `greet` definition, got {greet_defs:#?}", + ); + assert_eq!(greet_defs[0].kind, "function"); + } + + #[test] + fn set_method_emits_call_to_generic() { + // setMethod registers an implementation of the generic. The fix emits + // a call edge to the generic so the dispatch relationship is visible + // in the graph. + let s = parse_r( + "setMethod(\"greet\", \"Person\", function(x) paste(\"Hello\", x@name))\n", + ); + let calls: Vec<&Call> = s.calls.iter().filter(|c| c.name == "greet").collect(); + assert_eq!(calls.len(), 1, "expected setMethod to emit one call to `greet`"); + } + + #[test] + fn set_method_body_calls_are_still_captured() { + // The recursive walk visits the anonymous function passed to + // setMethod, so calls inside the method body must still appear. + let s = parse_r( + "setMethod(\"greet\", \"Person\", function(x) { helper(x); validate(x) })\n", + ); + let names: Vec<&str> = s.calls.iter().map(|c| c.name.as_str()).collect(); + assert!(names.contains(&"helper"), "method body call `helper` not captured"); + assert!(names.contains(&"validate"), "method body call `validate` not captured"); + } + #[test] fn function_with_double_arrow_assignment() { // `<<-` is super-assignment in R; the JS extractor accepts it too. diff --git a/src/extractors/r.ts b/src/extractors/r.ts index 15e4be61..a6edac13 100644 --- a/src/extractors/r.ts +++ b/src/extractors/r.ts @@ -125,11 +125,16 @@ function handleCall(node: TreeSitterNode, ctx: ExtractorOutput): void { return; } - if (funcName === 'setGeneric' || funcName === 'setMethod') { + if (funcName === 'setGeneric') { handleSetGeneric(node, ctx); return; } + if (funcName === 'setMethod') { + handleSetMethod(node, ctx); + return; + } + // Regular call if (funcNode.type === 'identifier') { ctx.calls.push({ name: funcName, line: node.startPosition.row + 1 }); @@ -212,47 +217,55 @@ function handleLibraryCall(node: TreeSitterNode, ctx: ExtractorOutput): void { } function handleSourceCall(node: TreeSitterNode, ctx: ExtractorOutput): void { - for (let i = 0; i < node.childCount; i++) { - const child = node.child(i); - if (!child || child.type !== 'arguments') continue; - for (let j = 0; j < child.childCount; j++) { - const arg = child.child(j); - if (!arg) continue; - if (arg.type === 'string') { - const text = arg.text.replace(/^["']|["']$/g, ''); - ctx.imports.push({ - source: text, - names: ['source'], - line: node.startPosition.row + 1, - }); - return; - } - } - } + // source() only accepts string literals — `source(varname)` is not an import. + const path = firstStringArgument(node); + if (path === null) return; + ctx.imports.push({ + source: path, + names: ['source'], + line: node.startPosition.row + 1, + }); } function handleSetClass(node: TreeSitterNode, ctx: ExtractorOutput): void { - for (let i = 0; i < node.childCount; i++) { - const child = node.child(i); - if (!child || child.type !== 'arguments') continue; - for (let j = 0; j < child.childCount; j++) { - const arg = child.child(j); - if (!arg) continue; - if (arg.type === 'string') { - const name = arg.text.replace(/^["']|["']$/g, ''); - ctx.definitions.push({ - name, - kind: 'class', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - return; - } - } - } + const name = firstStringArgument(node); + if (name === null) return; + ctx.definitions.push({ + name, + kind: 'class', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); } function handleSetGeneric(node: TreeSitterNode, ctx: ExtractorOutput): void { + const name = firstStringArgument(node); + if (name === null) return; + ctx.definitions.push({ + name, + kind: 'function', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); +} + +// setMethod("greet", "Person", function(x) ...) registers an implementation of +// the generic `greet` — it is not a new top-level definition. Emitting a +// definition here produced two `function` nodes with the same name (one from +// setGeneric, one from setMethod) and broke resolution. Emit a call edge to +// the generic instead; the method body's calls are still picked up by the +// recursive walk of the anonymous function argument. +function handleSetMethod(node: TreeSitterNode, ctx: ExtractorOutput): void { + const name = firstStringArgument(node); + if (name === null) return; + ctx.calls.push({ name, line: node.startPosition.row + 1 }); +} + +// tree-sitter-r wraps each positional argument in an `argument` node that +// contains the actual `string` (or `identifier`) child, so the inner string +// must be unwrapped — checking `child.type === 'string'` directly misses it. +// Mirrors `first_argument_value` in the Rust extractor for parity. +function firstStringArgument(node: TreeSitterNode): string | null { for (let i = 0; i < node.childCount; i++) { const child = node.child(i); if (!child || child.type !== 'arguments') continue; @@ -260,15 +273,21 @@ function handleSetGeneric(node: TreeSitterNode, ctx: ExtractorOutput): void { const arg = child.child(j); if (!arg) continue; if (arg.type === 'string') { - const name = arg.text.replace(/^["']|["']$/g, ''); - ctx.definitions.push({ - name, - kind: 'function', - line: node.startPosition.row + 1, - endLine: nodeEndLine(node), - }); - return; + return stripQuotes(arg.text); + } + if (arg.type === 'argument') { + const valueNode = arg.childForFieldName('value'); + if (valueNode && valueNode.type === 'string') return stripQuotes(valueNode.text); + for (let k = 0; k < arg.childCount; k++) { + const inner = arg.child(k); + if (inner && inner.type === 'string') return stripQuotes(inner.text); + } } } } + return null; +} + +function stripQuotes(text: string): string { + return text.replace(/^["']|["']$/g, ''); } diff --git a/tests/parsers/r.test.ts b/tests/parsers/r.test.ts index 878a179b..57f142eb 100644 --- a/tests/parsers/r.test.ts +++ b/tests/parsers/r.test.ts @@ -53,4 +53,61 @@ mean(c(1, 2, 3))`); expect(symbols.imports).toContainEqual(expect.objectContaining({ source: 'dplyr' })); expect(symbols.imports.some((i) => i.source === 'package')).toBe(false); }); + + it('extracts source() imports', () => { + // Parity guard: native produces an import with `source: "service.R"`. + // The WASM extractor previously failed silently for the same reason as + // setClass/setGeneric — it didn't unwrap the `argument` node. + const symbols = parseR(`source("service.R")`); + expect(symbols.imports).toContainEqual( + expect.objectContaining({ source: 'service.R', names: ['source'] }), + ); + }); + + it('extracts a class definition from setClass', () => { + // Parity guard: the native extractor produces a `class` definition for + // `setClass(...)`; the WASM extractor previously failed silently because + // it did not unwrap the `argument` node around the string literal. + const symbols = parseR(`setClass("Person", representation(name = "character"))`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'Person', kind: 'class' }), + ); + }); + + it('extracts a function definition from setGeneric', () => { + // Same parity guard for setGeneric — was silently broken in WASM. + const symbols = parseR(`setGeneric("doIt", function(x) standardGeneric("doIt"))`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'doIt', kind: 'function' }), + ); + }); + + it('does not duplicate the generic definition when setMethod is present', () => { + // Idiomatic S4: a setGeneric followed by setMethod implementations. + // Only setGeneric should emit a function definition — setMethod registers + // an implementation, which we model as a call edge to the generic. + const symbols = parseR(` +setGeneric("greet", function(x) standardGeneric("greet")) +setMethod("greet", "Person", function(x) paste("Hello", x@name)) +setMethod("greet", "Animal", function(x) paste("Hi", x@species)) +`); + const greetDefs = symbols.definitions.filter((d) => d.name === 'greet'); + expect(greetDefs).toHaveLength(1); + expect(greetDefs[0]).toMatchObject({ kind: 'function' }); + }); + + it('emits a call to the generic for setMethod', () => { + const symbols = parseR(`setMethod("greet", "Person", function(x) paste("Hello", x@name))`); + const greetCalls = symbols.calls.filter((c) => c.name === 'greet'); + expect(greetCalls).toHaveLength(1); + }); + + it('still captures calls from inside the setMethod body', () => { + // The recursive walk visits the anonymous function passed to setMethod, + // so calls inside the method body must still appear in ctx.calls. + const symbols = parseR(`setMethod("greet", "Person", function(x) { helper(x); validate(x) })`); + const names = symbols.calls.map((c) => c.name); + expect(names).toContain('helper'); + expect(names).toContain('validate'); + }); });