Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 67 additions & 1 deletion crates/codegraph-core/src/extractors/r_lang.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,10 +163,14 @@ fn handle_call(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
handle_set_class(node, source, symbols);
return;
}
"setGeneric" | "setMethod" => {
"setGeneric" => {
handle_set_generic(node, source, symbols);
return;
}
"setMethod" => {
handle_set_method(node, source, symbols);
return;
}
_ => {}
}
}
Expand Down Expand Up @@ -332,6 +336,23 @@ fn handle_set_generic(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
}
}

// `setMethod("greet", "Person", function(x) ...)` registers an implementation
// of the generic `greet` — it is not a new top-level definition. Emitting a
// definition here produced two `function` nodes with the same name (one from
// setGeneric, one from setMethod) and broke resolution. Emit a call edge to
// the generic instead; the method body's calls are still picked up by the
// recursive walk of the anonymous function argument.
fn handle_set_method(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
if let Some(name) = first_argument_value(node, source, false) {
symbols.calls.push(Call {
name,
line: start_line(node),
dynamic: None,
receiver: None,
});
}
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -439,6 +460,51 @@ mod tests {
assert_eq!(d.kind, "function");
}

#[test]
fn set_method_does_not_duplicate_generic_definition() {
// Idiomatic S4: a setGeneric followed by setMethod implementations.
// Only the setGeneric should emit a definition — setMethod registers
// an implementation of the generic, which we model as a call edge.
let code = r#"
setGeneric("greet", function(x) standardGeneric("greet"))
setMethod("greet", "Person", function(x) paste("Hello", x@name))
setMethod("greet", "Animal", function(x) paste("Hi", x@species))
"#;
let s = parse_r(code);
let greet_defs: Vec<&Definition> =
s.definitions.iter().filter(|d| d.name == "greet").collect();
assert_eq!(
greet_defs.len(),
1,
"expected exactly one `greet` definition, got {greet_defs:#?}",
);
assert_eq!(greet_defs[0].kind, "function");
}

#[test]
fn set_method_emits_call_to_generic() {
// setMethod registers an implementation of the generic. The fix emits
// a call edge to the generic so the dispatch relationship is visible
// in the graph.
let s = parse_r(
"setMethod(\"greet\", \"Person\", function(x) paste(\"Hello\", x@name))\n",
);
let calls: Vec<&Call> = s.calls.iter().filter(|c| c.name == "greet").collect();
assert_eq!(calls.len(), 1, "expected setMethod to emit one call to `greet`");
}

#[test]
fn set_method_body_calls_are_still_captured() {
// The recursive walk visits the anonymous function passed to
// setMethod, so calls inside the method body must still appear.
let s = parse_r(
"setMethod(\"greet\", \"Person\", function(x) { helper(x); validate(x) })\n",
);
let names: Vec<&str> = s.calls.iter().map(|c| c.name.as_str()).collect();
assert!(names.contains(&"helper"), "method body call `helper` not captured");
assert!(names.contains(&"validate"), "method body call `validate` not captured");
}

#[test]
fn function_with_double_arrow_assignment() {
// `<<-` is super-assignment in R; the JS extractor accepts it too.
Expand Down
107 changes: 63 additions & 44 deletions src/extractors/r.ts
Original file line number Diff line number Diff line change
Expand Up @@ -125,11 +125,16 @@ function handleCall(node: TreeSitterNode, ctx: ExtractorOutput): void {
return;
}

if (funcName === 'setGeneric' || funcName === 'setMethod') {
if (funcName === 'setGeneric') {
handleSetGeneric(node, ctx);
return;
}

if (funcName === 'setMethod') {
handleSetMethod(node, ctx);
return;
}

// Regular call
if (funcNode.type === 'identifier') {
ctx.calls.push({ name: funcName, line: node.startPosition.row + 1 });
Expand Down Expand Up @@ -212,63 +217,77 @@ function handleLibraryCall(node: TreeSitterNode, ctx: ExtractorOutput): void {
}

function handleSourceCall(node: TreeSitterNode, ctx: ExtractorOutput): void {
for (let i = 0; i < node.childCount; i++) {
const child = node.child(i);
if (!child || child.type !== 'arguments') continue;
for (let j = 0; j < child.childCount; j++) {
const arg = child.child(j);
if (!arg) continue;
if (arg.type === 'string') {
const text = arg.text.replace(/^["']|["']$/g, '');
ctx.imports.push({
source: text,
names: ['source'],
line: node.startPosition.row + 1,
});
return;
}
}
}
// source() only accepts string literals — `source(varname)` is not an import.
const path = firstStringArgument(node);
if (path === null) return;
ctx.imports.push({
source: path,
names: ['source'],
line: node.startPosition.row + 1,
});
}

function handleSetClass(node: TreeSitterNode, ctx: ExtractorOutput): void {
for (let i = 0; i < node.childCount; i++) {
const child = node.child(i);
if (!child || child.type !== 'arguments') continue;
for (let j = 0; j < child.childCount; j++) {
const arg = child.child(j);
if (!arg) continue;
if (arg.type === 'string') {
const name = arg.text.replace(/^["']|["']$/g, '');
ctx.definitions.push({
name,
kind: 'class',
line: node.startPosition.row + 1,
endLine: nodeEndLine(node),
});
return;
}
}
}
const name = firstStringArgument(node);
if (name === null) return;
ctx.definitions.push({
name,
kind: 'class',
line: node.startPosition.row + 1,
endLine: nodeEndLine(node),
});
}

function handleSetGeneric(node: TreeSitterNode, ctx: ExtractorOutput): void {
const name = firstStringArgument(node);
if (name === null) return;
ctx.definitions.push({
name,
kind: 'function',
line: node.startPosition.row + 1,
endLine: nodeEndLine(node),
});
}

// setMethod("greet", "Person", function(x) ...) registers an implementation of
// the generic `greet` — it is not a new top-level definition. Emitting a
// definition here produced two `function` nodes with the same name (one from
// setGeneric, one from setMethod) and broke resolution. Emit a call edge to
// the generic instead; the method body's calls are still picked up by the
// recursive walk of the anonymous function argument.
function handleSetMethod(node: TreeSitterNode, ctx: ExtractorOutput): void {
const name = firstStringArgument(node);
if (name === null) return;
ctx.calls.push({ name, line: node.startPosition.row + 1 });
}

// tree-sitter-r wraps each positional argument in an `argument` node that
// contains the actual `string` (or `identifier`) child, so the inner string
// must be unwrapped — checking `child.type === 'string'` directly misses it.
// Mirrors `first_argument_value` in the Rust extractor for parity.
function firstStringArgument(node: TreeSitterNode): string | null {
for (let i = 0; i < node.childCount; i++) {
const child = node.child(i);
if (!child || child.type !== 'arguments') continue;
for (let j = 0; j < child.childCount; j++) {
const arg = child.child(j);
if (!arg) continue;
if (arg.type === 'string') {
const name = arg.text.replace(/^["']|["']$/g, '');
ctx.definitions.push({
name,
kind: 'function',
line: node.startPosition.row + 1,
endLine: nodeEndLine(node),
});
return;
return stripQuotes(arg.text);
}
if (arg.type === 'argument') {
const valueNode = arg.childForFieldName('value');
if (valueNode && valueNode.type === 'string') return stripQuotes(valueNode.text);
for (let k = 0; k < arg.childCount; k++) {
const inner = arg.child(k);
if (inner && inner.type === 'string') return stripQuotes(inner.text);
}
}
}
}
return null;
}

function stripQuotes(text: string): string {
return text.replace(/^["']|["']$/g, '');
}
57 changes: 57 additions & 0 deletions tests/parsers/r.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,4 +53,61 @@ mean(c(1, 2, 3))`);
expect(symbols.imports).toContainEqual(expect.objectContaining({ source: 'dplyr' }));
expect(symbols.imports.some((i) => i.source === 'package')).toBe(false);
});

it('extracts source() imports', () => {
// Parity guard: native produces an import with `source: "service.R"`.
// The WASM extractor previously failed silently for the same reason as
// setClass/setGeneric — it didn't unwrap the `argument` node.
const symbols = parseR(`source("service.R")`);
expect(symbols.imports).toContainEqual(
expect.objectContaining({ source: 'service.R', names: ['source'] }),
);
});

it('extracts a class definition from setClass', () => {
// Parity guard: the native extractor produces a `class` definition for
// `setClass(...)`; the WASM extractor previously failed silently because
// it did not unwrap the `argument` node around the string literal.
const symbols = parseR(`setClass("Person", representation(name = "character"))`);
expect(symbols.definitions).toContainEqual(
expect.objectContaining({ name: 'Person', kind: 'class' }),
);
});

it('extracts a function definition from setGeneric', () => {
// Same parity guard for setGeneric — was silently broken in WASM.
const symbols = parseR(`setGeneric("doIt", function(x) standardGeneric("doIt"))`);
expect(symbols.definitions).toContainEqual(
expect.objectContaining({ name: 'doIt', kind: 'function' }),
);
});

it('does not duplicate the generic definition when setMethod is present', () => {
// Idiomatic S4: a setGeneric followed by setMethod implementations.
// Only setGeneric should emit a function definition — setMethod registers
// an implementation, which we model as a call edge to the generic.
const symbols = parseR(`
setGeneric("greet", function(x) standardGeneric("greet"))
setMethod("greet", "Person", function(x) paste("Hello", x@name))
setMethod("greet", "Animal", function(x) paste("Hi", x@species))
`);
const greetDefs = symbols.definitions.filter((d) => d.name === 'greet');
expect(greetDefs).toHaveLength(1);
expect(greetDefs[0]).toMatchObject({ kind: 'function' });
});

it('emits a call to the generic for setMethod', () => {
const symbols = parseR(`setMethod("greet", "Person", function(x) paste("Hello", x@name))`);
const greetCalls = symbols.calls.filter((c) => c.name === 'greet');
expect(greetCalls).toHaveLength(1);
});

it('still captures calls from inside the setMethod body', () => {
// The recursive walk visits the anonymous function passed to setMethod,
// so calls inside the method body must still appear in ctx.calls.
const symbols = parseR(`setMethod("greet", "Person", function(x) { helper(x); validate(x) })`);
const names = symbols.calls.map((c) => c.name);
expect(names).toContain('helper');
expect(names).toContain('validate');
});
});
Loading