From d764b288d1feac898b9b0f2dbd319aec528d6c8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=87=A1=E5=A4=AB=E4=BF=97=E5=AD=901025?= <370438778@qq.com> Date: Wed, 29 Apr 2026 11:46:47 +0800 Subject: [PATCH] fix: extract correct function name for C++ scoped definitions When a C++ function definition uses a qualified name (e.g. `ClassName::method_name`) and has a non-primitive return type (`std::string`, custom types like `bufferlist`, etc.), the parser incorrectly extracts the return type as the function name. Root cause: `_get_name` recurses into `function_declarator` for C++, but `qualified_identifier` nodes (used for scoped names) are not handled by the generic identifier loop. The recursion returns None and the outer loop matches the return type's `type_identifier` first. Fix: detect `qualified_identifier` inside `function_declarator` and take the rightmost `identifier`/`field_identifier` (the actual method name after the last `::`). Also add `field_identifier` to the generic name loop for C++ class member function declarations. Closes #395 Co-Authored-By: Claude Opus 4.7 --- code_review_graph/parser.py | 13 ++++++- tests/test_parser.py | 77 +++++++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 2 deletions(-) diff --git a/code_review_graph/parser.py b/code_review_graph/parser.py index f681263a..3c1226ba 100644 --- a/code_review_graph/parser.py +++ b/code_review_graph/parser.py @@ -3975,6 +3975,13 @@ def _get_name(self, node, language: str, kind: str) -> Optional[str]: if language in ("c", "cpp", "objc") and kind == "function": for child in node.children: if child.type in ("function_declarator", "pointer_declarator"): + # Scoped names like Foo::bar use qualified_identifier; take + # the rightmost identifier/field_identifier after the last ::. + for sub in child.children: + if sub.type == "qualified_identifier": + for qsub in reversed(sub.children): + if qsub.type in ("identifier", "field_identifier"): + return qsub.text.decode("utf-8", errors="replace") result = self._get_name(child, language, kind) if result: return result @@ -4010,11 +4017,13 @@ def _get_name(self, node, language: str, kind: str) -> Optional[str]: for sub in child.children: if sub.type == "type_identifier": return sub.text.decode("utf-8", errors="replace") - # Most languages use a 'name' child + # Most languages use a 'name' child. + # field_identifier covers C++ class member function names inside + # function_declarator (e.g. virtual std::string get_name() = 0). for child in node.children: if child.type in ( "identifier", "name", "type_identifier", "property_identifier", - "simple_identifier", "constant", + "simple_identifier", "constant", "field_identifier", ): return child.text.decode("utf-8", errors="replace") # For Go type declarations, look for type_spec diff --git a/tests/test_parser.py b/tests/test_parser.py index b38ff11b..5a341e96 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1209,3 +1209,80 @@ def test_elixir_top_level_dotted_call_attributes_to_file(self): and e.target.endswith("puts") ] assert len(top_level) == 1 + +class TestCppScopedFunctionName: + """Regression tests for C++ scoped function name extraction. + + See: https://github.com/tirth8205/code-review-graph/issues/395 + """ + + def test_scoped_function_with_type_identifier_return(self, tmp_path): + """bufferlist OSDService::get_inc_map(...) should extract 'get_inc_map'.""" + src = tmp_path / "osd_service.cpp" + src.write_text( + "bufferlist OSDService::get_inc_map(epoch_t e) {\n" + " bufferlist bl;\n" + " return bl;\n" + "}\n" + ) + p = CodeParser() + nodes, _ = p.parse_file(src) + fns = [n for n in nodes if n.kind == "Function"] + assert len(fns) == 1 + assert fns[0].name == "get_inc_map" + + def test_scoped_function_with_qualified_return(self, tmp_path): + """std::string OSDMap::get_pool_name(...) should extract 'get_pool_name'.""" + src = tmp_path / "osd_map.cpp" + src.write_text( + "std::string OSDMap::get_pool_name(int64_t pool_id) const {\n" + ' return "";\n' + "}\n" + ) + p = CodeParser() + nodes, _ = p.parse_file(src) + fns = [n for n in nodes if n.kind == "Function"] + assert len(fns) == 1 + assert fns[0].name == "get_pool_name" + + def test_scoped_function_with_primitive_return_still_works(self, tmp_path): + """int OSD::handle_osd_map(...) was already correct; verify no regression.""" + src = tmp_path / "osd.cpp" + src.write_text( + "int OSD::handle_osd_map(MOSDMap *m) {\n" + " return 0;\n" + "}\n" + ) + p = CodeParser() + nodes, _ = p.parse_file(src) + fns = [n for n in nodes if n.kind == "Function"] + assert len(fns) == 1 + assert fns[0].name == "handle_osd_map" + + def test_unscoped_function_with_type_identifier_return(self, tmp_path): + """static std::string _make_key(...) should extract '_make_key'.""" + src = tmp_path / "util.cpp" + src.write_text( + "static std::string _make_key(const std::string& prefix) {\n" + " return prefix;\n" + "}\n" + ) + p = CodeParser() + nodes, _ = p.parse_file(src) + fns = [n for n in nodes if n.kind == "Function"] + assert len(fns) == 1 + assert fns[0].name == "_make_key" + + def test_scoped_function_string_return(self, tmp_path): + """string RGWDedupProcessor::get_obj_fingerprint(...) should extract the method name.""" + src = tmp_path / "rgw_dedup.cpp" + src.write_text( + "string RGWDedupProcessor::get_obj_fingerprint(const rgw_obj& obj) {\n" + ' return "";\n' + "}\n" + ) + p = CodeParser() + nodes, _ = p.parse_file(src) + fns = [n for n in nodes if n.kind == "Function"] + assert len(fns) == 1 + assert fns[0].name == "get_obj_fingerprint"