From d3cd7cf75a49be064d993ec8a7e31515fd92a8f9 Mon Sep 17 00:00:00 2001 From: Ed_ Date: Tue, 5 May 2026 18:51:56 -0400 Subject: [PATCH] feat(parser): Implement C/C++ skeleton and outline extraction --- src/file_cache.py | 66 +++++++++++++++++++++++++++++++++-- tests/test_ast_parser.py | 74 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+), 3 deletions(-) diff --git a/src/file_cache.py b/src/file_cache.py index d79b723..9b664e4 100644 --- a/src/file_cache.py +++ b/src/file_cache.py @@ -110,7 +110,7 @@ class ASTParser: def walk(node: tree_sitter.Node) -> None: if node.type == "function_definition": body = node.child_by_field_name("body") - if body and body.type == "block": + if body and body.type in ("block", "compound_statement"): indent = " " * body.start_point.column first_stmt = None for child in body.children: @@ -178,7 +178,7 @@ class ASTParser: def walk(node: tree_sitter.Node) -> None: if node.type == "function_definition": body = node.child_by_field_name("body") - if body and body.type == "block": + if body and body.type in ("block", "compound_statement"): # Check if we should preserve it preserve = has_core_logic_decorator(node) or has_hot_comment(node) if not preserve: @@ -315,7 +315,7 @@ class ASTParser: fullname = f"{parent_class}.{fname}" if parent_class else fname if fullname in all_found: body = node.child_by_field_name("body") - if body and body.type == "block": + if body and body.type in ("block", "compound_statement"): indent = " " * body.start_point.column first_stmt = None for child in body.children: @@ -368,6 +368,66 @@ class ASTParser: result = re.sub(r'\n\s*\n\s*\n+', '\n\n', result) return result.strip() + "\n" + def get_code_outline(self, code: str, path: Optional[str] = None) -> str: + """ + Returns a hierarchical outline of the code (classes, structs, functions, methods). + """ + tree = self.get_cached_tree(path, code) + output = [] + + def get_name(node: tree_sitter.Node) -> str: + name_node = node.child_by_field_name("name") + if name_node: + return code[name_node.start_byte:name_node.end_byte] + + if node.type == "function_definition": + decl = node.child_by_field_name("declarator") + while decl: + if decl.type in ("identifier", "field_identifier"): + return code[decl.start_byte:decl.end_byte] + next_decl = decl.child_by_field_name("declarator") + if not next_decl and decl.child_count > 0: + for child in decl.children: + if child.type in ("identifier", "field_identifier"): + return code[child.start_byte:child.end_byte] + decl = decl.children[0] + else: + decl = next_decl + + if node.type in ("struct_specifier", "class_specifier"): + for child in node.children: + if child.type in ("type_identifier", "identifier"): + return code[child.start_byte:child.end_byte] + return "" + + def walk(node: tree_sitter.Node, indent: int = 0) -> None: + ntype = node.type + label = "" + if ntype in ("class_definition", "class_specifier"): + label = "[Class]" + elif ntype == "struct_specifier": + label = "[Struct]" + elif ntype == "function_definition": + label = "[Method]" if indent > 0 else "[Func]" + + if label: + name = get_name(node) + if name: + start = node.start_point.row + 1 + end = node.end_point.row + 1 + output.append(f"{' ' * indent}{label} {name} (Lines {start}-{end})") + body = node.child_by_field_name("body") + if body: + for child in body.children: + walk(child, indent + 1) + return + + for child in node.children: + walk(child, indent) + + walk(tree.root_node) + return "\n".join(output) + def reset_client() -> None: pass diff --git a/tests/test_ast_parser.py b/tests/test_ast_parser.py index 1359163..70ceb83 100644 --- a/tests/test_ast_parser.py +++ b/tests/test_ast_parser.py @@ -36,6 +36,49 @@ class MyClass: assert 'def method(self):' in skeleton assert '"""Method docstring."""' in skeleton +def test_ast_parser_get_skeleton_c() -> None: + """Verify that get_skeleton replaces function bodies with '...' for C while preserving structs.""" + parser = ASTParser(language="c") + code = """ +struct MyStruct { + int x; +}; + +void my_func() { + printf("hello\\n"); +} +""" + skeleton = parser.get_skeleton(code) + assert 'struct MyStruct {' in skeleton + assert 'int x;' in skeleton + assert 'void my_func()' in skeleton + assert '...' in skeleton + assert 'printf("hello\\n");' not in skeleton + +def test_ast_parser_get_skeleton_cpp() -> None: + """Verify that get_skeleton replaces function and method bodies with '...' for C++.""" + parser = ASTParser(language="cpp") + code = """ +class MyClass { +public: + void myMethod() { + int x = 1; + } +}; + +template +void myTemplateFunc(T x) { + x.doSomething(); +} +""" + skeleton = parser.get_skeleton(code) + assert 'class MyClass {' in skeleton + assert 'void myMethod() ...' in skeleton + assert 'template ' in skeleton + assert 'void myTemplateFunc(T x) ...' in skeleton + assert 'int x = 1;' not in skeleton + assert 'x.doSomething();' not in skeleton + def test_ast_parser_invalid_language() -> None: """Verify handling of unsupported or invalid languages.""" # Currently ASTParser defaults to Python if language not supported or just fails tree-sitter init @@ -130,3 +173,34 @@ class MyClass: assert 'def dep1():' in view2 assert 'def dep2():' not in view2 assert 'def method2(self):' not in view2 + +def test_ast_parser_get_code_outline_c() -> None: + """Verify that get_code_outline works for C.""" + parser = ASTParser(language="c") + code = """ +struct MyStruct { + int x; +}; + +void my_func() { + printf("hello\\n"); +} +""" + outline = parser.get_code_outline(code) + assert '[Struct] MyStruct (Lines 2-4)' in outline + assert '[Func] my_func (Lines 6-8)' in outline + +def test_ast_parser_get_code_outline_cpp() -> None: + """Verify that get_code_outline works for C++.""" + parser = ASTParser(language="cpp") + code = """ +class MyClass { +public: + void myMethod() { + } +}; +""" + outline = parser.get_code_outline(code) + assert '[Class] MyClass (Lines 2-6)' in outline + assert ' [Method] myMethod (Lines 4-5)' in outline +