Private
Public Access
0
0

more organization

This commit is contained in:
2026-06-06 10:24:22 -04:00
parent 1c627bcc30
commit 7d555361f9
20 changed files with 630 additions and 725 deletions
+78 -85
View File
@@ -49,33 +49,29 @@ _ast_cache: Dict[str, Tuple[float, tree_sitter.Tree]] = {}
class ASTParser:
"""
Parser for extracting AST-based views of source code.
Currently supports Python.
Parser for extracting AST-based views of source code.
Currently supports Python, C++, and C.
"""
#region: Core Operations
def __init__(self, language: str) -> None:
"""
[C: src/mcp_client.py:_DDGParser.__init__, src/mcp_client.py:_TextExtractor.__init__]
[C: src/mcp_client.py:_DDGParser.__init__, src/mcp_client.py:_TextExtractor.__init__]
"""
if language not in ("python", "cpp", "c"):
raise ValueError(f"Language '{language}' not supported yet.")
self.language_name = language
# Load the tree-sitter language grammar
if language == "python":
self.language = tree_sitter.Language(tree_sitter_python.language())
elif language == "cpp":
self.language = tree_sitter.Language(tree_sitter_cpp.language())
elif language == "c":
self.language = tree_sitter.Language(tree_sitter_c.language())
if language == "python": self.language = tree_sitter.Language(tree_sitter_python.language())
elif language == "cpp": self.language = tree_sitter.Language(tree_sitter_cpp.language())
elif language == "c": self.language = tree_sitter.Language(tree_sitter_c.language())
self.parser = tree_sitter.Parser(self.language)
def parse(self, code: str) -> tree_sitter.Tree:
    """
    Encode ``code`` as UTF-8 and hand the bytes to this instance's parser.

    Args:
        code: Source text to parse.

    Returns:
        The value returned by ``self.parser.parse`` for the encoded bytes.

    [C: src/mcp_client.py:_search_file, src/mcp_client.py:derive_code_path, src/mcp_client.py:py_check_syntax, src/mcp_client.py:py_get_class_summary, src/mcp_client.py:py_get_definition, src/mcp_client.py:py_get_docstring, src/mcp_client.py:py_get_imports, src/mcp_client.py:py_get_signature, src/mcp_client.py:py_get_symbol_info, src/mcp_client.py:py_get_var_declaration, src/mcp_client.py:py_set_signature, src/mcp_client.py:py_set_var_declaration, src/mcp_client.py:py_update_definition, src/mcp_client.py:trace, src/outline_tool.py:CodeOutliner.outline, src/rag_engine.py:RAGEngine._chunk_code, src/summarize.py:_summarise_python, tests/test_ast_parser.py:test_ast_parser_parse, tests/test_tree_sitter_setup.py:test_tree_sitter_python_setup]
    """
    encoded_source = bytes(code, "utf8")
    return self.parser.parse(encoded_source)
@@ -85,7 +81,7 @@ class ASTParser:
return self.parse(code)
try:
p = Path(path)
p = Path(path)
mtime = p.stat().st_mtime if p.exists() else 0.0
except Exception:
mtime = 0.0
@@ -185,17 +181,18 @@ class ASTParser:
if child.type in ("type_identifier", "identifier", "namespace_identifier", "qualified_identifier"):
return code_bytes[child.start_byte:child.end_byte].decode("utf8", errors="replace")
return ""
#endregion: Core Operations
#region: Skeleton & Curated Views
def get_skeleton(self, code: str, path: Optional[str] = None) -> str:
"""
Returns a skeleton of a Python file (preserving docstrings, stripping function bodies).
[C: src/mcp_client.py:py_get_skeleton, src/mcp_client.py:ts_c_get_skeleton, src/mcp_client.py:ts_cpp_get_skeleton, src/multi_agent_conductor.py:run_worker_lifecycle, tests/test_ast_parser.py:test_ast_parser_get_skeleton_c, tests/test_ast_parser.py:test_ast_parser_get_skeleton_cpp, tests/test_ast_parser.py:test_ast_parser_get_skeleton_python, tests/test_context_pruner.py:test_ast_caching, tests/test_context_pruner.py:test_performance_large_file]
Returns a skeleton of a Python file (preserving docstrings, stripping function bodies).
[C: src/mcp_client.py:py_get_skeleton, src/mcp_client.py:ts_c_get_skeleton, src/mcp_client.py:ts_cpp_get_skeleton, src/multi_agent_conductor.py:run_worker_lifecycle, tests/test_ast_parser.py:test_ast_parser_get_skeleton_c, tests/test_ast_parser.py:test_ast_parser_get_skeleton_cpp, tests/test_ast_parser.py:test_ast_parser_get_skeleton_python, tests/test_context_pruner.py:test_ast_caching, tests/test_context_pruner.py:test_performance_large_file]
"""
code_bytes = code.encode("utf8")
tree = self.get_cached_tree(path, code)
tree = self.get_cached_tree(path, code)
edits: List[Tuple[int, int, str]] = []
def is_docstring(node: tree_sitter.Node) -> bool:
@@ -206,7 +203,7 @@ class ASTParser:
def walk(node: tree_sitter.Node) -> None:
"""
[C: src/mcp_client.py:_search_file, src/mcp_client.py:py_find_usages, src/mcp_client.py:py_get_hierarchy, src/mcp_client.py:trace, src/outline_tool.py:CodeOutliner.outline, src/outline_tool.py:CodeOutliner.walk, src/summarize.py:_summarise_python]
[C: src/mcp_client.py:_search_file, src/mcp_client.py:py_find_usages, src/mcp_client.py:py_get_hierarchy, src/mcp_client.py:trace, src/outline_tool.py:CodeOutliner.outline, src/outline_tool.py:CodeOutliner.walk, src/summarize.py:_summarise_python]
"""
if node.type in ("function_definition", "method_definition"):
body = node.child_by_field_name("body")
@@ -218,7 +215,7 @@ class ASTParser:
break
if body and body.type in ("block", "compound_statement"):
indent = " " * body.start_point.column
indent = " " * body.start_point.column
first_stmt = None
for child in body.children:
if child.type not in ("comment", "{", "}"):
@@ -244,17 +241,17 @@ class ASTParser:
edits.append((start_byte, end_byte, f"\n{indent}..."))
else:
start_byte = initializer.start_byte if initializer else body.start_byte
end_byte = body.end_byte
end_byte = body.end_byte
# Try to preserve braces for C-style languages
if body.type == "compound_statement" and len(body.children) >= 2 and body.children[0].type == "{" and body.children[-1].type == "}":
if initializer:
start_byte = initializer.start_byte
end_byte = body.children[-1].start_byte
end_byte = body.children[-1].start_byte
edits.append((start_byte, end_byte, "{ ... "))
else:
start_byte = body.children[0].end_byte
end_byte = body.children[-1].start_byte
end_byte = body.children[-1].start_byte
edits.append((start_byte, end_byte, " ... "))
else:
edits.append((start_byte, end_byte, "..."))
@@ -275,15 +272,13 @@ class ASTParser:
return code_bytearray.decode("utf8")
def get_curated_view(self, code: str, path: Optional[str] = None) -> str:
"""
Returns a curated skeleton of a Python file.
Preserves function bodies if they have @core_logic decorator or # [HOT] comment.
Otherwise strips bodies but preserves docstrings.
[C: src/multi_agent_conductor.py:run_worker_lifecycle, tests/test_ast_parser.py:test_ast_parser_get_curated_view]
Returns a curated skeleton of a Python file.
Preserves function bodies if they have @core_logic decorator or # [HOT] comment.
Otherwise strips bodies but preserves docstrings.
[C: src/multi_agent_conductor.py:run_worker_lifecycle, tests/test_ast_parser.py:test_ast_parser_get_curated_view]
"""
code_bytes = code.encode("utf8")
tree = self.get_cached_tree(path, code)
tree = self.get_cached_tree(path, code)
edits: List[Tuple[int, int, str]] = []
def is_docstring(node: tree_sitter.Node) -> bool:
@@ -318,7 +313,7 @@ class ASTParser:
def walk(node: tree_sitter.Node) -> None:
"""
[C: src/mcp_client.py:_search_file, src/mcp_client.py:py_find_usages, src/mcp_client.py:py_get_hierarchy, src/mcp_client.py:trace, src/outline_tool.py:CodeOutliner.outline, src/outline_tool.py:CodeOutliner.walk, src/summarize.py:_summarise_python]
[C: src/mcp_client.py:_search_file, src/mcp_client.py:py_find_usages, src/mcp_client.py:py_get_hierarchy, src/mcp_client.py:trace, src/outline_tool.py:CodeOutliner.outline, src/outline_tool.py:CodeOutliner.walk, src/summarize.py:_summarise_python]
"""
if node.type == "function_definition":
body = node.child_by_field_name("body")
@@ -326,7 +321,7 @@ class ASTParser:
# Check if we should preserve it
preserve = has_core_logic_decorator(node) or has_hot_comment(node)
if not preserve:
indent = " " * body.start_point.column
indent = " " * body.start_point.column
first_stmt = None
for child in body.children:
if child.type != "comment":
@@ -334,12 +329,12 @@ class ASTParser:
break
if first_stmt and is_docstring(first_stmt):
start_byte = first_stmt.end_byte
end_byte = body.end_byte
end_byte = body.end_byte
if end_byte > start_byte:
edits.append((start_byte, end_byte, f"\n{indent}..."))
else:
start_byte = body.start_byte
end_byte = body.end_byte
end_byte = body.end_byte
edits.append((start_byte, end_byte, "..."))
for child in node.children:
walk(child)
@@ -350,16 +345,16 @@ class ASTParser:
for start, end, replacement in edits:
code_bytearray[start:end] = bytes(replacement, "utf8")
return code_bytearray.decode("utf8")
#endregion: Skeleton & Curated Views
#region: Targeted Views
def get_targeted_view(self, code: str, function_names: List[str], path: Optional[str] = None) -> str:
"""
Returns a targeted view of the code including only the specified functions
and their dependencies up to depth 2.
[C: src/multi_agent_conductor.py:run_worker_lifecycle, tests/test_ast_parser.py:test_ast_parser_get_targeted_view, tests/test_context_pruner.py:test_class_targeted_extraction, tests/test_context_pruner.py:test_targeted_extraction]
Returns a targeted view of the code including only the specified functions
and their dependencies up to depth 2.
[C: src/multi_agent_conductor.py:run_worker_lifecycle, tests/test_ast_parser.py:test_ast_parser_get_targeted_view, tests/test_context_pruner.py:test_class_targeted_extraction, tests/test_context_pruner.py:test_targeted_extraction]
"""
code_bytes = code.encode("utf8")
tree = self.get_cached_tree(path, code)
@@ -375,9 +370,9 @@ class ASTParser:
elif node.type == "class_definition":
name_node = node.child_by_field_name("name")
if name_node:
cname = code_bytes[name_node.start_byte:name_node.end_byte].decode("utf8", errors="replace")
cname = code_bytes[name_node.start_byte:name_node.end_byte].decode("utf8", errors="replace")
full_cname = f"{class_name}.{cname}" if class_name else cname
body = node.child_by_field_name("body")
body = node.child_by_field_name("body")
if body:
collect_functions(body, full_cname)
return
@@ -413,12 +408,12 @@ class ASTParser:
to_include.add(full_name)
current_layer = set(to_include)
all_found = set(to_include)
all_found = set(to_include)
for _ in range(2):
next_layer = set()
for name in current_layer:
if name in all_functions:
node = all_functions[name]
node = all_functions[name]
calls = get_calls(node)
for call in calls:
for func_name in all_functions:
@@ -440,14 +435,14 @@ class ASTParser:
def check_for_targeted(node, parent_class=None):
if node.type == "function_definition":
name_node = node.child_by_field_name("name")
fname = code_bytes[name_node.start_byte:name_node.end_byte].decode("utf8", errors="replace") if name_node else ""
fullname = f"{parent_class}.{fname}" if parent_class else fname
fname = code_bytes[name_node.start_byte:name_node.end_byte].decode("utf8", errors="replace") if name_node else ""
fullname = f"{parent_class}.{fname}" if parent_class else fname
return fullname in all_found
if node.type == "class_definition":
name_node = node.child_by_field_name("name")
cname = code_bytes[name_node.start_byte:name_node.end_byte].decode("utf8", errors="replace") if name_node else ""
name_node = node.child_by_field_name("name")
cname = code_bytes[name_node.start_byte:name_node.end_byte].decode("utf8", errors="replace") if name_node else ""
full_cname = f"{parent_class}.{cname}" if parent_class else cname
body = node.child_by_field_name("body")
body = node.child_by_field_name("body")
if body:
for child in body.children:
if check_for_targeted(child, full_cname):
@@ -461,12 +456,12 @@ class ASTParser:
def walk_edits(node, parent_class=None):
if node.type == "function_definition":
name_node = node.child_by_field_name("name")
fname = code_bytes[name_node.start_byte:name_node.end_byte].decode("utf8", errors="replace") if name_node else ""
fullname = f"{parent_class}.{fname}" if parent_class else fname
fname = code_bytes[name_node.start_byte:name_node.end_byte].decode("utf8", errors="replace") if name_node else ""
fullname = f"{parent_class}.{fname}" if parent_class else fname
if fullname in all_found:
body = node.child_by_field_name("body")
if body and body.type in ("block", "compound_statement"):
indent = " " * body.start_point.column
indent = " " * body.start_point.column
first_stmt = None
for child in body.children:
if child.type != "comment":
@@ -474,22 +469,22 @@ class ASTParser:
break
if first_stmt and is_docstring(first_stmt):
start_byte = first_stmt.end_byte
end_byte = body.end_byte
end_byte = body.end_byte
if end_byte > start_byte:
edits.append((start_byte, end_byte, f"\n{indent}..."))
else:
start_byte = body.start_byte
end_byte = body.end_byte
end_byte = body.end_byte
edits.append((start_byte, end_byte, "..."))
else:
edits.append((node.start_byte, node.end_byte, ""))
return
if node.type == "class_definition":
if check_for_targeted(node, parent_class):
name_node = node.child_by_field_name("name")
cname = code_bytes[name_node.start_byte:name_node.end_byte].decode("utf8", errors="replace") if name_node else ""
name_node = node.child_by_field_name("name")
cname = code_bytes[name_node.start_byte:name_node.end_byte].decode("utf8", errors="replace") if name_node else ""
full_cname = f"{parent_class}.{cname}" if parent_class else cname
body = node.child_by_field_name("body")
body = node.child_by_field_name("body")
if body:
for child in body.children:
walk_edits(child, full_cname)
@@ -517,15 +512,16 @@ class ASTParser:
result = code_bytearray.decode("utf8")
result = re.sub(r'\n\s*\n\s*\n+', '\n\n', result)
return result.strip() + "\n"
#endregion: Targeted Views
#region: Symbol Extraction
def get_definition(self, code: str, name: str, path: Optional[str] = None) -> str:
"""
Returns the full source code for a specific definition by name.
Supports 'ClassName::method' or 'method' for C++.
[C: src/mcp_client.py:trace, src/mcp_client.py:ts_c_get_definition, src/mcp_client.py:ts_cpp_get_definition, tests/test_ast_parser.py:test_ast_parser_get_definition_c, tests/test_ast_parser.py:test_ast_parser_get_definition_cpp, tests/test_ast_parser.py:test_ast_parser_get_definition_cpp_template]
Returns the full source code for a specific definition by name.
Supports 'ClassName::method' or 'method' for C++.
[C: src/mcp_client.py:trace, src/mcp_client.py:ts_c_get_definition, src/mcp_client.py:ts_cpp_get_definition, tests/test_ast_parser.py:test_ast_parser_get_definition_c, tests/test_ast_parser.py:test_ast_parser_get_definition_cpp, tests/test_ast_parser.py:test_ast_parser_get_definition_cpp_template]
"""
code_bytes = code.encode("utf8")
tree = self.get_cached_tree(path, code)
@@ -621,16 +617,13 @@ class ASTParser:
def get_signature(self, code: str, name: str, path: Optional[str] = None) -> str:
"""
Returns only the signature part of a function or method.
For C/C++, this is the code from the start of the definition until the block start '{'.
[C: src/mcp_client.py:ts_c_get_signature, src/mcp_client.py:ts_cpp_get_signature, tests/test_ast_parser.py:test_ast_parser_get_signature_c, tests/test_ast_parser.py:test_ast_parser_get_signature_cpp]
Returns only the signature part of a function or method.
For C/C++, this is the code from the start of the definition until the block start '{'.
[C: src/mcp_client.py:ts_c_get_signature, src/mcp_client.py:ts_cpp_get_signature, tests/test_ast_parser.py:test_ast_parser_get_signature_c, tests/test_ast_parser.py:test_ast_parser_get_signature_cpp]
"""
code_bytes = code.encode("utf8")
tree = self.get_cached_tree(path, code)
parts = re.split(r'::|\.', name)
tree = self.get_cached_tree(path, code)
parts = re.split(r'::|\.', name)
def walk(node: tree_sitter.Node, target_parts: List[str]) -> Optional[tree_sitter.Node]:
"""
@@ -638,7 +631,7 @@ class ASTParser:
"""
if not target_parts:
return None
target = target_parts[0]
target = target_parts[0]
best_match = None
for child in node.children:
@@ -649,7 +642,7 @@ class ASTParser:
if sub.type in ("class_specifier", "struct_specifier", "enum_specifier"):
check_node = sub
break
is_interesting = check_node.type in ("function_definition", "class_definition", "class_specifier", "struct_specifier", "enum_specifier", "enum_definition", "namespace_definition", "template_declaration", "field_declaration", "declaration")
if is_interesting:
node_name = self._get_name(check_node, code_bytes)
@@ -729,15 +722,15 @@ class ASTParser:
return code_bytes[found_node.start_byte:found_node.end_byte].decode("utf8", errors="replace").strip()
return f"ERROR: signature for '{name}' not found"
#endregion: Symbol Extraction
#region: Analysis & Updates
def get_code_outline(self, code: str, path: Optional[str] = None) -> str:
"""
Returns a hierarchical outline of the code (classes, structs, functions, methods).
[C: src/mcp_client.py:ts_c_get_code_outline, src/mcp_client.py:ts_cpp_get_code_outline, tests/test_ast_parser.py:test_ast_parser_get_code_outline_c, tests/test_ast_parser.py:test_ast_parser_get_code_outline_cpp]
Returns a hierarchical outline of the code (classes, structs, functions, methods).
[C: src/mcp_client.py:ts_c_get_code_outline, src/mcp_client.py:ts_cpp_get_code_outline, tests/test_ast_parser.py:test_ast_parser_get_code_outline_c, tests/test_ast_parser.py:test_ast_parser_get_code_outline_cpp]
"""
code_bytes = code.encode("utf8")
tree = self.get_cached_tree(path, code)
@@ -745,7 +738,7 @@ class ASTParser:
def walk(node: tree_sitter.Node, indent: int = 0) -> None:
"""
[C: src/mcp_client.py:_search_file, src/mcp_client.py:py_find_usages, src/mcp_client.py:py_get_hierarchy, src/mcp_client.py:trace, src/outline_tool.py:CodeOutliner.outline, src/outline_tool.py:CodeOutliner.walk, src/summarize.py:_summarise_python]
[C: src/mcp_client.py:_search_file, src/mcp_client.py:py_find_usages, src/mcp_client.py:py_get_hierarchy, src/mcp_client.py:trace, src/outline_tool.py:CodeOutliner.outline, src/outline_tool.py:CodeOutliner.walk, src/summarize.py:_summarise_python]
"""
ntype = node.type
label = ""
@@ -778,15 +771,12 @@ class ASTParser:
def update_definition(self, code: str, name: str, new_content: str, path: Optional[str] = None) -> str:
"""
Surgically replace the definition of a class or function by name.
[C: src/mcp_client.py:ts_c_update_definition, src/mcp_client.py:ts_cpp_update_definition, tests/test_ast_parser.py:test_ast_parser_update_definition_cpp]
Surgically replace the definition of a class or function by name.
[C: src/mcp_client.py:ts_c_update_definition, src/mcp_client.py:ts_cpp_update_definition, tests/test_ast_parser.py:test_ast_parser_update_definition_cpp]
"""
code_bytes = code.encode("utf8")
tree = self.get_cached_tree(path, code)
parts = re.split(r'::|\.', name)
tree = self.get_cached_tree(path, code)
parts = re.split(r'::|\.', name)
def walk(node: tree_sitter.Node, target_parts: List[str]) -> Optional[tree_sitter.Node]:
"""
@@ -794,7 +784,7 @@ class ASTParser:
"""
if not target_parts:
return None
target = target_parts[0]
target = target_parts[0]
best_match = None
for child in node.children:
@@ -876,12 +866,15 @@ class ASTParser:
code_bytearray[found_node.start_byte:found_node.end_byte] = bytes(new_content, "utf8")
return code_bytearray.decode("utf8")
return f"ERROR: definition '{name}' not found"
#endregion: Analysis & Updates
#region: Module Level Utilities
def reset_client() -> None:
    """No-op placeholder: performs no work and returns ``None``."""
    return None
def get_file_id(path: Path) -> Optional[str]:
    """
    Placeholder lookup: ``path`` is ignored and ``None`` is always returned.

    Args:
        path: Accepted for interface compatibility; not read by this stub.

    Returns:
        Always ``None``.
    """
    file_id: Optional[str] = None
    return file_id
#endregion: Module Level Utilities