Files
manual_slop/src/file_cache.py
T
ed b5e512f483 feat(sdm): inject structural dependency mapping tags across codebase
Adds [C: caller] tags to functions/methods and [M: mutation] / [U: usage] tags to class variables based on cross-module call analysis.
2026-05-13 22:35:52 -04:00

851 lines
35 KiB
Python

"""
File Cache - ASTParser (tree-sitter) for Python source code analysis.
This module provides AST-based code analysis using the tree-sitter library.
It is used to generate compressed "views" of Python code that preserve
structure while reducing token consumption.
Key Components:
- ASTParser: Main parser class using tree-sitter
- get_skeleton(): Compress function bodies to `...`
- get_curated_view(): Preserve `@core_logic` and `[HOT]` functions
- get_targeted_view(): Extract only specified symbols + dependencies
Caching:
- Module-level `_ast_cache` stores parsed trees with mtime invalidation
- Cache limit: 10 entries (simple LRU eviction)
- Cache key: file path; a cached tree is reused only while the file's mtime is unchanged
Thread Safety:
- Not thread-safe. Use separate ASTParser instances per thread.
- Cache is module-level shared across instances.
Views:
1. Skeleton: Signatures + docstrings only, bodies replaced with `...`
2. Curated: Skeleton + bodies marked with `@core_logic` or `[HOT]`
3. Targeted: Only specified symbols + their dependencies (depth 2)
Integration:
- Used by mcp_client.py for py_get_skeleton, py_get_curated_view
- Used by multi_agent_conductor.py for worker context injection
- Used by aggregate.py for summary generation
See Also:
- docs/guide_tools.md for AST tool documentation
- src/summarize.py for heuristic summaries
"""
from pathlib import Path
from typing import Optional, Any, List, Tuple, Dict
import tree_sitter
import tree_sitter_python
import tree_sitter_cpp
import tree_sitter_c
import re
# Parse cache shared by every ASTParser instance in the process.
# Maps a file path to (mtime recorded when the tree was parsed, parsed tree).
_ast_cache: Dict[str, Tuple[float, tree_sitter.Tree]] = {}
class ASTParser:
"""
Parser for extracting AST-based views of source code.
Supports Python, C++ ("cpp") and C ("c").
"""
#region: Core Operations
def __init__(self, language: str) -> None:
    """
    Bind this parser to a single tree-sitter grammar.

    Args:
        language: Grammar to load; one of "python", "cpp" or "c".

    Raises:
        ValueError: If `language` is not one of the grammars listed above.
    [C: src/mcp_client.py:_DDGParser.__init__, src/mcp_client.py:_TextExtractor.__init__]
    """
    # Reject unknown languages before touching any grammar module.
    if language not in ("python", "cpp", "c"):
        raise ValueError(f"Language '{language}' not supported yet.")
    self.language_name = language
    # Pick the grammar package that matches the requested language.
    grammar_modules = {
        "python": tree_sitter_python,
        "cpp": tree_sitter_cpp,
        "c": tree_sitter_c,
    }
    grammar = grammar_modules[language].language()
    self.language = tree_sitter.Language(grammar)
    self.parser = tree_sitter.Parser(self.language)
def parse(self, code: str) -> tree_sitter.Tree:
    """
    Encode `code` as UTF-8 and run it through this instance's tree-sitter parser.

    Args:
        code: Source text to parse.

    Returns:
        The tree produced by the parser. Nothing is cached by this method.
    [C: src/mcp_client.py:_search_file, src/mcp_client.py:derive_code_path, src/mcp_client.py:py_check_syntax, src/mcp_client.py:py_get_class_summary, src/mcp_client.py:py_get_definition, src/mcp_client.py:py_get_docstring, src/mcp_client.py:py_get_imports, src/mcp_client.py:py_get_signature, src/mcp_client.py:py_get_symbol_info, src/mcp_client.py:py_get_var_declaration, src/mcp_client.py:py_set_signature, src/mcp_client.py:py_set_var_declaration, src/mcp_client.py:py_update_definition, src/mcp_client.py:trace, src/outline_tool.py:CodeOutliner.outline, src/rag_engine.py:RAGEngine._chunk_code, src/summarize.py:_summarise_python, tests/test_ast_parser.py:test_ast_parser_parse, tests/test_tree_sitter_setup.py:test_tree_sitter_python_setup]
    """
    source_bytes = bytes(code, "utf8")
    return self.parser.parse(source_bytes)
def get_cached_tree(self, path: Optional[str], code: str) -> tree_sitter.Tree:
    """
    Return the parse tree for `code`, reusing the module-level cache when possible.

    The cache holds at most 10 entries and evicts the least recently used one.
    An entry is reused when the mtime recorded for `path` equals the file's
    current mtime; a file that cannot be stat'ed is treated as mtime 0.0.

    NOTE(review): entries are keyed by `path` alone and validated only by
    mtime, so `code` is never compared with the text the cached tree was
    built from, and parsers for different languages share entries.

    Args:
        path: File the code came from. When empty or None the code is parsed
            directly and nothing is cached.
        code: Source text to parse on a cache miss.

    Returns:
        The cached tree, or a freshly parsed one.
    """
    if not path:
        return self.parse(code)
    try:
        mtime = Path(path).stat().st_mtime
    except (OSError, ValueError):
        # Missing or unreadable file, or a path the OS rejects: fall back to
        # the same sentinel the cache has always used for "no mtime".
        mtime = 0.0
    cached = _ast_cache.get(path)
    if cached is not None and cached[0] == mtime:
        # Re-insert on a hit so dict order tracks recency; without this the
        # eviction below is FIFO rather than LRU.
        _ast_cache[path] = _ast_cache.pop(path)
        return cached[1]
    tree = self.parse(code)
    # Drop a stale entry for this path first, so refreshing it never evicts
    # an unrelated entry from a full cache.
    _ast_cache.pop(path, None)
    if len(_ast_cache) >= 10:
        # The first key in insertion order is the least recently used.
        del _ast_cache[next(iter(_ast_cache))]
    _ast_cache[path] = (mtime, tree)
    return tree
def _get_name(self, node: tree_sitter.Node, code_bytes: bytes) -> str:
"""
Return the declared name of `node` as text, or "" when no name can be found.
Lookup order:
- the node's own `name` field, when it has one;
- for function_definition / field_declaration / declaration nodes, a recursive
search (`find_id`) through the node's declarator children;
- for template_declaration, the name of the first wrapped definition child;
- for struct / class / enum / namespace nodes, the first identifier-like direct child.
`code_bytes` is the UTF-8 source that the node's byte offsets index into.
"""
# Fast path: the grammar exposes the name directly.
name_node = node.child_by_field_name("name")
if name_node:
return code_bytes[name_node.start_byte:name_node.end_byte].decode("utf8", errors="replace")
if node.type in ("function_definition", "field_declaration", "declaration"):
# Nested helper: returns the identifier text found under `n`, or "" if there is none.
def find_id(n: tree_sitter.Node) -> str:
# In C++, prefer function_declarator or operator_name FIRST - look for it explicitly
if self.language_name in ("cpp", "c"):
# First, check if this node itself is a simple identifier (method name)
if n.type in ("identifier", "field_identifier", "destructor_name"):
return code_bytes[n.start_byte:n.end_byte].decode("utf8", errors="replace")
# Handle reference_declarator which wraps function_declarator for return types like T&
if n.type in ("reference_declarator", "pointer_declarator"):
for child in n.children:
if child.type == "function_declarator":
nested_decl = child.child_by_field_name("declarator")
if nested_decl:
return find_id(nested_decl)
for fc in child.children:
if fc.type in ("field_identifier", "identifier", "operator_name", "destructor_name"):
return code_bytes[fc.start_byte:fc.end_byte].decode("utf8", errors="replace")
# NOTE(review): "qualified_identifier" is accepted here and also listed in the
# C/C++ type filter just below; which check wins depends on this ordering - confirm intent.
if n.type in ("identifier", "field_identifier", "qualified_identifier", "operator_name", "destructor_name"):
return code_bytes[n.start_byte:n.end_byte].decode("utf8", errors="replace")
# AVOID qualified_identifier in C++ - it's used in type expressions
if self.language_name in ("cpp", "c"):
if n.type in ("type_identifier", "primitive_type", "builtin_type", "qualified_identifier", "type_parameter", "template_type"):
return ""
# AVOID parameter_list in C++ - it contains parameter names that can confuse identifier finding
if n.type in ("parameter_list", "parameter_declaration"):
return ""
# For function_definition, check direct function_declarator child
if n.type == "function_definition":
for child in n.children:
if child.type == "function_declarator":
nested_decl = child.child_by_field_name("declarator")
if nested_decl:
return find_id(nested_decl)
for fc in child.children:
if fc.type in ("field_identifier", "identifier", "operator_name", "destructor_name"):
return code_bytes[fc.start_byte:fc.end_byte].decode("utf8", errors="replace")
# For field_declarations with complex return types like T& or T*
# we need to look inside reference_declarator/pointer_declarator children
if n.type in ("field_declaration", "function_definition"):
for child in n.children:
if child.type in ("reference_declarator", "pointer_declarator"):
for subchild in child.children:
if subchild.type == "function_declarator":
nested_decl = subchild.child_by_field_name("declarator")
if nested_decl:
return find_id(nested_decl)
for fc in subchild.children:
if fc.type in ("field_identifier", "identifier", "operator_name", "destructor_name"):
return code_bytes[fc.start_byte:fc.end_byte].decode("utf8", errors="replace")
# Fallback to children, but avoid bodies and types
for child in n.children:
if child.type in ("compound_statement", "field_declaration_list", "class_body", "declaration_list", "enum_body", "type_identifier", "primitive_type", "builtin_type", "namespace_identifier", "qualified_identifier", "reference_declarator", "pointer_declarator"):
continue
res = find_id(child)
if res: return res
return ""
return find_id(node)
# A template wraps the real definition: report the name of the first wrapped definition child.
if node.type == "template_declaration":
for child in node.children:
if child.type in ("function_definition", "class_definition", "class_specifier", "struct_specifier", "enum_specifier", "enum_definition", "field_declaration", "declaration"):
return self._get_name(child, code_bytes)
# Type-like and namespace nodes: use the text of the first identifier-like direct child.
if node.type in ("struct_specifier", "class_specifier", "class_definition", "enum_specifier", "enum_definition", "namespace_definition"):
for child in node.children:
if child.type in ("type_identifier", "identifier", "namespace_identifier", "qualified_identifier"):
return code_bytes[child.start_byte:child.end_byte].decode("utf8", errors="replace")
# Nothing matched: the node is treated as unnamed.
return ""
#endregion: Core Operations
#region: Skeleton & Curated Views
def get_skeleton(self, code: str, path: Optional[str] = None) -> str:
    """
    Return `code` with every function body collapsed to `...`.

    A function whose first non-comment statement is a string expression (a
    docstring) keeps everything up to the end of that docstring. For
    C/C++ (`compound_statement` bodies), a constructor's field initializer
    list is removed together with the body.

    Args:
        code: Source text to reduce.
        path: Optional file path, used only to look up the cached parse tree.

    Returns:
        The reduced source text.
    [C: src/mcp_client.py:py_get_skeleton, src/mcp_client.py:ts_c_get_skeleton, src/mcp_client.py:ts_cpp_get_skeleton, src/multi_agent_conductor.py:run_worker_lifecycle, tests/test_ast_parser.py:test_ast_parser_get_skeleton_c, tests/test_ast_parser.py:test_ast_parser_get_skeleton_cpp, tests/test_ast_parser.py:test_ast_parser_get_skeleton_python, tests/test_context_pruner.py:test_ast_caching, tests/test_context_pruner.py:test_performance_large_file]
    """
    code_bytes = code.encode("utf8")
    tree = self.get_cached_tree(path, code)
    # (start_byte, end_byte, replacement) triples over the original bytes.
    edits: List[Tuple[int, int, str]] = []

    def is_docstring(node: tree_sitter.Node) -> bool:
        """True when `node` is an expression statement whose first child is a string."""
        return (
            node.type == "expression_statement"
            and node.child_count > 0
            and node.children[0].type == "string"
        )

    def walk(node: tree_sitter.Node) -> None:
        """
        [C: src/mcp_client.py:_search_file, src/mcp_client.py:py_find_usages, src/mcp_client.py:py_get_hierarchy, src/mcp_client.py:trace, src/outline_tool.py:CodeOutliner.outline, src/outline_tool.py:CodeOutliner.walk, src/summarize.py:_summarise_python]
        """
        if node.type == "function_definition":
            body = node.child_by_field_name("body")
            if body and body.type in ("block", "compound_statement"):
                indent = " " * body.start_point.column
                first_stmt = next(
                    (child for child in body.children if child.type != "comment"),
                    None,
                )
                initializer = next(
                    (child for child in node.children if child.type == "field_initializer_list"),
                    None,
                )
                if first_stmt and is_docstring(first_stmt):
                    # Keep the docstring; drop whatever follows it.
                    if body.end_byte > first_stmt.end_byte:
                        edits.append((first_stmt.end_byte, body.end_byte, f"\n{indent}..."))
                else:
                    start_byte = initializer.start_byte if initializer else body.start_byte
                    edits.append((start_byte, body.end_byte, "..."))
                # The body is being replaced wholesale, so anything nested in
                # it disappears with it. Descending further would queue edits
                # that overlap this one; applying those first shifts the
                # bytes and makes this edit swallow text after the function.
                return
        for child in node.children:
            walk(child)

    walk(tree.root_node)
    # Apply edits in reverse to maintain byte offsets
    edits.sort(key=lambda x: x[0], reverse=True)
    code_bytearray = bytearray(code_bytes)
    for start, end, replacement in edits:
        code_bytearray[start:end] = bytes(replacement, "utf8")
    return code_bytearray.decode("utf8")
def get_curated_view(self, code: str, path: Optional[str] = None) -> str:
    """
    Return a skeleton of `code` that keeps the bodies of selected functions.

    A function keeps its full body when it is wrapped in a decorator whose
    text contains `@core_logic`, or when any comment inside the function
    contains `[HOT]`. Every other function body is collapsed to `...`, with
    a leading docstring preserved.

    Args:
        code: Source text to reduce.
        path: Optional file path, used only to look up the cached parse tree.

    Returns:
        The reduced source text.
    [C: src/multi_agent_conductor.py:run_worker_lifecycle, tests/test_ast_parser.py:test_ast_parser_get_curated_view]
    """
    code_bytes = code.encode("utf8")
    tree = self.get_cached_tree(path, code)
    # (start_byte, end_byte, replacement) triples over the original bytes.
    edits: List[Tuple[int, int, str]] = []

    def is_docstring(node: tree_sitter.Node) -> bool:
        """True when `node` is an expression statement whose first child is a string."""
        return (
            node.type == "expression_statement"
            and node.child_count > 0
            and node.children[0].type == "string"
        )

    def has_core_logic_decorator(node: tree_sitter.Node) -> bool:
        """True when the enclosing decorated_definition has a decorator containing `@core_logic`."""
        parent = node.parent
        if not parent or parent.type != "decorated_definition":
            return False
        # decorator -> ( '@', identifier ) or ( '@', call )
        return any(
            child.type == "decorator"
            and b"@core_logic" in code_bytes[child.start_byte:child.end_byte]
            for child in parent.children
        )

    def has_hot_comment(func_node: tree_sitter.Node) -> bool:
        """True when any comment node below `func_node` contains `[HOT]`."""
        stack = [func_node]
        while stack:
            curr = stack.pop()
            if curr.type == "comment" and b"[HOT]" in code_bytes[curr.start_byte:curr.end_byte]:
                return True
            stack.extend(curr.children)
        return False

    def walk(node: tree_sitter.Node) -> None:
        """
        [C: src/mcp_client.py:_search_file, src/mcp_client.py:py_find_usages, src/mcp_client.py:py_get_hierarchy, src/mcp_client.py:trace, src/outline_tool.py:CodeOutliner.outline, src/outline_tool.py:CodeOutliner.walk, src/summarize.py:_summarise_python]
        """
        if node.type == "function_definition":
            body = node.child_by_field_name("body")
            if body and body.type in ("block", "compound_statement"):
                # Check if we should preserve it
                preserve = has_core_logic_decorator(node) or has_hot_comment(node)
                if not preserve:
                    indent = " " * body.start_point.column
                    first_stmt = next(
                        (child for child in body.children if child.type != "comment"),
                        None,
                    )
                    if first_stmt and is_docstring(first_stmt):
                        if body.end_byte > first_stmt.end_byte:
                            edits.append((first_stmt.end_byte, body.end_byte, f"\n{indent}..."))
                    else:
                        edits.append((body.start_byte, body.end_byte, "..."))
                    # The body is being replaced wholesale; descending would
                    # queue overlapping edits for nested functions and make
                    # this edit swallow text that follows the function.
                    return
        # Preserved functions and all other nodes are still searched, so
        # nested functions inside a preserved body are judged on their own.
        for child in node.children:
            walk(child)

    walk(tree.root_node)
    # Apply edits in reverse to maintain byte offsets
    edits.sort(key=lambda x: x[0], reverse=True)
    code_bytearray = bytearray(code_bytes)
    for start, end, replacement in edits:
        code_bytearray[start:end] = bytes(replacement, "utf8")
    return code_bytearray.decode("utf8")
#endregion: Skeleton & Curated Views
#region: Targeted Views
def get_targeted_view(self, code: str, function_names: List[str], path: Optional[str] = None) -> str:
    """
    Return a reduced view of `code` limited to the requested functions and
    the functions they call, followed for up to two levels of calls.

    Every kept function is emitted as its signature (plus docstring, when it
    has one) with the rest of the body replaced by `...`. Functions and
    classes that are not reached are removed together with their decorators.
    Every other top-level statement is removed except imports and comments.

    Args:
        code: Source text to reduce.
        function_names: Names to keep. Each name is first matched against the
            dotted `Class.method` form; if that fails it is matched against
            the last dotted component of every collected function.
        path: Optional file path, used only to look up the cached parse tree.

    Returns:
        The reduced source with runs of blank lines collapsed, stripped, and
        terminated by a single newline.
    [C: src/multi_agent_conductor.py:run_worker_lifecycle, tests/test_ast_parser.py:test_ast_parser_get_targeted_view, tests/test_context_pruner.py:test_class_targeted_extraction, tests/test_context_pruner.py:test_targeted_extraction]
    """
    code_bytes = code.encode("utf8")
    tree = self.get_cached_tree(path, code)

    def node_text(n) -> str:
        """Source text covered by `n`."""
        return code_bytes[n.start_byte:n.end_byte].decode("utf8", errors="replace")

    def scoped_name(n, scope) -> str:
        """Dotted name of a definition node; the short part is "" when the node has no name."""
        name_node = n.child_by_field_name("name")
        short = node_text(name_node) if name_node else ""
        return f"{scope}.{short}" if scope else short

    # Dotted name -> function_definition node.
    all_functions = {}

    def collect_functions(node, class_name=None):
        """Register every named function below `node` under its dotted name."""
        if node.type == "function_definition":
            if node.child_by_field_name("name"):
                all_functions[scoped_name(node, class_name)] = node
            # No return: nested functions are registered under the same scope.
        elif node.type == "class_definition":
            if node.child_by_field_name("name"):
                body = node.child_by_field_name("body")
                if body:
                    collect_functions(body, scoped_name(node, class_name))
            return
        for child in node.children:
            collect_functions(child, class_name)

    collect_functions(tree.root_node)

    # Short (last-component) name -> dotted names, so a call can be resolved
    # without rescanning every function.
    by_short_name = {}
    for full_name in all_functions:
        by_short_name.setdefault(full_name.split('.')[-1], []).append(full_name)

    def get_calls(node):
        """Names called below `node`: bare identifiers and the attribute part of `x.attr(...)`."""
        calls = set()

        def walk_calls(n):
            if n.type == "call":
                func_node = n.child_by_field_name("function")
                if func_node:
                    if func_node.type == "identifier":
                        calls.add(node_text(func_node))
                    elif func_node.type == "attribute":
                        attr_node = func_node.child_by_field_name("attribute")
                        if attr_node:
                            calls.add(node_text(attr_node))
            for child in n.children:
                walk_calls(child)

        walk_calls(node)
        return calls

    # Resolve the requested names: exact dotted match first, short name second.
    to_include = set()
    for target in function_names:
        if target in all_functions:
            to_include.add(target)
        else:
            to_include.update(by_short_name.get(target, ()))

    # Follow calls outward from the requested functions, at most two layers.
    all_found = set(to_include)
    current_layer = set(to_include)
    for _ in range(2):
        next_layer = set()
        for name in current_layer:
            for call in get_calls(all_functions[name]):
                for candidate in by_short_name.get(call, ()):
                    if candidate not in all_found:
                        next_layer.add(candidate)
                        all_found.add(candidate)
        current_layer = next_layer
        if not current_layer:
            break

    # (start_byte, end_byte, replacement) triples over the original bytes.
    edits = []

    def is_docstring(n) -> bool:
        """True when `n` is an expression statement whose first child is a string."""
        return (
            n.type == "expression_statement"
            and n.child_count > 0
            and n.children[0].type == "string"
        )

    def check_for_targeted(node, parent_class=None):
        """True when `node` is, or contains, a function listed in `all_found`."""
        if node.type == "function_definition":
            return scoped_name(node, parent_class) in all_found
        if node.type == "class_definition":
            body = node.child_by_field_name("body")
            if not body:
                return False
            full_cname = scoped_name(node, parent_class)
            return any(check_for_targeted(child, full_cname) for child in body.children)
        return any(check_for_targeted(child, parent_class) for child in node.children)

    def strip_body(func_node):
        """Queue the edit that collapses a kept function's body to `...`."""
        body = func_node.child_by_field_name("body")
        if not (body and body.type in ("block", "compound_statement")):
            return
        indent = " " * body.start_point.column
        first_stmt = next(
            (child for child in body.children if child.type != "comment"),
            None,
        )
        if first_stmt and is_docstring(first_stmt):
            if body.end_byte > first_stmt.end_byte:
                edits.append((first_stmt.end_byte, body.end_byte, f"\n{indent}..."))
        else:
            edits.append((body.start_byte, body.end_byte, "..."))

    def walk_edits(node, parent_class=None):
        """Queue removal/skeleton edits for `node` and its descendants."""
        if node.type == "decorated_definition":
            # Decide on the decorated unit as a whole. Otherwise a targeted
            # top-level decorated function is deleted by the "other
            # top-level statement" rule below, and a removed method leaves
            # its decorators behind.
            if check_for_targeted(node, parent_class):
                for child in node.children:
                    if child.type in ("function_definition", "class_definition"):
                        walk_edits(child, parent_class)
            else:
                edits.append((node.start_byte, node.end_byte, ""))
            return
        if node.type == "function_definition":
            if scoped_name(node, parent_class) in all_found:
                strip_body(node)
            else:
                edits.append((node.start_byte, node.end_byte, ""))
            return
        if node.type == "class_definition":
            if check_for_targeted(node, parent_class):
                body = node.child_by_field_name("body")
                if body:
                    full_cname = scoped_name(node, parent_class)
                    for child in body.children:
                        walk_edits(child, full_cname)
            else:
                edits.append((node.start_byte, node.end_byte, ""))
            return
        if node.type in ("import_statement", "import_from_statement"):
            # Imports are always kept.
            return
        if node.type == "module":
            for child in node.children:
                walk_edits(child, parent_class)
        elif node.parent and node.parent.type == "module":
            # Any other top-level statement is dropped; comments stay.
            if node.type not in ("comment",):
                edits.append((node.start_byte, node.end_byte, ""))
        else:
            for child in node.children:
                walk_edits(child, parent_class)

    walk_edits(tree.root_node)
    # Apply edits back to front so earlier offsets stay valid.
    edits.sort(key=lambda x: x[0], reverse=True)
    code_bytearray = bytearray(code_bytes)
    for start, end, replacement in edits:
        code_bytearray[start:end] = bytes(replacement, "utf8")
    result = code_bytearray.decode("utf8")
    # Removed definitions leave blank runs behind; squeeze them to one blank line.
    result = re.sub(r'\n\s*\n\s*\n+', '\n\n', result)
    return result.strip() + "\n"
#endregion: Targeted Views
#region: Symbol Extraction
def get_definition(self, code: str, name: str, path: Optional[str] = None) -> str:
"""
Returns the full source code for a specific definition by name.
Supports 'ClassName::method' or 'method' for C++.
`name` is split on '::' and '.', and the parts are resolved one scope at a time.
A match that has a `body` field is preferred over one that does not.
Returns the string "ERROR: definition '<name>' not found" when nothing matches;
no exception is raised for a missing definition.
`path` is only used to look up the cached parse tree.
[C: src/mcp_client.py:trace, src/mcp_client.py:ts_c_get_definition, src/mcp_client.py:ts_cpp_get_definition, tests/test_ast_parser.py:test_ast_parser_get_definition_c, tests/test_ast_parser.py:test_ast_parser_get_definition_cpp, tests/test_ast_parser.py:test_ast_parser_get_definition_cpp_template]
"""
code_bytes = code.encode("utf8")
tree = self.get_cached_tree(path, code)
# Qualified names use either '::' or '.' as the scope separator.
parts = re.split(r'::|\.', name)
# Nested helper: resolve `target_parts` against the children of `node`.
# Returns the matching node (preferring one with a `body` field) or None.
def walk(node: tree_sitter.Node, target_parts: List[str]) -> Optional[tree_sitter.Node]:
"""
[C: src/mcp_client.py:_search_file, src/mcp_client.py:py_find_usages, src/mcp_client.py:py_get_hierarchy, src/mcp_client.py:trace, src/outline_tool.py:CodeOutliner.outline, src/outline_tool.py:CodeOutliner.walk, src/summarize.py:_summarise_python]
"""
if not target_parts:
return None
target = target_parts[0]
# Remembers the first body-less match in case no match with a body turns up.
best_match = None
for child in node.children:
# If it's a field_declaration, it might wrap a class/struct/enum definition
check_node = child
if child.type == "field_declaration":
for sub in child.children:
if sub.type in ("class_specifier", "struct_specifier", "enum_specifier"):
check_node = sub
break
is_interesting = check_node.type in ("function_definition", "class_definition", "class_specifier", "struct_specifier", "enum_specifier", "enum_definition", "namespace_definition", "template_declaration", "field_declaration", "declaration")
if is_interesting:
node_name = self._get_name(check_node, code_bytes)
if node_name == target:
if len(target_parts) == 1:
# For a wrapped specifier the enclosing field_declaration is what gets returned.
match = check_node if child.type != "field_declaration" else child
# template_declaration should always be returned as-is (no body field but contains the definition)
if match.type == "template_declaration":
return match
if match.child_by_field_name("body"):
return match
if not best_match:
best_match = match
next_parts = target_parts[1:]
else:
next_parts = target_parts
body = check_node.child_by_field_name("body")
# A template has no body of its own; use the body of the definition it wraps.
if not body and check_node.type == "template_declaration":
for sub in check_node.children:
if sub.type in ("function_definition", "class_definition", "class_specifier", "struct_specifier", "enum_specifier", "enum_definition"):
body = sub.child_by_field_name("body")
break
if body:
found = walk(body, next_parts)
if found:
if found.child_by_field_name("body"): return found
if not best_match: best_match = found
# Also search member-list children that are not exposed through the `body` field.
for sub in check_node.children:
if sub.type in ("field_declaration_list", "class_body", "declaration_list", "enum_body"):
found = walk(sub, next_parts)
if found:
if found.child_by_field_name("body"): return found
if not best_match: best_match = found
# Transparent containers: keep searching with the same target parts.
elif child.type in ("module", "translation_unit", "namespace_definition", "declaration_list", "field_declaration_list", "class_body", "preproc_if", "preproc_ifdef", "preproc_else", "preproc_elif"):
found = walk(child, target_parts)
if found:
if found.child_by_field_name("body"): return found
if not best_match: best_match = found
return best_match
# Nested helper: search the whole subtree for a definition whose name equals
# `target`, preferring a node that has a `body` field.
def deep_search(node: tree_sitter.Node, target: str) -> Optional[tree_sitter.Node]:
best = None
if node.type in ("function_definition", "class_definition", "class_specifier", "struct_specifier", "enum_specifier", "enum_definition", "namespace_definition", "template_declaration", "declaration", "field_declaration"):
if self._get_name(node, code_bytes) == target:
if node.child_by_field_name("body"):
return node
best = node
for child in node.children:
res = deep_search(child, target)
if res:
if res.child_by_field_name("body"):
return res
if not best:
best = res
return best
found_node = walk(tree.root_node, parts)
# template_declaration doesn't have body field but is valid as-is
if not found_node or (not found_node.child_by_field_name("body") and found_node.type != "template_declaration"):
# Fallback compares against the full, unsplit `name`.
alt = deep_search(tree.root_node, name)
if alt:
if not found_node or alt.child_by_field_name("body"):
found_node = alt
if found_node:
return code_bytes[found_node.start_byte:found_node.end_byte].decode("utf8", errors="replace")
# Failure is reported in-band as a string, not as an exception.
return f"ERROR: definition '{name}' not found"
def get_signature(self, code: str, name: str, path: Optional[str] = None) -> str:
"""
Returns only the signature part of a function or method.
For C/C++, this is the code from the start of the definition until the block start '{'.
`name` is split on '::' and '.', and the parts are resolved one scope at a time.
When the located node has no `body` field, its whole text is returned instead.
The returned text has surrounding whitespace stripped.
Returns the string "ERROR: signature for '<name>' not found" when nothing matches;
no exception is raised for a missing definition.
`path` is only used to look up the cached parse tree.
[C: src/mcp_client.py:ts_c_get_signature, src/mcp_client.py:ts_cpp_get_signature, tests/test_ast_parser.py:test_ast_parser_get_signature_c, tests/test_ast_parser.py:test_ast_parser_get_signature_cpp]
"""
code_bytes = code.encode("utf8")
tree = self.get_cached_tree(path, code)
# Qualified names use either '::' or '.' as the scope separator.
parts = re.split(r'::|\.', name)
# Nested helper: resolve `target_parts` against the children of `node`.
# Returns the matching node (preferring one with a `body` field) or None.
def walk(node: tree_sitter.Node, target_parts: List[str]) -> Optional[tree_sitter.Node]:
"""
[C: src/mcp_client.py:_search_file, src/mcp_client.py:py_find_usages, src/mcp_client.py:py_get_hierarchy, src/mcp_client.py:trace, src/outline_tool.py:CodeOutliner.outline, src/outline_tool.py:CodeOutliner.walk, src/summarize.py:_summarise_python]
"""
if not target_parts:
return None
target = target_parts[0]
# Remembers the first body-less match in case no match with a body turns up.
best_match = None
for child in node.children:
# If it's a field_declaration, it might wrap a class/struct/enum definition
check_node = child
if child.type == "field_declaration":
for sub in child.children:
if sub.type in ("class_specifier", "struct_specifier", "enum_specifier"):
check_node = sub
break
is_interesting = check_node.type in ("function_definition", "class_definition", "class_specifier", "struct_specifier", "enum_specifier", "enum_definition", "namespace_definition", "template_declaration", "field_declaration", "declaration")
if is_interesting:
node_name = self._get_name(check_node, code_bytes)
if node_name == target:
if len(target_parts) == 1:
# For a wrapped specifier the enclosing field_declaration is what gets returned.
match = check_node if child.type != "field_declaration" else child
# template_declaration should always be returned as-is (no body field but contains the definition)
if match.type == "template_declaration":
return match
if match.child_by_field_name("body"):
return match
if not best_match:
best_match = match
next_parts = target_parts[1:]
else:
next_parts = target_parts
body = check_node.child_by_field_name("body")
# A template has no body of its own; use the body of the definition it wraps.
if not body and check_node.type == "template_declaration":
for sub in check_node.children:
if sub.type in ("function_definition", "class_definition", "class_specifier", "struct_specifier", "enum_specifier", "enum_definition"):
body = sub.child_by_field_name("body")
break
if body:
found = walk(body, next_parts)
if found:
if found.child_by_field_name("body"): return found
if not best_match: best_match = found
# Also search member-list children that are not exposed through the `body` field.
for sub in check_node.children:
if sub.type in ("field_declaration_list", "class_body", "declaration_list", "enum_body"):
found = walk(sub, next_parts)
if found:
if found.child_by_field_name("body"): return found
if not best_match: best_match = found
# Transparent containers: keep searching with the same target parts.
elif child.type in ("module", "translation_unit", "namespace_definition", "declaration_list", "field_declaration_list", "class_body", "preproc_if", "preproc_ifdef", "preproc_else", "preproc_elif"):
found = walk(child, target_parts)
if found:
if found.child_by_field_name("body"): return found
if not best_match: best_match = found
return best_match
# Nested helper: subtree search limited to function, template and declaration
# nodes whose name equals `target`, preferring a node that has a `body` field.
def deep_search(node: tree_sitter.Node, target: str) -> Optional[tree_sitter.Node]:
best = None
if node.type in ("function_definition", "template_declaration", "declaration"):
if self._get_name(node, code_bytes) == target:
if node.child_by_field_name("body"):
return node
best = node
for child in node.children:
res = deep_search(child, target)
if res:
if res.child_by_field_name("body"):
return res
if not best:
best = res
return best
found_node = walk(tree.root_node, parts)
# template_declaration doesn't have body field but is valid as-is
if not found_node or (not found_node.child_by_field_name("body") and found_node.type != "template_declaration"):
# Fallback compares against the full, unsplit `name`.
alt = deep_search(tree.root_node, name)
if alt:
if not found_node or alt.child_by_field_name("body"):
found_node = alt
if found_node:
target_node = found_node
# For a template, the body belongs to the wrapped definition, but the
# signature still starts at the template header.
if found_node.type == "template_declaration":
for child in found_node.children:
if child.type in ("function_definition", "class_definition", "class_specifier", "struct_specifier"):
target_node = child
break
body = target_node.child_by_field_name("body")
if body:
# Everything from the start of the definition up to where the body starts.
return code_bytes[found_node.start_byte:body.start_byte].decode("utf8", errors="replace").strip()
# No body (for example a bare declaration): return the node's whole text.
return code_bytes[found_node.start_byte:found_node.end_byte].decode("utf8", errors="replace").strip()
# Failure is reported in-band as a string, not as an exception.
return f"ERROR: signature for '{name}' not found"
#endregion: Symbol Extraction
#region: Analysis & Updates
def get_code_outline(self, code: str, path: Optional[str] = None) -> str:
"""
Returns a hierarchical outline of the code (classes, structs, functions, methods).
Each entry has the form "<label> <name> (Lines <start>-<end>)" with 1-based line
numbers, prefixed by one space per nesting level. Entries are joined with newlines.
Labels: "[Class]" for class_definition / class_specifier, "[Struct]" for
struct_specifier, and "[Method]" or "[Func]" for function_definition depending
on whether it is nested inside another outlined entry.
`path` is only used to look up the cached parse tree.
[C: src/mcp_client.py:ts_c_get_code_outline, src/mcp_client.py:ts_cpp_get_code_outline, tests/test_ast_parser.py:test_ast_parser_get_code_outline_c, tests/test_ast_parser.py:test_ast_parser_get_code_outline_cpp]
"""
code_bytes = code.encode("utf8")
tree = self.get_cached_tree(path, code)
# Outline lines, in document order.
output = []
def walk(node: tree_sitter.Node, indent: int = 0) -> None:
"""
[C: src/mcp_client.py:_search_file, src/mcp_client.py:py_find_usages, src/mcp_client.py:py_get_hierarchy, src/mcp_client.py:trace, src/outline_tool.py:CodeOutliner.outline, src/outline_tool.py:CodeOutliner.walk, src/summarize.py:_summarise_python]
"""
ntype = node.type
# An empty label means this node type is not listed in the outline.
label = ""
if ntype in ("class_definition", "class_specifier"):
label = "[Class]"
elif ntype == "struct_specifier":
label = "[Struct]"
elif ntype == "function_definition":
# A function nested inside an outlined entry is reported as a method.
label = "[Method]" if indent > 0 else "[Func]"
if label:
name = self._get_name(node, code_bytes)
if name:
# tree-sitter rows are 0-based; the outline reports 1-based lines.
start = node.start_point.row + 1
end = node.end_point.row + 1
output.append(f"{' ' * indent}{label} {name} (Lines {start}-{end})")
body = node.child_by_field_name("body")
if body:
for child in body.children:
walk(child, indent + 1)
return
# Nodes that are not outlined are searched at the same nesting level.
for child in node.children:
walk(child, indent)
walk(tree.root_node)
return "\n".join(output)
def update_definition(self, code: str, name: str, new_content: str, path: Optional[str] = None) -> str:
"""
Surgically replace the definition of a class or function by name.
`name` is split on '::' and '.', and the parts are resolved one scope at a time.
The byte range of the located node is replaced with `new_content` and the
complete updated source is returned; nothing is written to disk here.
Returns the string "ERROR: definition '<name>' not found" when nothing matches,
so callers must check the result before treating it as source code.
`path` is only used to look up the cached parse tree.
[C: src/mcp_client.py:ts_c_update_definition, src/mcp_client.py:ts_cpp_update_definition, tests/test_ast_parser.py:test_ast_parser_update_definition_cpp]
"""
code_bytes = code.encode("utf8")
tree = self.get_cached_tree(path, code)
# Qualified names use either '::' or '.' as the scope separator.
parts = re.split(r'::|\.', name)
# Nested helper: resolve `target_parts` against the children of `node`.
# Returns the matching node (preferring one with a `body` field) or None.
def walk(node: tree_sitter.Node, target_parts: List[str]) -> Optional[tree_sitter.Node]:
"""
[C: src/mcp_client.py:_search_file, src/mcp_client.py:py_find_usages, src/mcp_client.py:py_get_hierarchy, src/mcp_client.py:trace, src/outline_tool.py:CodeOutliner.outline, src/outline_tool.py:CodeOutliner.walk, src/summarize.py:_summarise_python]
"""
if not target_parts:
return None
target = target_parts[0]
# Remembers the first body-less match in case no match with a body turns up.
best_match = None
for child in node.children:
# If it's a field_declaration, it might wrap a class/struct/enum definition
check_node = child
if child.type == "field_declaration":
for sub in child.children:
if sub.type in ("class_specifier", "struct_specifier", "enum_specifier"):
check_node = sub
break
is_interesting = check_node.type in ("function_definition", "class_definition", "class_specifier", "struct_specifier", "enum_specifier", "enum_definition", "namespace_definition", "template_declaration", "field_declaration", "declaration")
if is_interesting:
node_name = self._get_name(check_node, code_bytes)
if node_name == target:
if len(target_parts) == 1:
# For a wrapped specifier the enclosing field_declaration is what gets returned.
match = check_node if child.type != "field_declaration" else child
# template_declaration should always be returned as-is (no body field but contains the definition)
if match.type == "template_declaration":
return match
if match.child_by_field_name("body"):
return match
if not best_match:
best_match = match
next_parts = target_parts[1:]
else:
next_parts = target_parts
body = check_node.child_by_field_name("body")
# A template has no body of its own; use the body of the definition it wraps.
if not body and check_node.type == "template_declaration":
for sub in check_node.children:
if sub.type in ("function_definition", "class_definition", "class_specifier", "struct_specifier", "enum_specifier", "enum_definition"):
body = sub.child_by_field_name("body")
break
if body:
found = walk(body, next_parts)
if found:
if found.child_by_field_name("body"): return found
if not best_match: best_match = found
# Also search member-list children that are not exposed through the `body` field.
for sub in check_node.children:
if sub.type in ("field_declaration_list", "class_body", "declaration_list", "enum_body"):
found = walk(sub, next_parts)
if found:
if found.child_by_field_name("body"): return found
if not best_match: best_match = found
# Transparent containers: keep searching with the same target parts.
elif child.type in ("module", "translation_unit", "namespace_definition", "declaration_list", "field_declaration_list", "class_body", "preproc_if", "preproc_ifdef", "preproc_else", "preproc_elif"):
found = walk(child, target_parts)
if found:
if found.child_by_field_name("body"): return found
if not best_match: best_match = found
return best_match
# Nested helper: search the whole subtree for a definition whose name equals
# `target`, preferring a node that has a `body` field.
def deep_search(node: tree_sitter.Node, target: str) -> Optional[tree_sitter.Node]:
best = None
if node.type in ("function_definition", "class_definition", "class_specifier", "struct_specifier", "enum_specifier", "enum_definition", "namespace_definition", "template_declaration", "declaration", "field_declaration"):
if self._get_name(node, code_bytes) == target:
if node.child_by_field_name("body"):
return node
best = node
for child in node.children:
res = deep_search(child, target)
if res:
if res.child_by_field_name("body"):
return res
if not best:
best = res
return best
found_node = walk(tree.root_node, parts)
# template_declaration doesn't have body field but is valid as-is
if not found_node or (not found_node.child_by_field_name("body") and found_node.type != "template_declaration"):
# Fallback compares against the full, unsplit `name`.
alt = deep_search(tree.root_node, name)
if alt:
if not found_node or alt.child_by_field_name("body"):
found_node = alt
if found_node:
# Splice on bytes, not characters: tree-sitter offsets are byte offsets.
code_bytearray = bytearray(code_bytes)
code_bytearray[found_node.start_byte:found_node.end_byte] = bytes(new_content, "utf8")
return code_bytearray.decode("utf8")
# Failure is reported in-band as a string, not as an exception.
return f"ERROR: definition '{name}' not found"
#endregion: Analysis & Updates
#region: Module Level Utilities
def reset_client() -> None:
    """Do nothing and return None; this function holds no state to reset."""
    return None
def get_file_id(path: Path) -> Optional[str]:
    """Return None for every `path`; the argument is not inspected."""
    file_id: Optional[str] = None
    return file_id
#endregion: Module Level Utilities