checkpoint: massive refactor
This commit is contained in:
248
file_cache.py
248
file_cache.py
@@ -10,164 +10,148 @@ from typing import Optional
|
||||
import tree_sitter
|
||||
import tree_sitter_python
|
||||
|
||||
|
||||
class ASTParser:
|
||||
"""
|
||||
"""
|
||||
Parser for extracting AST-based views of source code.
|
||||
Currently supports Python.
|
||||
"""
|
||||
def __init__(self, language: str):
|
||||
if language != "python":
|
||||
raise ValueError(f"Language '{language}' not supported yet.")
|
||||
self.language_name = language
|
||||
# Load the tree-sitter language grammar
|
||||
self.language = tree_sitter.Language(tree_sitter_python.language())
|
||||
self.parser = tree_sitter.Parser(self.language)
|
||||
|
||||
def parse(self, code: str) -> tree_sitter.Tree:
|
||||
"""Parse the given code and return the tree-sitter Tree."""
|
||||
return self.parser.parse(bytes(code, "utf8"))
|
||||
def __init__(self, language: str):
|
||||
if language != "python":
|
||||
raise ValueError(f"Language '{language}' not supported yet.")
|
||||
self.language_name = language
|
||||
# Load the tree-sitter language grammar
|
||||
self.language = tree_sitter.Language(tree_sitter_python.language())
|
||||
self.parser = tree_sitter.Parser(self.language)
|
||||
|
||||
def get_skeleton(self, code: str) -> str:
|
||||
"""
|
||||
def parse(self, code: str) -> tree_sitter.Tree:
|
||||
"""Parse the given code and return the tree-sitter Tree."""
|
||||
return self.parser.parse(bytes(code, "utf8"))
|
||||
|
||||
def get_skeleton(self, code: str) -> str:
|
||||
"""
|
||||
Returns a skeleton of a Python file (preserving docstrings, stripping function bodies).
|
||||
"""
|
||||
tree = self.parse(code)
|
||||
edits = []
|
||||
tree = self.parse(code)
|
||||
edits = []
|
||||
|
||||
def is_docstring(node):
|
||||
if node.type == "expression_statement" and node.child_count > 0:
|
||||
if node.children[0].type == "string":
|
||||
return True
|
||||
return False
|
||||
def is_docstring(node):
|
||||
if node.type == "expression_statement" and node.child_count > 0:
|
||||
if node.children[0].type == "string":
|
||||
return True
|
||||
return False
|
||||
|
||||
def walk(node):
|
||||
if node.type == "function_definition":
|
||||
body = node.child_by_field_name("body")
|
||||
if body and body.type == "block":
|
||||
indent = " " * body.start_point.column
|
||||
first_stmt = None
|
||||
for child in body.children:
|
||||
if child.type != "comment":
|
||||
first_stmt = child
|
||||
break
|
||||
def walk(node):
|
||||
if node.type == "function_definition":
|
||||
body = node.child_by_field_name("body")
|
||||
if body and body.type == "block":
|
||||
indent = " " * body.start_point.column
|
||||
first_stmt = None
|
||||
for child in body.children:
|
||||
if child.type != "comment":
|
||||
first_stmt = child
|
||||
break
|
||||
if first_stmt and is_docstring(first_stmt):
|
||||
start_byte = first_stmt.end_byte
|
||||
end_byte = body.end_byte
|
||||
if end_byte > start_byte:
|
||||
edits.append((start_byte, end_byte, f"\n{indent}..."))
|
||||
else:
|
||||
start_byte = body.start_byte
|
||||
end_byte = body.end_byte
|
||||
edits.append((start_byte, end_byte, "..."))
|
||||
for child in node.children:
|
||||
walk(child)
|
||||
walk(tree.root_node)
|
||||
# Apply edits in reverse to maintain byte offsets
|
||||
edits.sort(key=lambda x: x[0], reverse=True)
|
||||
code_bytes = bytearray(code, "utf8")
|
||||
for start, end, replacement in edits:
|
||||
code_bytes[start:end] = bytes(replacement, "utf8")
|
||||
return code_bytes.decode("utf8")
|
||||
|
||||
if first_stmt and is_docstring(first_stmt):
|
||||
start_byte = first_stmt.end_byte
|
||||
end_byte = body.end_byte
|
||||
if end_byte > start_byte:
|
||||
edits.append((start_byte, end_byte, f"\n{indent}..."))
|
||||
else:
|
||||
start_byte = body.start_byte
|
||||
end_byte = body.end_byte
|
||||
edits.append((start_byte, end_byte, "..."))
|
||||
|
||||
for child in node.children:
|
||||
walk(child)
|
||||
|
||||
walk(tree.root_node)
|
||||
|
||||
# Apply edits in reverse to maintain byte offsets
|
||||
edits.sort(key=lambda x: x[0], reverse=True)
|
||||
code_bytes = bytearray(code, "utf8")
|
||||
for start, end, replacement in edits:
|
||||
code_bytes[start:end] = bytes(replacement, "utf8")
|
||||
|
||||
return code_bytes.decode("utf8")
|
||||
|
||||
def get_curated_view(self, code: str) -> str:
|
||||
"""
|
||||
def get_curated_view(self, code: str) -> str:
|
||||
"""
|
||||
Returns a curated skeleton of a Python file.
|
||||
Preserves function bodies if they have @core_logic decorator or # [HOT] comment.
|
||||
Otherwise strips bodies but preserves docstrings.
|
||||
"""
|
||||
tree = self.parse(code)
|
||||
edits = []
|
||||
tree = self.parse(code)
|
||||
edits = []
|
||||
|
||||
def is_docstring(node):
|
||||
if node.type == "expression_statement" and node.child_count > 0:
|
||||
if node.children[0].type == "string":
|
||||
return True
|
||||
return False
|
||||
def is_docstring(node):
|
||||
if node.type == "expression_statement" and node.child_count > 0:
|
||||
if node.children[0].type == "string":
|
||||
return True
|
||||
return False
|
||||
|
||||
def has_core_logic_decorator(node):
|
||||
# Check if parent is decorated_definition
|
||||
parent = node.parent
|
||||
if parent and parent.type == "decorated_definition":
|
||||
for child in parent.children:
|
||||
if child.type == "decorator":
|
||||
# decorator -> ( '@', identifier ) or ( '@', call )
|
||||
if "@core_logic" in code[child.start_byte:child.end_byte]:
|
||||
return True
|
||||
return False
|
||||
def has_core_logic_decorator(node):
|
||||
# Check if parent is decorated_definition
|
||||
parent = node.parent
|
||||
if parent and parent.type == "decorated_definition":
|
||||
for child in parent.children:
|
||||
if child.type == "decorator":
|
||||
# decorator -> ( '@', identifier ) or ( '@', call )
|
||||
if "@core_logic" in code[child.start_byte:child.end_byte]:
|
||||
return True
|
||||
return False
|
||||
|
||||
def has_hot_comment(func_node):
|
||||
# Check all descendants of the function_definition for a [HOT] comment
|
||||
stack = [func_node]
|
||||
while stack:
|
||||
curr = stack.pop()
|
||||
if curr.type == "comment":
|
||||
comment_text = code[curr.start_byte:curr.end_byte]
|
||||
if "[HOT]" in comment_text:
|
||||
return True
|
||||
for child in curr.children:
|
||||
stack.append(child)
|
||||
return False
|
||||
|
||||
def walk(node):
|
||||
if node.type == "function_definition":
|
||||
body = node.child_by_field_name("body")
|
||||
if body and body.type == "block":
|
||||
# Check if we should preserve it
|
||||
preserve = has_core_logic_decorator(node) or has_hot_comment(node)
|
||||
|
||||
if not preserve:
|
||||
indent = " " * body.start_point.column
|
||||
first_stmt = None
|
||||
for child in body.children:
|
||||
if child.type != "comment":
|
||||
first_stmt = child
|
||||
break
|
||||
|
||||
if first_stmt and is_docstring(first_stmt):
|
||||
start_byte = first_stmt.end_byte
|
||||
end_byte = body.end_byte
|
||||
if end_byte > start_byte:
|
||||
edits.append((start_byte, end_byte, f"\n{indent}..."))
|
||||
else:
|
||||
start_byte = body.start_byte
|
||||
end_byte = body.end_byte
|
||||
edits.append((start_byte, end_byte, "..."))
|
||||
|
||||
for child in node.children:
|
||||
walk(child)
|
||||
|
||||
walk(tree.root_node)
|
||||
|
||||
# Apply edits in reverse to maintain byte offsets
|
||||
edits.sort(key=lambda x: x[0], reverse=True)
|
||||
code_bytes = bytearray(code, "utf8")
|
||||
for start, end, replacement in edits:
|
||||
code_bytes[start:end] = bytes(replacement, "utf8")
|
||||
|
||||
return code_bytes.decode("utf8")
|
||||
def has_hot_comment(func_node):
|
||||
# Check all descendants of the function_definition for a [HOT] comment
|
||||
stack = [func_node]
|
||||
while stack:
|
||||
curr = stack.pop()
|
||||
if curr.type == "comment":
|
||||
comment_text = code[curr.start_byte:curr.end_byte]
|
||||
if "[HOT]" in comment_text:
|
||||
return True
|
||||
for child in curr.children:
|
||||
stack.append(child)
|
||||
return False
|
||||
|
||||
def walk(node):
|
||||
if node.type == "function_definition":
|
||||
body = node.child_by_field_name("body")
|
||||
if body and body.type == "block":
|
||||
# Check if we should preserve it
|
||||
preserve = has_core_logic_decorator(node) or has_hot_comment(node)
|
||||
if not preserve:
|
||||
indent = " " * body.start_point.column
|
||||
first_stmt = None
|
||||
for child in body.children:
|
||||
if child.type != "comment":
|
||||
first_stmt = child
|
||||
break
|
||||
if first_stmt and is_docstring(first_stmt):
|
||||
start_byte = first_stmt.end_byte
|
||||
end_byte = body.end_byte
|
||||
if end_byte > start_byte:
|
||||
edits.append((start_byte, end_byte, f"\n{indent}..."))
|
||||
else:
|
||||
start_byte = body.start_byte
|
||||
end_byte = body.end_byte
|
||||
edits.append((start_byte, end_byte, "..."))
|
||||
for child in node.children:
|
||||
walk(child)
|
||||
walk(tree.root_node)
|
||||
# Apply edits in reverse to maintain byte offsets
|
||||
edits.sort(key=lambda x: x[0], reverse=True)
|
||||
code_bytes = bytearray(code, "utf8")
|
||||
for start, end, replacement in edits:
|
||||
code_bytes[start:end] = bytes(replacement, "utf8")
|
||||
return code_bytes.decode("utf8")
|
||||
|
||||
def reset_client():
|
||||
pass
|
||||
|
||||
pass
|
||||
|
||||
def content_block_type(path: Path) -> str:
|
||||
return "unsupported"
|
||||
|
||||
return "unsupported"
|
||||
|
||||
def get_file_id(path: Path) -> Optional[str]:
|
||||
return None
|
||||
|
||||
return None
|
||||
|
||||
def evict(path: Path):
|
||||
pass
|
||||
|
||||
pass
|
||||
|
||||
def list_cached() -> list[dict]:
|
||||
return []
|
||||
return []
|
||||
|
||||
Reference in New Issue
Block a user