import logging

import tree_sitter
import tree_sitter_python

logger = logging.getLogger(__name__)


class ASTParser:
    """Parse Python source with tree-sitter and produce a condensed view of it."""

    def __init__(self, language: str) -> None:
        """Create a parser backed by the tree-sitter Python grammar.

        Args:
            language: Accepted for interface compatibility but not consulted;
                the Python grammar is always the one loaded.
        """
        self.language = tree_sitter.Language(tree_sitter_python.language())
        self.parser = tree_sitter.Parser(self.language)

    def parse(self, code: str) -> tree_sitter.Tree:
        """Parse *code* (encoded as UTF-8) and return the resulting tree."""
        return self.parser.parse(bytes(code, "utf8"))

    def get_curated_view(self, code: str) -> str:
        """Return *code* with the bodies of ordinary functions replaced by ``...``.

        A function keeps its full body when either of these holds:

        * it sits in a ``decorated_definition`` with a decorator whose text
          contains ``@core_logic``;
        * any comment inside the function contains ``[HOT]``.

        For every other function, a leading docstring is kept and the rest of
        the body is replaced by ``...`` on its own line; without a docstring
        the whole body becomes ``...``.

        Args:
            code: Source text to condense.

        Returns:
            The condensed source text.
        """
        tree = self.parse(code)
        # Node offsets are byte offsets, so every slice must be taken on the
        # encoded source rather than on the str (they differ for non-ASCII).
        source = code.encode("utf8")
        edits = []

        def node_text(node):
            """Return the source text covered by *node*."""
            return source[node.start_byte:node.end_byte].decode("utf8")

        def is_docstring(node):
            """Tell whether *node* is an expression statement starting with a string."""
            return (
                node.type == "expression_statement"
                and node.child_count > 0
                and node.children[0].type == "string"
            )

        def has_core_logic_decorator(func_node):
            """Tell whether a decorator on *func_node* mentions ``@core_logic``."""
            parent = func_node.parent
            if parent is None or parent.type != "decorated_definition":
                return False
            return any(
                child.type == "decorator" and "@core_logic" in node_text(child)
                for child in parent.children
            )

        def has_hot_comment(func_node):
            """Tell whether any comment below *func_node* contains ``[HOT]``."""
            stack = [func_node]
            while stack:
                current = stack.pop()
                if current.type == "comment" and "[HOT]" in node_text(current):
                    return True
                stack.extend(current.children)
            return False

        def body_edit(body):
            """Build the ``(start, end, replacement)`` edit for *body*, or None."""
            first_stmt = next(
                (child for child in body.children if child.type != "comment"),
                None,
            )
            if first_stmt is not None and is_docstring(first_stmt):
                start = first_stmt.end_byte
                if body.end_byte <= start:
                    # The docstring is the entire body; nothing to strip.
                    return None
                indent = " " * body.start_point.column
                # A real newline is required here so the placeholder lands on
                # its own line after the docstring.
                placeholder = "\n" + indent + "..."
                return (start, body.end_byte, placeholder.encode("utf8"))
            return (body.start_byte, body.end_byte, b"...")

        pending = [tree.root_node]
        while pending:
            node = pending.pop()
            if node.type == "function_definition":
                body = node.child_by_field_name("body")
                if body is not None and body.type == "block":
                    preserve = (
                        has_core_logic_decorator(node) or has_hot_comment(node)
                    )
                    logger.debug(
                        "Function at byte %d: preserve=%s",
                        node.start_byte,
                        preserve,
                    )
                    if not preserve:
                        edit = body_edit(body)
                        if edit is not None:
                            edits.append(edit)
                        # Do not descend: anything nested lies inside the
                        # range being replaced, and a second edit there would
                        # shift offsets and corrupt the outer replacement.
                        continue
            pending.extend(node.children)

        # Apply from the end of the file backwards so earlier offsets stay valid.
        buffer = bytearray(source)
        for start, end, replacement in sorted(
            edits, key=lambda edit: edit[0], reverse=True
        ):
            buffer[start:end] = replacement
        return buffer.decode("utf8")


# Demo: one decorated function, one with a [HOT] comment, one ordinary function.
parser = ASTParser("python")
code = '''
@core_logic
def core_func():
    """Core logic doc."""
    print("this should be preserved")
    return True

def hot_func():
    # [HOT]
    print("this should also be preserved")
    return 42

def normal_func():
    """Normal doc."""
    print("this should be stripped")
    return None
'''
result = parser.get_curated_view(code)
print("\n--- RESULT ---\n")
print(result)