From 4e564aad79855154ff45e46639e335ea37676660 Mon Sep 17 00:00:00 2001
From: Ed_
Date: Wed, 25 Feb 2026 20:08:43 -0500
Subject: [PATCH] feat(mma): Implement AST Skeleton View generator using tree-sitter

---
Revision notes: generate_skeleton() no longer records a separate edit for a
function nested inside a function whose body is already being replaced;
the overlapping byte ranges corrupted the spliced output. A regression
test covers this. The blob ids on the index lines predate this revision.

 pyproject.toml             |  2 ++
 scripts/mma_exec.py        | 69 ++++++++++++++++++++++++++++++++++++++
 tests/test_mma_skeleton.py | 60 +++++++++++++++++++++++++++++++++
 3 files changed, 131 insertions(+)
 create mode 100644 tests/test_mma_skeleton.py

diff --git a/pyproject.toml b/pyproject.toml
index e11f7dc..59eef54 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,6 +12,8 @@ dependencies = [
     "psutil>=7.2.2",
     "fastapi",
     "uvicorn",
+    "tree-sitter>=0.25.2",
+    "tree-sitter-python>=0.25.0",
 ]
 
 [dependency-groups]
diff --git a/scripts/mma_exec.py b/scripts/mma_exec.py
index b4444bb..466307e 100644
--- a/scripts/mma_exec.py
+++ b/scripts/mma_exec.py
@@ -1,6 +1,75 @@
 import argparse
 import subprocess
 import json
+import os
+import tree_sitter
+import tree_sitter_python
+
+def generate_skeleton(code: str) -> str:
+    """
+    Parses Python code and replaces function/method bodies with '...',
+    preserving docstrings if present.
+
+    Only the outermost function at each location is rewritten; definitions
+    nested inside a replaced body are dropped together with that body.
+    If parsing or rewriting raises, the original code is returned unchanged
+    behind a '# Error generating skeleton: ...' comment line.
+    """
+    try:
+        PY_LANGUAGE = tree_sitter.Language(tree_sitter_python.language())
+        parser = tree_sitter.Parser(PY_LANGUAGE)
+        tree = parser.parse(bytes(code, "utf8"))
+
+        def is_docstring(node):
+            return (
+                node.type == "expression_statement"
+                and node.child_count > 0
+                and node.children[0].type == "string"
+            )
+
+        # (start_byte, end_byte, replacement) triples. They must never
+        # overlap, otherwise splicing one shifts the offsets of another.
+        edits = []
+
+        # Explicit stack rather than recursion, so a deeply nested syntax
+        # tree cannot hit Python's recursion limit.
+        pending = [tree.root_node]
+        while pending:
+            node = pending.pop()
+            body = None
+            if node.type == "function_definition":
+                body = node.child_by_field_name("body")
+            if body is None or body.type != "block":
+                pending.extend(node.children)
+                continue
+
+            # The function's children are deliberately not visited: its
+            # body is replaced as a whole, and an extra edit for a nested
+            # function would overlap this one and corrupt the output.
+            first_stmt = next(
+                (child for child in body.children if child.type != "comment"),
+                None,
+            )
+            if first_stmt is not None and is_docstring(first_stmt):
+                # Keep the docstring and replace only what follows it.
+                if body.end_byte > first_stmt.end_byte:
+                    indent = " " * body.start_point.column
+                    edits.append(
+                        (first_stmt.end_byte, body.end_byte, f"\n{indent}...")
+                    )
+            else:
+                edits.append((body.start_byte, body.end_byte, "..."))
+
+        # Splice from the end of the file backwards so that the byte
+        # offsets of edits still to be applied stay valid.
+        edits.sort(key=lambda edit: edit[0], reverse=True)
+        code_bytes = bytearray(code, "utf8")
+        for start, end, replacement in edits:
+            code_bytes[start:end] = bytes(replacement, "utf8")
+
+        return code_bytes.decode("utf8")
+    except Exception as e:
+        return f"# Error generating skeleton: {e}\n{code}"
 
 def get_model_for_role(role: str) -> str:
     """Returns the specific model to use for a given tier role."""
diff --git a/tests/test_mma_skeleton.py b/tests/test_mma_skeleton.py
new file mode 100644
index 0000000..0a9c2c5
--- /dev/null
+++ b/tests/test_mma_skeleton.py
@@ -0,0 +1,60 @@
+import pytest
+from scripts.mma_exec import generate_skeleton
+
+def test_generate_skeleton():
+    sample_code = '''
+class Calculator:
+    """Performs basic math operations."""
+
+    def add(self, a: int, b: int) -> int:
+        """Adds two numbers."""
+        result = a + b
+        return result
+
+def log_message(msg):
+    timestamp = "2026-02-25"
+    print(f"[{timestamp}] {msg}")
+'''
+
+    skeleton = generate_skeleton(sample_code)
+
+    # Check that signatures are preserved
+    assert "class Calculator:" in skeleton
+    assert "def add(self, a: int, b: int) -> int:" in skeleton
+    assert "def log_message(msg):" in skeleton
+
+    # Check that docstrings are preserved
+    assert '"""Performs basic math operations."""' in skeleton
+    assert '"""Adds two numbers."""' in skeleton
+
+    # Check that implementation details are removed
+    assert "result = a + b" not in skeleton
+    assert "return result" not in skeleton
+    assert "timestamp =" not in skeleton
+    assert "print(" not in skeleton
+
+    # Check that bodies are replaced with ellipsis
+    assert "..." in skeleton
+
+def test_generate_skeleton_nested_function():
+    sample_code = '''
+def outer():
+    def inner():
+        return 1
+    return inner
+
+MARKER = 1
+'''
+
+    skeleton = generate_skeleton(sample_code)
+
+    # Code after the outer function must survive untouched
+    assert "def outer():" in skeleton
+    assert "MARKER = 1" in skeleton
+
+    # The nested function goes away with the outer body
+    assert "def inner" not in skeleton
+    assert "return inner" not in skeleton
+
+if __name__ == "__main__":
+    pytest.main([__file__])
\ No newline at end of file