Private
Public Access
0
0

refactor(file_cache): remove top-level tree_sitter* imports; lazy via _require_warmed + TYPE_CHECKING

Sub-track 2B: 4 violations cleared. Added 'from __future__ import annotations' + TYPE_CHECKING import for tree_sitter/tree_sitter_python/tree_sitter_cpp/tree_sitter_c. Runtime access via _require_warmed() in ASTParser.__init__. 6 new tests in tests/test_file_cache_no_top_level_tree_sitter.py. All 25 tests pass (6 new + 19 existing).
This commit is contained in:
2026-06-07 10:08:16 -04:00
parent 02239bc38f
commit a41b31ed9f
2 changed files with 122 additions and 11 deletions
+9 -11
View File
@@ -34,15 +34,11 @@ See Also:
- docs/guide_tools.md for AST tool documentation
- src/summarize.py for heuristic summaries
"""
import re
import tree_sitter
import tree_sitter_python
import tree_sitter_cpp
import tree_sitter_c
from __future__ import annotations
# TODO(Ed): Eliminate these?
import re
from pathlib import Path
from typing import Optional, Any, List, Tuple, Dict
from typing import Any, Dict, List, Optional, Tuple
_ast_cache: Dict[str, Tuple[float, tree_sitter.Tree]] = {}
@@ -63,10 +59,12 @@ class ASTParser:
raise ValueError(f"Language '{language}' not supported yet.")
self.language_name = language
# Load the tree-sitter language grammar
if language == "python": self.language = tree_sitter.Language(tree_sitter_python.language())
elif language == "cpp": self.language = tree_sitter.Language(tree_sitter_cpp.language())
elif language == "c": self.language = tree_sitter.Language(tree_sitter_c.language())
self.parser = tree_sitter.Parser(self.language)
from src.module_loader import _require_warmed
ts = _require_warmed("tree_sitter")
if language == "python": self.language = ts.Language(_require_warmed("tree_sitter_python").language())
elif language == "cpp": self.language = ts.Language(_require_warmed("tree_sitter_cpp").language())
elif language == "c": self.language = ts.Language(_require_warmed("tree_sitter_c").language())
self.parser = ts.Parser(self.language)
def parse(self, code: str) -> tree_sitter.Tree:
"""
@@ -0,0 +1,113 @@
"""Tests that src/file_cache.py has NO top-level tree_sitter* imports.
Per the Main Thread Purity Invariant, the four tree_sitter modules
(tree_sitter, tree_sitter_python, tree_sitter_cpp, tree_sitter_c) are
heavy and must not appear in the main-thread import chain. They are
loaded lazily via _require_warmed() inside ASTParser.__init__ and via
TYPE_CHECKING for type-hint purposes.
These tests run in a fresh subprocess to ensure no warmup state leaks
from the test runner.
"""
import subprocess
import sys
import textwrap
from pathlib import Path
ROOT = Path(__file__).resolve().parent.parent
def _run_in_subprocess(snippet: str) -> subprocess.CompletedProcess:
script = textwrap.dedent(snippet)
return subprocess.run(
[sys.executable, "-c", script],
capture_output=True,
text=True,
cwd=str(ROOT),
timeout=30,
)
def test_file_cache_does_not_import_tree_sitter_at_module_level() -> None:
res = _run_in_subprocess("""
import sys
import src.file_cache
for mod in ('tree_sitter', 'tree_sitter_python', 'tree_sitter_cpp', 'tree_sitter_c'):
print(mod, mod in sys.modules)
""")
assert res.returncode == 0, f"stderr: {res.stderr}"
lines = res.stdout.strip().splitlines()
for line in lines:
name, present = line.split()
assert present == "False", f"src.file_cache triggered {name} import: {res.stdout}"
def test_ast_parser_python_works_when_instantiated() -> None:
res = _run_in_subprocess("""
from src.file_cache import ASTParser
p = ASTParser('python')
tree = p.parse('def foo(): pass')
print(type(tree).__module__.startswith('tree_sitter'))
""")
assert res.returncode == 0, f"stderr: {res.stderr}"
assert res.stdout.strip() == "True", f"ASTParser('python') did not produce a tree_sitter.Tree: {res.stdout}"
def test_ast_parser_cpp_works_when_instantiated() -> None:
res = _run_in_subprocess("""
from src.file_cache import ASTParser
p = ASTParser('cpp')
tree = p.parse('int main() { return 0; }')
print('OK')
""")
assert res.returncode == 0, f"stderr: {res.stderr}"
assert "OK" in res.stdout
def test_ast_parser_c_works_when_instantiated() -> None:
res = _run_in_subprocess("""
from src.file_cache import ASTParser
p = ASTParser('c')
tree = p.parse('int main() { return 0; }')
print('OK')
""")
assert res.returncode == 0, f"stderr: {res.stderr}"
assert "OK" in res.stdout
def test_tree_sitter_loaded_only_after_init() -> None:
res = _run_in_subprocess("""
import sys
import src.file_cache
pre = 'tree_sitter' in sys.modules
from src.file_cache import ASTParser
mid = 'tree_sitter' in sys.modules
ASTParser('python')
post = 'tree_sitter' in sys.modules
print(pre, mid, post)
""")
assert res.returncode == 0, f"stderr: {res.stderr}"
lines = res.stdout.strip().splitlines()
assert lines[0].split()[0] == "False", f"tree_sitter leaked at import: {res.stdout}"
assert lines[0].split()[1] == "False", f"tree_sitter leaked at ASTParser import: {res.stdout}"
assert lines[0].split()[2] == "True", f"tree_sitter not loaded after ASTParser('python'): {res.stdout}"
def test_audit_sees_no_tree_sitter_violation_in_file_cache() -> None:
res = _run_in_subprocess("""
import ast
from pathlib import Path
tree = ast.parse(Path('src/file_cache.py').read_text(encoding='utf-8'))
heavy = {'tree_sitter', 'tree_sitter_python', 'tree_sitter_cpp', 'tree_sitter_c'}
for node in tree.body:
if isinstance(node, ast.Import):
for alias in node.names:
top = alias.name.split('.')[0]
if top in heavy:
print('VIOLATION:', alias.name)
print('OK')
""")
assert res.returncode == 0, f"stderr: {res.stderr}"
assert "VIOLATION" not in res.stdout, f"file_cache.py still has tree_sitter: {res.stdout}"
assert "OK" in res.stdout