refactor(file_cache): remove top-level tree_sitter* imports; lazy via _require_warmed + TYPE_CHECKING
Sub-track 2B: 4 violations cleared. Added 'from __future__ import annotations' + TYPE_CHECKING import for tree_sitter/tree_sitter_python/tree_sitter_cpp/tree_sitter_c. Runtime access via _require_warmed() in ASTParser.__init__. 6 new tests in tests/test_file_cache_no_top_level_tree_sitter.py. All 25 tests pass (6 new + 19 existing).
This commit is contained in:
+15
-11
@@ -34,15 +34,17 @@ See Also:
|
||||
- docs/guide_tools.md for AST tool documentation
|
||||
- src/summarize.py for heuristic summaries
|
||||
"""
|
||||
import re
|
||||
import tree_sitter
|
||||
import tree_sitter_python
|
||||
import tree_sitter_cpp
|
||||
import tree_sitter_c
|
||||
from __future__ import annotations
|
||||
|
||||
# TODO(Ed): Eliminate these?
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Optional, Any, List, Tuple, Dict
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import tree_sitter
|
||||
import tree_sitter_python
|
||||
import tree_sitter_cpp
|
||||
import tree_sitter_c
|
||||
|
||||
|
||||
_ast_cache: Dict[str, Tuple[float, tree_sitter.Tree]] = {}
|
||||
@@ -63,10 +65,12 @@ class ASTParser:
|
||||
raise ValueError(f"Language '{language}' not supported yet.")
|
||||
self.language_name = language
|
||||
# Load the tree-sitter language grammar
|
||||
if language == "python": self.language = tree_sitter.Language(tree_sitter_python.language())
|
||||
elif language == "cpp": self.language = tree_sitter.Language(tree_sitter_cpp.language())
|
||||
elif language == "c": self.language = tree_sitter.Language(tree_sitter_c.language())
|
||||
self.parser = tree_sitter.Parser(self.language)
|
||||
from src.module_loader import _require_warmed
|
||||
ts = _require_warmed("tree_sitter")
|
||||
if language == "python": self.language = ts.Language(_require_warmed("tree_sitter_python").language())
|
||||
elif language == "cpp": self.language = ts.Language(_require_warmed("tree_sitter_cpp").language())
|
||||
elif language == "c": self.language = ts.Language(_require_warmed("tree_sitter_c").language())
|
||||
self.parser = ts.Parser(self.language)
|
||||
|
||||
def parse(self, code: str) -> tree_sitter.Tree:
|
||||
"""
|
||||
|
||||
@@ -0,0 +1,113 @@
|
||||
"""Tests that src/file_cache.py has NO top-level tree_sitter* imports.
|
||||
|
||||
Per the Main Thread Purity Invariant, the four tree_sitter modules
|
||||
(tree_sitter, tree_sitter_python, tree_sitter_cpp, tree_sitter_c) are
|
||||
heavy and must not appear in the main-thread import chain. They are
|
||||
loaded lazily via _require_warmed() inside ASTParser.__init__ and via
|
||||
TYPE_CHECKING for type-hint purposes.
|
||||
|
||||
These tests run in a fresh subprocess to ensure no warmup state leaks
|
||||
from the test runner.
|
||||
"""
|
||||
|
||||
import subprocess
|
||||
import sys
|
||||
import textwrap
|
||||
from pathlib import Path
|
||||
|
||||
ROOT = Path(__file__).resolve().parent.parent
|
||||
|
||||
|
||||
def _run_in_subprocess(snippet: str) -> subprocess.CompletedProcess:
    """Execute *snippet* in a fresh Python interpreter and return the result.

    The snippet is dedented first, so callers may pass an indented
    triple-quoted literal. The child interpreter is started with ``ROOT`` as
    its working directory, its stdout/stderr are captured as text, and a
    30-second timeout is applied.
    """
    command = [sys.executable, "-c", textwrap.dedent(snippet)]
    run_options = {
        "capture_output": True,
        "text": True,
        "cwd": str(ROOT),
        "timeout": 30,
    }
    return subprocess.run(command, **run_options)
|
||||
|
||||
|
||||
def test_file_cache_does_not_import_tree_sitter_at_module_level() -> None:
    """Importing ``src.file_cache`` must leave all four tree_sitter modules unloaded.

    A child interpreter imports the module and prints one ``<name> <bool>``
    line per heavy module, reporting whether it is present in ``sys.modules``.
    """
    heavy = ("tree_sitter", "tree_sitter_python", "tree_sitter_cpp", "tree_sitter_c")
    res = _run_in_subprocess("""
        import sys
        import src.file_cache
        for mod in ('tree_sitter', 'tree_sitter_python', 'tree_sitter_cpp', 'tree_sitter_c'):
            print(mod, mod in sys.modules)
    """)
    assert res.returncode == 0, f"stderr: {res.stderr}"
    reported = dict(line.split() for line in res.stdout.strip().splitlines())
    # Without this check an empty or truncated stdout would let the loop
    # below pass vacuously, hiding a broken probe script.
    assert set(reported) == set(heavy), f"unexpected probe output: {res.stdout!r}"
    for name, present in reported.items():
        assert present == "False", f"src.file_cache triggered {name} import: {res.stdout}"
|
||||
|
||||
|
||||
def test_ast_parser_python_works_when_instantiated() -> None:
    """``ASTParser('python')`` must parse code into a tree_sitter object.

    The child interpreter prints whether the type of the parse result lives
    in a module whose name starts with ``tree_sitter``.
    """
    probe = """
        from src.file_cache import ASTParser
        p = ASTParser('python')
        tree = p.parse('def foo(): pass')
        print(type(tree).__module__.startswith('tree_sitter'))
    """
    res = _run_in_subprocess(probe)
    assert res.returncode == 0, f"stderr: {res.stderr}"
    printed = res.stdout.strip()
    assert printed == "True", f"ASTParser('python') did not produce a tree_sitter.Tree: {res.stdout}"
|
||||
|
||||
|
||||
def test_ast_parser_cpp_works_when_instantiated() -> None:
    """``ASTParser('cpp')`` must parse code into a tree_sitter object.

    Mirrors the Python-grammar test: besides requiring a clean exit, the
    parse result itself is inspected, so a parser that returned some
    unrelated object would no longer pass.
    """
    res = _run_in_subprocess("""
        from src.file_cache import ASTParser
        p = ASTParser('cpp')
        tree = p.parse('int main() { return 0; }')
        print(type(tree).__module__.startswith('tree_sitter'))
    """)
    assert res.returncode == 0, f"stderr: {res.stderr}"
    assert res.stdout.strip() == "True", f"ASTParser('cpp') did not produce a tree_sitter.Tree: {res.stdout}"
|
||||
|
||||
|
||||
def test_ast_parser_c_works_when_instantiated() -> None:
    """``ASTParser('c')`` must parse code into a tree_sitter object.

    Mirrors the Python-grammar test: besides requiring a clean exit, the
    parse result itself is inspected, so a parser that returned some
    unrelated object would no longer pass.
    """
    res = _run_in_subprocess("""
        from src.file_cache import ASTParser
        p = ASTParser('c')
        tree = p.parse('int main() { return 0; }')
        print(type(tree).__module__.startswith('tree_sitter'))
    """)
    assert res.returncode == 0, f"stderr: {res.stderr}"
    assert res.stdout.strip() == "True", f"ASTParser('c') did not produce a tree_sitter.Tree: {res.stdout}"
|
||||
|
||||
|
||||
def test_tree_sitter_loaded_only_after_init() -> None:
    """``tree_sitter`` must enter ``sys.modules`` only once a parser is built.

    The child interpreter samples ``sys.modules`` three times -- after
    importing ``src.file_cache``, after importing ``ASTParser`` from it, and
    after constructing ``ASTParser('python')`` -- and prints the three flags
    on a single line.
    """
    probe = """
        import sys
        import src.file_cache
        pre = 'tree_sitter' in sys.modules
        from src.file_cache import ASTParser
        mid = 'tree_sitter' in sys.modules
        ASTParser('python')
        post = 'tree_sitter' in sys.modules
        print(pre, mid, post)
    """
    res = _run_in_subprocess(probe)
    assert res.returncode == 0, f"stderr: {res.stderr}"
    # Only the first output line carries the "<pre> <mid> <post>" flags.
    flags = res.stdout.strip().splitlines()[0].split()
    assert flags[0] == "False", f"tree_sitter leaked at import: {res.stdout}"
    assert flags[1] == "False", f"tree_sitter leaked at ASTParser import: {res.stdout}"
    assert flags[2] == "True", f"tree_sitter not loaded after ASTParser('python'): {res.stdout}"
|
||||
|
||||
|
||||
def test_audit_sees_no_tree_sitter_violation_in_file_cache() -> None:
    """Statically audit ``src/file_cache.py`` for top-level tree_sitter imports.

    The child interpreter parses the file with ``ast`` and walks only the
    module's direct body, so imports nested inside other statements (such as
    an ``if TYPE_CHECKING:`` block) are deliberately not flagged. Both
    ``import X`` and absolute ``from X import ...`` forms are checked; the
    latter would also pull the heavy module in at import time.
    """
    res = _run_in_subprocess("""
        import ast
        from pathlib import Path
        tree = ast.parse(Path('src/file_cache.py').read_text(encoding='utf-8'))
        heavy = {'tree_sitter', 'tree_sitter_python', 'tree_sitter_cpp', 'tree_sitter_c'}
        for node in tree.body:
            if isinstance(node, ast.Import):
                names = [alias.name for alias in node.names]
            elif isinstance(node, ast.ImportFrom) and node.level == 0 and node.module:
                names = [node.module]
            else:
                continue
            for name in names:
                if name.split('.')[0] in heavy:
                    print('VIOLATION:', name)
        print('OK')
    """)
    assert res.returncode == 0, f"stderr: {res.stderr}"
    assert "VIOLATION" not in res.stdout, f"file_cache.py still has tree_sitter: {res.stdout}"
    assert "OK" in res.stdout
|
||||
Reference in New Issue
Block a user