feat(audit): add main-thread import graph audit + baseline measurements
Phase 1, Tasks T1.2 + T1.4 of the startup_speedup_20260606 track. NEW: scripts/audit_main_thread_imports.py Static CI gate that AST-walks the import graph reachable from sloppy.py and fails (exit 1) if any heavy module is imported at the top of a main-thread-reachable file. Walks into if/elif/else and try/except branches (which run at import time) but skips function bodies (which only run when called). Allowlist: stdlib + the lean gui_2 skeleton (imgui_bundle, defer, src.imgui_scopes, src.theme_2, src.theme_models, src.paths, src.models, src.events). NEW: scripts/audit_gui2_imports.py Read-only analysis tool that lists every top-level and function-level import in src/gui_2.py, classified by location. Used in Phase 5D to identify which imports to remove. NEW: tests/test_audit_main_thread_imports.py 9 tests covering: --help exits 0, clean stdlib-only passes, heavy third-party fails, google.genai fails, transitive walks, function- body imports ignored, if-branch imports flagged, try-block imports flagged, file:line reported. All 9 pass. NEW: docs/reports/startup_baseline_20260606.txt 3-run median cold-start benchmark. Worst offenders: src.gui_2 (1770ms), simulation.user_agent (1517ms), google.genai (1001ms), openai (482ms), anthropic (441ms), imgui_bundle (255ms), src.theme_nerv* (485ms combined), src.markdown_table (243ms), src.command_palette (242ms). NEW: docs/reports/startup_audit_20260606.txt Audit output on the CURRENT codebase. Reports 67 violations across the main-thread import graph (incl. numpy in src/gui_2.py:9, tomli_w in src/gui_2.py:18, fastapi + requests in src/app_controller, tree_sitter_* in src/file_cache, pydantic in src/models, plus all the src.* subsystem imports that drag in heavy transitive deps). Phase 3-5 of the track will resolve these one by one. After Phase 3-5, this audit must exit 0 (no violations). Co-located reports in docs/reports/ per project convention; the other agent finished their work in docs/superpowers/ and is unrelated.
This commit is contained in:
@@ -0,0 +1,114 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Audit the imports in src/gui_2.py and classify them by location.
|
||||
|
||||
For each `import X` or `from X import Y` statement in gui_2.py,
|
||||
report:
|
||||
- file:line
|
||||
- the imported module
|
||||
- whether it's at module level (always loaded on main thread) or inside
|
||||
a function (potentially feature-gated)
|
||||
|
||||
This is a static analysis tool for the startup_speedup_20260606 track.
|
||||
The output is meant to be read by a human who knows which functions
|
||||
are first-frame vs feature-gated.
|
||||
|
||||
Output format (text):
|
||||
MODULE-LEVEL imports (these run on the main thread's import chain):
|
||||
src/gui_2.py:1: import imgui_bundle
|
||||
src/gui_2.py:15: from src.app_controller import AppController
|
||||
...
|
||||
|
||||
FUNCTION-LEVEL imports (potentially feature-gated; candidates for _require_warmed):
|
||||
src/gui_2.py:42 (inside _render_command_palette): from src.command_palette import ...
|
||||
...
|
||||
"""
|
||||
|
||||
import ast
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Iterable
|
||||
|
||||
|
||||
def classify_imports(source: str) -> tuple[list[tuple[int, str, str]], list[tuple[int, str, str, str]]]:
    """Parse Python source and return ``(module_level, function_level)`` imports.

    Module-level entries are ``(line, imported_name, full_statement)``.
    Function-level entries are ``(line, function_name, imported_name,
    full_statement)``.

    Imports nested inside compound statements (``if``/``try``/``with``/loops/
    class bodies) are attributed to the nearest enclosing scope: the module
    when no function encloses them, otherwise the innermost enclosing
    function. Relative imports (``from . import x``, ``from .m import y``)
    are not reported.

    Raises:
        SyntaxError: if ``source`` is not valid Python (from ``ast.parse``).
    """
    tree = ast.parse(source)
    function_types = (ast.FunctionDef, ast.AsyncFunctionDef)
    module_level: list[tuple[int, str, str]] = []
    function_level: list[tuple[int, str, str, str]] = []

    def imported_names(node: ast.AST) -> list[str]:
        """Return the absolute module names an import statement pulls in."""
        if isinstance(node, ast.Import):
            return [alias.name for alias in node.names]
        if isinstance(node, ast.ImportFrom):
            if not node.module or node.level != 0:
                return []
            return [node.module]
        return []

    def scope_imports(scope: ast.AST) -> Iterable[ast.AST]:
        """Yield import statements under ``scope`` without entering nested functions."""
        for child in ast.iter_child_nodes(scope):
            if isinstance(child, function_types):
                # Nested functions are their own scope; they are reported
                # separately when the outer loop reaches them.
                continue
            if isinstance(child, (ast.Import, ast.ImportFrom)):
                yield child
            else:
                yield from scope_imports(child)

    def one_line(node: ast.AST) -> str:
        """Render a statement as a single line of source text."""
        return ast.unparse(node).strip().replace("\n", " ")

    for node in scope_imports(tree):
        names = imported_names(node)
        if not names:
            continue
        stmt = one_line(node)
        module_level.extend((node.lineno, name, stmt) for name in names)

    for func in ast.walk(tree):
        if not isinstance(func, function_types):
            continue
        for node in scope_imports(func):
            names = imported_names(node)
            if not names:
                continue
            stmt = one_line(node)
            function_level.extend((node.lineno, func.name, name, stmt) for name in names)

    return module_level, function_level
|
||||
|
||||
|
||||
def render_report(source_path: Path) -> str:
    """Build the plain-text audit report for the Python file at ``source_path``.

    The file is read as UTF-8 (undecodable bytes are replaced), classified
    with ``classify_imports``, and rendered as two sections: module-level
    imports, then function-level imports grouped by function name in sorted
    order. Statements are truncated to 60 characters.
    """
    text = source_path.read_text(encoding="utf-8", errors="replace")
    top_imports, nested_imports = classify_imports(text)
    rule = "-" * 80

    out: list[str] = [
        f"Audit of {source_path}",
        "=" * 80,
        "",
        f"MODULE-LEVEL imports: {len(top_imports)} (these run on the main thread's import chain)",
        rule,
    ]
    out.extend(f" L{lineno:>5} {name:<40} {stmt[:60]}" for lineno, name, stmt in top_imports)
    out += [
        "",
        f"FUNCTION-LEVEL imports: {len(nested_imports)} (potentially feature-gated)",
        rule,
    ]

    if not nested_imports:
        out.append(" (none)")
    else:
        # Group rows under the name of the function that contains them.
        grouped: dict[str, list[tuple[int, str, str]]] = {}
        for lineno, owner, name, stmt in nested_imports:
            if owner not in grouped:
                grouped[owner] = []
            grouped[owner].append((lineno, name, stmt))
        for owner in sorted(grouped):
            rows = grouped[owner]
            out.append(f" {owner} ({len(rows)} imports)")
            out.extend(f" L{lineno:>5} {name:<40} {stmt[:60]}" for lineno, name, stmt in rows)

    out.append("")
    return "\n".join(out)
|
||||
|
||||
|
||||
def main(argv: list[str]) -> int:
    """Command-line entry point: print the import audit for one Python file.

    Args:
        argv: full argument vector; ``argv[1]`` is the path to audit.

    Returns:
        0 after printing the report, or 2 when the path argument is missing,
        does not exist, or is not a regular file.
    """
    if len(argv) < 2:
        print("usage: audit_gui2_imports.py <path-to-gui_2.py>", file=sys.stderr)
        return 2
    path = Path(argv[1])
    if not path.exists():
        print(f"file not found: {path}", file=sys.stderr)
        return 2
    if not path.is_file():
        # A directory passes exists() but would crash in read_text().
        print(f"not a file: {path}", file=sys.stderr)
        return 2
    print(render_report(path))
    return 0
|
||||
|
||||
|
||||
# Script entry point: the process exit status is main()'s return value.
if __name__ == "__main__":
    sys.exit(main(sys.argv))
|
||||
@@ -0,0 +1,199 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Static CI gate: audit top-level imports in the main-thread import graph
|
||||
reachable from sloppy.py. Fails (exit 1) if any heavy module is imported
|
||||
at the top of a main-thread-reachable file.
|
||||
|
||||
The Main Thread Purity Invariant (see conductor/tracks/startup_speedup_20260606/
|
||||
spec.md:2.1) requires that the main thread's import chain contains only:
|
||||
- Python stdlib modules
|
||||
- The lean gui_2 skeleton: imgui_bundle, defer, src.imgui_scopes,
|
||||
src.theme_2 (default theme only), src.theme_models, src.paths,
|
||||
src.models, src.events
|
||||
- Modules that have been refactored to be lean (e.g., src.ai_client
|
||||
after Phase 3)
|
||||
|
||||
Function-level imports inside method bodies are NOT audited (they run
|
||||
on whichever thread calls the function, and the warmup mechanism in
|
||||
spec.md:2.2 Layer 3 makes that safe).
|
||||
|
||||
Usage:
|
||||
uv run python scripts/audit_main_thread_imports.py [--root <path>] [--entry <file>]
|
||||
|
||||
Defaults: --root=. --entry=sloppy.py
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import ast
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Standard-library module names as reported by the interpreter; an empty set
# when the interpreter does not provide sys.stdlib_module_names.
STDLIB = set(getattr(sys, "stdlib_module_names", ()))

# Non-stdlib modules that may be imported at the top level of a
# main-thread-reachable file without being reported as a violation.
LEAN_ALLOWLIST: set[str] = {
    "defer",
    "defer.sugar",
    "imgui_bundle",
    "src.config",
    "src.events",
    "src.imgui_scopes",
    "src.models",
    "src.paths",
    "src.theme_2",
    "src.theme_models",
}
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Violation:
    """One disallowed import found by the audit."""

    # File containing the import (audit() stores it relative to the root).
    file: Path
    # Line number of the import statement.
    lineno: int
    # Dotted module name that was imported.
    module: str
    # Source text of the import statement.
    statement: str

    def render(self) -> str:
        """Format the violation as one report row; the statement is cut at 80 chars."""
        location = f"{self.file}:L{self.lineno}"
        padded_module = format(self.module, "<40")
        shown_statement = self.statement[:80]
        return f" {location} {padded_module} {shown_statement}"
|
||||
|
||||
|
||||
def _top_module(import_name: str) -> str:
    """Return the first dotted component of ``import_name`` (``"a.b.c"`` -> ``"a"``)."""
    head, _sep, _rest = import_name.partition(".")
    return head
|
||||
|
||||
|
||||
def _collect_top_level_imports(path: Path) -> list[tuple[int, str, str]]:
    """Return the imports that run when the file at ``path`` is imported.

    Each entry is ``(line, module_name, statement_text)`` as produced by
    ``_walk_imports`` for every top-level statement of the file.

    Best-effort: a file that cannot be read or parsed yields an empty list
    rather than an exception.
    """
    try:
        source = path.read_text(encoding="utf-8", errors="replace")
        tree = ast.parse(source, filename=str(path))
    except (OSError, SyntaxError, ValueError):
        # ValueError is how ast.parse rejects NUL bytes in the source on
        # interpreters before 3.12; treat it like any other unparsable file.
        return []
    return [entry for node in tree.body for entry in _walk_imports(node)]
|
||||
|
||||
|
||||
def _walk_imports(node: ast.AST) -> list[tuple[int, str, str]]:
    """Collect absolute imports under ``node`` that are outside any function body.

    The subtree is visited in source (pre-) order. Function and async
    function definitions are not entered; every other node kind is searched.
    ``import a, b`` yields one entry per alias. ``from X import Y`` yields a
    single entry for ``X`` (the imported names are not recorded). Relative
    imports are ignored.

    Returns:
        A list of ``(line, module_name, statement_text)`` tuples.
    """
    found: list[tuple[int, str, str]] = []
    pending: list[ast.AST] = [node]
    while pending:
        current = pending.pop()
        if isinstance(current, (ast.FunctionDef, ast.AsyncFunctionDef)):
            continue
        if isinstance(current, ast.Import):
            text = ast.unparse(current).strip()
            found.extend((current.lineno, alias.name, text) for alias in current.names)
            continue
        if isinstance(current, ast.ImportFrom):
            is_relative = (current.level or 0) > 0
            if not is_relative and current.module:
                found.append((current.lineno, current.module, ast.unparse(current).strip()))
            continue
        # Push children reversed so they are popped in source order.
        pending.extend(reversed(list(ast.iter_child_nodes(current))))
    return found
|
||||
|
||||
|
||||
def _resolve_local(import_name: str, root: Path) -> Path | None:
|
||||
parts = import_name.split(".")
|
||||
base = root.joinpath(*parts[:-1]) if len(parts) > 1 else root
|
||||
candidate_py = base / f"{parts[-1]}.py"
|
||||
if candidate_py.is_file():
|
||||
return candidate_py
|
||||
candidate_pkg = base / parts[-1] / "__init__.py"
|
||||
if candidate_pkg.is_file():
|
||||
return candidate_pkg
|
||||
return None
|
||||
|
||||
|
||||
def _walk_import_graph(entry: Path, root: Path) -> list[Path]:
    """Return every local source file reachable from ``entry`` via audited imports.

    Starting at ``entry``, follows each import reported by
    ``_collect_top_level_imports`` that ``_resolve_local`` can map to a file
    under ``root``. Parent packages are followed too: importing ``a.b.c``
    executes ``a/__init__.py`` and ``a/b/__init__.py`` before ``c``, so each
    dotted prefix is resolved as well.

    Returns:
        The visited files, sorted. ``entry`` (resolved) is always included,
        even if it cannot be read.
    """
    visited: set[Path] = set()
    # Traversal order is irrelevant because the result is sorted; popping
    # from the end keeps each step O(1).
    pending: list[Path] = [entry.resolve()]
    while pending:
        current = pending.pop()
        if current in visited:
            continue
        visited.add(current)
        for _lineno, name, _stmt in _collect_top_level_imports(current):
            parts = name.split(".")
            for end in range(1, len(parts) + 1):
                resolved = _resolve_local(".".join(parts[:end]), root)
                if resolved is not None and resolved not in visited:
                    pending.append(resolved)
    return sorted(visited)
|
||||
|
||||
|
||||
def _is_allowed(module: str) -> bool:
    """Return True when ``module`` may be imported at top level on the main thread.

    A module is allowed when either its full dotted name or its first
    component appears in ``STDLIB`` or ``LEAN_ALLOWLIST``.
    """
    candidates = (module, _top_module(module))
    return any(name in STDLIB or name in LEAN_ALLOWLIST for name in candidates)
|
||||
|
||||
|
||||
def audit(root: Path, entry: Path) -> list[Violation]:
|
||||
entry = entry.resolve()
|
||||
root = root.resolve()
|
||||
if not entry.is_file():
|
||||
raise FileNotFoundError(f"entry not found: {entry}")
|
||||
graph = _walk_import_graph(entry, root)
|
||||
violations: list[Violation] = []
|
||||
for path in graph:
|
||||
for lineno, name, stmt in _collect_top_level_imports(path):
|
||||
if _is_allowed(name):
|
||||
continue
|
||||
violations.append(Violation(
|
||||
file=path.relative_to(root),
|
||||
lineno=lineno,
|
||||
module=name,
|
||||
statement=stmt,
|
||||
))
|
||||
return violations
|
||||
|
||||
|
||||
def main(argv: list[str]) -> int:
    """Command-line entry point for the main-thread import audit.

    Args:
        argv: full argument vector (``argv[0]`` is the program name).
            Options: ``--root`` (default ``.``), ``--entry`` (default
            ``sloppy.py``, resolved against the root) and ``--verbose``.

    Returns:
        0 when no violations are found, 1 when at least one is found, and
        2 when the entry file does not exist.
    """
    ap = argparse.ArgumentParser(description="Audit main-thread import graph for heavy modules")
    ap.add_argument("--root", default=".", help="project root (default: cwd)")
    ap.add_argument("--entry", default="sloppy.py", help="entry point file (default: sloppy.py)")
    ap.add_argument("--verbose", action="store_true", help="print the import graph + each file's imports")
    args = ap.parse_args(argv[1:])

    root = Path(args.root).resolve()
    entry = (root / args.entry).resolve()

    # Validate up front: the graph walk itself never raises for a missing
    # entry, so without this check --verbose would print a bogus graph.
    if not entry.is_file():
        print(f"error: entry not found: {entry}", file=sys.stderr)
        return 2

    def shown(path: Path) -> Path:
        """Return ``path`` relative to root, or unchanged when it lies outside root."""
        try:
            return path.relative_to(root)
        except ValueError:
            return path

    graph = _walk_import_graph(entry, root)

    if args.verbose:
        print(f"# import graph from {shown(entry)} ({len(graph)} files reachable)")
        for path in graph:
            imports = _collect_top_level_imports(path)
            if not imports:
                continue
            print(f"\n## {shown(path)}")
            for lineno, name, stmt in imports:
                mark = "OK " if _is_allowed(name) else "BAD"
                print(f" [{mark}] L{lineno:>4} {name:<40} {stmt[:60]}")

    try:
        violations = audit(root, entry)
    except FileNotFoundError as e:
        # Still possible if the entry disappears between the check and the audit.
        print(f"error: {e}", file=sys.stderr)
        return 2

    if not violations:
        print(f"OK: {len(graph)} files in main-thread import graph; no heavy top-level imports.")
        return 0

    print(f"FAIL: {len(violations)} heavy top-level import(s) in main-thread import graph:")
    for v in violations:
        print(v.render())
    return 1
|
||||
|
||||
|
||||
# Script entry point: the process exit status is main()'s return value
# (0 = clean, 1 = violations found, 2 = entry file missing).
if __name__ == "__main__":
    sys.exit(main(sys.argv))
|
||||
Reference in New Issue
Block a user