From 6f9a3af20117043f00b918b2d7ce127599d3a3ce Mon Sep 17 00:00:00 2001 From: Ed_ Date: Sat, 6 Jun 2026 14:22:18 -0400 Subject: [PATCH] feat(audit): add main-thread import graph audit + baseline measurements Phase 1, Tasks T1.2 + T1.4 of the startup_speedup_20260606 track. NEW: scripts/audit_main_thread_imports.py Static CI gate that AST-walks the import graph reachable from sloppy.py and fails (exit 1) if any heavy module is imported at the top of a main-thread-reachable file. Walks into if/elif/else and try/except branches (which run at import time) but skips function bodies (which only run when called). Allowlist: stdlib + the lean gui_2 skeleton (imgui_bundle, defer, src.imgui_scopes, src.theme_2, src.theme_models, src.paths, src.models, src.events). NEW: scripts/audit_gui2_imports.py Read-only analysis tool that lists every top-level and function-level import in src/gui_2.py, classified by location. Used in Phase 5D to identify which imports to remove. NEW: tests/test_audit_main_thread_imports.py 9 tests covering: --help exits 0, clean stdlib-only passes, heavy third-party fails, google.genai fails, transitive walks, function- body imports ignored, if-branch imports flagged, try-block imports flagged, file:line reported. All 9 pass. NEW: docs/reports/startup_baseline_20260606.txt 3-run median cold-start benchmark. Worst offenders: src.gui_2 (1770ms), simulation.user_agent (1517ms), google.genai (1001ms), openai (482ms), anthropic (441ms), imgui_bundle (255ms), src.theme_nerv* (485ms combined), src.markdown_table (243ms), src.command_palette (242ms). NEW: docs/reports/startup_audit_20260606.txt Audit output on the CURRENT codebase. Reports 67 violations across the main-thread import graph (incl. numpy in src/gui_2.py:9, tomli_w in src/gui_2.py:18, fastapi + requests in src/app_controller, tree_sitter_* in src/file_cache, pydantic in src/models, plus all the src.* subsystem imports that drag in heavy transitive deps). 
Phase 3-5 of the track will resolve these one by one. After Phase 3-5, this audit must exit 0 (no violations). Co-located reports in docs/reports/ per project convention; the other agent finished their work in docs/superpowers/ and is unrelated. --- docs/reports/startup_audit_20260606.txt | 68 +++++++ docs/reports/startup_baseline_20260606.txt | 202 +++++++++++++++++++++ scripts/audit_gui2_imports.py | 114 ++++++++++++ scripts/audit_main_thread_imports.py | 199 ++++++++++++++++++++ tests/test_audit_main_thread_imports.py | 143 +++++++++++++++ 5 files changed, 726 insertions(+) create mode 100644 docs/reports/startup_audit_20260606.txt create mode 100644 docs/reports/startup_baseline_20260606.txt create mode 100644 scripts/audit_gui2_imports.py create mode 100644 scripts/audit_main_thread_imports.py create mode 100644 tests/test_audit_main_thread_imports.py diff --git a/docs/reports/startup_audit_20260606.txt b/docs/reports/startup_audit_20260606.txt new file mode 100644 index 00000000..b0bc56b1 --- /dev/null +++ b/docs/reports/startup_audit_20260606.txt @@ -0,0 +1,68 @@ +FAIL: 67 heavy top-level import(s) in main-thread import graph: + sloppy.py:L29 src.api_hooks from src.api_hooks import HookServer + sloppy.py:L31 src.gui_2 from src.gui_2 import App + sloppy.py:L46 src.app_controller from src.app_controller import AppController + sloppy.py:L50 src.gui_2 from src.gui_2 import main + src\api_hooks.py:L9 websockets import websockets + src\api_hooks.py:L14 websockets.asyncio.server from websockets.asyncio.server import serve + src\api_hooks.py:L16 src from src import cost_tracker + src\api_hooks.py:L17 src from src import session_logger + src\app_controller.py:L6 requests import requests + src\app_controller.py:L10 tomli_w import tomli_w + src\app_controller.py:L17 fastapi from fastapi import FastAPI, Depends, HTTPException + src\app_controller.py:L21 fastapi.security.api_key from fastapi.security.api_key import APIKeyHeader + src\app_controller.py:L23 src from src 
import aggregate + src\app_controller.py:L24 src from src import models + src\app_controller.py:L25 src from src import ai_client + src\app_controller.py:L26 src from src import conductor_tech_lead + src\app_controller.py:L27 src from src import events + src\app_controller.py:L28 src from src import mcp_client + src\app_controller.py:L29 src from src import multi_agent_conductor + src\app_controller.py:L30 src from src import orchestrator_pm + src\app_controller.py:L31 src from src import paths + src\app_controller.py:L32 src from src import performance_monitor + src\app_controller.py:L33 src from src import project_manager + src\app_controller.py:L34 src from src import session_logger + src\app_controller.py:L35 src from src import workspace_manager + src\app_controller.py:L36 src from src import presets + src\app_controller.py:L37 src from src import shell_runner + src\app_controller.py:L38 src from src import theme_2 as theme + src\app_controller.py:L39 src from src import thinking_parser + src\app_controller.py:L40 src from src import tool_presets + src\app_controller.py:L42 src.context_presets from src.context_presets import ContextPresetManager + src\app_controller.py:L43 src.file_cache from src.file_cache import ASTParser + src\file_cache.py:L38 tree_sitter import tree_sitter + src\file_cache.py:L39 tree_sitter_python import tree_sitter_python + src\file_cache.py:L40 tree_sitter_cpp import tree_sitter_cpp + src\file_cache.py:L41 tree_sitter_c import tree_sitter_c + src\gui_2.py:L9 numpy import numpy as np + src\gui_2.py:L18 tomli_w import tomli_w + src\gui_2.py:L37 src.diff_viewer from src.diff_viewer import apply_patch_to_file + src\gui_2.py:L38 src from src import ai_client + src\gui_2.py:L39 src from src import aggregate + src\gui_2.py:L40 src from src import api_hooks + src\gui_2.py:L41 src from src import app_controller + src\gui_2.py:L42 src from src import bg_shader + src\gui_2.py:L43 src from src import cost_tracker + src\gui_2.py:L44 src from src 
import history + src\gui_2.py:L45 src from src import imgui_scopes as imscope + src\gui_2.py:L46 src from src import paths + src\gui_2.py:L47 src from src import presets + src\gui_2.py:L48 src from src import project_manager + src\gui_2.py:L49 src from src import session_logger + src\gui_2.py:L50 src from src import log_registry + src\gui_2.py:L51 src from src import log_pruner + src\gui_2.py:L52 src from src import models + src\gui_2.py:L54 src from src import mcp_client + src\gui_2.py:L55 src from src import markdown_helper + src\gui_2.py:L56 src from src import shaders + src\gui_2.py:L57 src from src import synthesis_formatter + src\gui_2.py:L58 src from src import theme_2 as theme + src\gui_2.py:L59 src from src import theme_nerv_fx as theme_fx + src\gui_2.py:L60 src from src import thinking_parser + src\gui_2.py:L61 src from src import workspace_manager + src\gui_2.py:L62 src.hot_reloader from src.hot_reloader import HotReloader + src\gui_2.py:L65 win32gui import win32gui + src\gui_2.py:L66 win32con import win32con + src\models.py:L46 tomli_w import tomli_w + src\models.py:L51 pydantic from pydantic import BaseModel diff --git a/docs/reports/startup_baseline_20260606.txt b/docs/reports/startup_baseline_20260606.txt new file mode 100644 index 00000000..d1ee8812 --- /dev/null +++ b/docs/reports/startup_baseline_20260606.txt @@ -0,0 +1,202 @@ +scanning imports in: ./src, ./simulation +project root: C:\projects\manual_slop +sys.path: ['C:\\projects\\manual_slop', 'C:\\projects\\manual_slop\\thirdparty'] + +found 84 unique importable module paths. benchmarking (3 runs each, timeout 30s)... 
+ + [ 1/84] anthropic 441.41ms (1 files) ok + [ 2/84] api_hook_client FAIL (4 files) ModuleNotFoundError: No module named 'api_hook_client' + [ 3/84] ast 7.11ms (4 files) ok + [ 4/84] asyncio 55.76ms (6 files) ok + [ 5/84] atexit 0.03ms (1 files) ok + [ 6/84] collections 2.50ms (2 files) ok + [ 7/84] contextlib 4.50ms (2 files) ok + [ 8/84] copy 3.20ms (4 files) ok + [ 9/84] dataclasses 17.07ms (12 files) ok + [ 10/84] datetime 1.72ms (8 files) ok + [ 11/84] difflib 8.46ms (3 files) ok + [ 12/84] fastapi 234.13ms (1 files) ok + [ 13/84] fastapi.security.api_key 229.52ms (1 files) ok + [ 14/84] glob 9.20ms (1 files) ok + [ 15/84] google 0.75ms (1 files) ok + [ 16/84] google.genai 1001.89ms (1 files) ok + [ 17/84] hashlib 2.87ms (3 files) ok + [ 18/84] html.parser 10.92ms (1 files) ok + [ 19/84] http.server 41.37ms (1 files) ok + [ 20/84] imgui_bundle 255.59ms (10 files) ok + [ 21/84] importlib 1.23ms (1 files) ok + [ 22/84] inspect 15.34ms (1 files) ok + [ 23/84] json 9.59ms (15 files) ok + [ 24/84] logging 15.98ms (1 files) ok + [ 25/84] math 0.04ms (3 files) ok + [ 26/84] numpy 68.41ms (2 files) ok + [ 27/84] openai 482.69ms (1 files) ok + [ 28/84] os 0.00ms (22 files) ok + [ 29/84] pathlib 11.99ms (29 files) ok + [ 30/84] psutil 24.25ms (1 files) ok + [ 31/84] pydantic 75.38ms (1 files) ok + [ 32/84] queue 6.65ms (1 files) ok + [ 33/84] random 2.26ms (2 files) ok + [ 34/84] re 7.43ms (13 files) ok + [ 35/84] requests 99.20ms (3 files) ok + [ 36/84] scripts 0.55ms (1 files) ok + [ 37/84] shutil 12.08ms (4 files) ok + [ 38/84] simulation.sim_base FAIL (6 files) ModuleNotFoundError: No module named 'api_hook_client' + [ 39/84] simulation.sim_tools FAIL (1 files) ModuleNotFoundError: No module named 'api_hook_client' + [ 40/84] simulation.user_agent 1517.24ms (2 files) ok + [ 41/84] simulation.workflow_sim FAIL (2 files) ModuleNotFoundError: No module named 'api_hook_client' + [ 42/84] src 0.51ms (21 files) ok + [ 43/84] src.command_palette 241.69ms (1 files) ok + [ 
44/84] src.context_presets 140.86ms (1 files) ok + [ 45/84] src.dag_engine 157.86ms (2 files) ok + [ 46/84] src.diff_viewer 29.88ms (1 files) ok + [ 47/84] src.events 19.29ms (1 files) ok + [ 48/84] src.file_cache 32.48ms (4 files) ok + [ 49/84] src.fuzzy_anchor 14.83ms (1 files) ok + [ 50/84] src.gemini_cli_adapter 28.34ms (1 files) ok + [ 51/84] src.gui_2 1770.78ms (2 files) ok + [ 52/84] src.hot_reloader 20.99ms (2 files) ok + [ 53/84] src.log_registry 16.27ms (1 files) ok + [ 54/84] src.markdown_table 242.54ms (1 files) ok + [ 55/84] src.models 135.85ms (16 files) ok + [ 56/84] src.paths 19.11ms (5 files) ok + [ 57/84] src.performance_monitor 27.04ms (2 files) ok + [ 58/84] src.personas 137.78ms (1 files) ok + [ 59/84] src.summary_cache 19.18ms (1 files) ok + [ 60/84] src.theme_models 29.19ms (1 files) ok + [ 61/84] src.theme_nerv 246.46ms (1 files) ok + [ 62/84] src.theme_nerv_fx 254.55ms (1 files) ok + [ 63/84] src.tool_bias 146.49ms (1 files) ok + [ 64/84] src.tool_presets 142.35ms (1 files) ok + [ 65/84] subprocess 12.02ms (6 files) ok + [ 66/84] sys 0.00ms (17 files) ok + [ 67/84] tempfile 14.94ms (1 files) ok + [ 68/84] threading 4.62ms (7 files) ok + [ 69/84] time 0.00ms (20 files) ok + [ 70/84] tkinter 17.60ms (1 files) ok + [ 71/84] tomli_w 5.62ms (9 files) ok + [ 72/84] tomllib 14.81ms (11 files) ok + [ 73/84] traceback 11.06ms (5 files) ok + [ 74/84] tree_sitter 11.70ms (1 files) ok + [ 75/84] tree_sitter_c 23.70ms (1 files) ok + [ 76/84] tree_sitter_cpp 24.13ms (1 files) ok + [ 77/84] tree_sitter_python 23.76ms (1 files) ok + [ 78/84] typing 10.12ms (48 files) ok + [ 79/84] urllib.parse 9.78ms (1 files) ok + [ 80/84] urllib.request 39.22ms (1 files) ok + [ 81/84] uuid 6.00ms (2 files) ok + [ 82/84] webbrowser 17.23ms (2 files) ok + [ 83/84] websockets 43.12ms (1 files) ok + [ 84/84] websockets.asyncio.server 83.24ms (1 files) ok + + +============================================================================================================== 
+import time rankings (cold start, sorted slowest first) +thresholds: red > 200ms yellow > 50ms green <= 50ms +stats: median=17.4ms p90=246.5ms n=80 ok, 4 failed benchmark wall=44.5s +============================================================================================================== + +module time files rank status +----------------------------------------------------------------------------------------------- +src.gui_2 1770.78ms 2 1 ok +simulation.user_agent 1517.24ms 2 2 ok +google.genai 1001.89ms 1 3 ok +openai 482.69ms 1 4 ok +anthropic 441.41ms 1 5 ok +imgui_bundle 255.59ms 10 6 ok +src.theme_nerv_fx 254.55ms 1 7 ok +src.theme_nerv 246.46ms 1 8 ok +src.markdown_table 242.54ms 1 9 ok +src.command_palette 241.69ms 1 10 ok +fastapi 234.13ms 1 11 ok +fastapi.security.api_key 229.52ms 1 12 ok +src.dag_engine 157.86ms 2 13 ok +src.tool_bias 146.49ms 1 14 ok +src.tool_presets 142.35ms 1 15 ok +src.context_presets 140.86ms 1 16 ok +src.personas 137.78ms 1 17 ok +src.models 135.85ms 16 18 ok +requests 99.20ms 3 19 ok +websockets.asyncio.server 83.24ms 1 20 ok +pydantic 75.38ms 1 21 ok +numpy 68.41ms 2 22 ok +asyncio 55.76ms 6 23 ok +websockets 43.12ms 1 24 ok +http.server 41.37ms 1 25 ok +urllib.request 39.22ms 1 26 ok +src.file_cache 32.48ms 4 27 ok +src.diff_viewer 29.88ms 1 28 ok +src.theme_models 29.19ms 1 29 ok +src.gemini_cli_adapter 28.34ms 1 30 ok +src.performance_monitor 27.04ms 2 31 ok +psutil 24.25ms 1 32 ok +tree_sitter_cpp 24.13ms 1 33 ok +tree_sitter_python 23.76ms 1 34 ok +tree_sitter_c 23.70ms 1 35 ok +src.hot_reloader 20.99ms 2 36 ok +src.events 19.29ms 1 37 ok +src.summary_cache 19.18ms 1 38 ok +src.paths 19.11ms 5 39 ok +tkinter 17.60ms 1 40 ok +webbrowser 17.23ms 2 41 ok +dataclasses 17.07ms 12 42 ok +src.log_registry 16.27ms 1 43 ok +logging 15.98ms 1 44 ok +inspect 15.34ms 1 45 ok +tempfile 14.94ms 1 46 ok +src.fuzzy_anchor 14.83ms 1 47 ok +tomllib 14.81ms 11 48 ok +shutil 12.08ms 4 49 ok +subprocess 12.02ms 6 50 ok +pathlib 11.99ms 29 
51 ok +tree_sitter 11.70ms 1 52 ok +traceback 11.06ms 5 53 ok +html.parser 10.92ms 1 54 ok +typing 10.12ms 48 55 ok +urllib.parse 9.78ms 1 56 ok +json 9.59ms 15 57 ok +glob 9.20ms 1 58 ok +difflib 8.46ms 3 59 ok +re 7.43ms 13 60 ok +ast 7.11ms 4 61 ok +queue 6.65ms 1 62 ok +uuid 6.00ms 2 63 ok +tomli_w 5.62ms 9 64 ok +threading 4.62ms 7 65 ok +contextlib 4.50ms 2 66 ok +copy 3.20ms 4 67 ok +hashlib 2.87ms 3 68 ok +collections 2.50ms 2 69 ok +random 2.26ms 2 70 ok +datetime 1.72ms 8 71 ok +importlib 1.23ms 1 72 ok +google 0.75ms 1 73 ok +scripts 0.55ms 1 74 ok +src 0.51ms 21 75 ok +math 0.04ms 3 76 ok +atexit 0.03ms 1 77 ok +sys 0.00ms 17 78 ok +os 0.00ms 22 79 ok +time 0.00ms 20 80 ok +api_hook_client -- 4 81 ModuleNotFoundError: No module named 'api_hook_client' +simulation.sim_base -- 6 82 ModuleNotFoundError: No module named 'api_hook_client' +simulation.sim_tools -- 1 83 ModuleNotFoundError: No module named 'api_hook_client' +simulation.workflow_sim -- 2 84 ModuleNotFoundError: No module named 'api_hook_client' + +top 10 candidates for lazy / deferred loading (>= 200ms): + -> src.gui_2 1770.78ms + -> simulation.user_agent 1517.24ms + -> google.genai 1001.89ms + -> openai 482.69ms + -> anthropic 441.41ms + -> imgui_bundle 255.59ms + -> src.theme_nerv_fx 254.55ms + -> src.theme_nerv 246.46ms + -> src.markdown_table 242.54ms + -> src.command_palette 241.69ms + +failed imports (4): + api_hook_client ModuleNotFoundError: No module named 'api_hook_client' + simulation.sim_base ModuleNotFoundError: No module named 'api_hook_client' + simulation.sim_tools ModuleNotFoundError: No module named 'api_hook_client' + simulation.workflow_sim ModuleNotFoundError: No module named 'api_hook_client' diff --git a/scripts/audit_gui2_imports.py b/scripts/audit_gui2_imports.py new file mode 100644 index 00000000..4cbd5d82 --- /dev/null +++ b/scripts/audit_gui2_imports.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python +""" +Audit top-level imports in src/gui_2.py and classify them. 
import ast
import sys
from pathlib import Path
from typing import Iterable


def _imported_names(node: ast.AST) -> list[str]:
    """Return the absolute module names imported by *node*.

    ``import a, b`` yields ``["a", "b"]``; ``from a.b import c`` yields
    ``["a.b"]``. Relative imports (``from . import x``) and non-import
    nodes yield an empty list.
    """
    if isinstance(node, ast.Import):
        return [alias.name for alias in node.names]
    if isinstance(node, ast.ImportFrom):
        if not node.module or node.level != 0:
            return []
        return [node.module]
    return []


def _iter_scope_imports(statements: Iterable[ast.stmt]) -> Iterable[ast.stmt]:
    """Yield every import statement that belongs to one scope, in source order.

    Descends into compound statements (``if``/``try``/``with``/loops/class
    bodies) because their imports execute together with the enclosing scope,
    but never into nested function definitions: those are separate scopes
    and are reported under their own function name.
    """
    pending = list(reversed(list(statements)))
    while pending:
        node = pending.pop()
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            continue
        if isinstance(node, (ast.Import, ast.ImportFrom)):
            yield node
            continue
        # Push children reversed so they are popped in source order.
        pending.extend(reversed(list(ast.iter_child_nodes(node))))


def _one_line(node: ast.AST) -> str:
    """Return the source of *node* flattened onto a single line."""
    return ast.unparse(node).strip().replace("\n", " ")


def classify_imports(source: str) -> tuple[list[tuple[int, str, str]], list[tuple[int, str, str, str]]]:
    """Parse Python source and return ``(module_level, function_level)`` imports.

    Module-level entries are ``(line, imported_name, full_statement)``.
    Function-level entries are
    ``(line, function_name, imported_name, full_statement)``.

    Imports nested inside ``if``/``try``/``with``/loop/class bodies are
    attributed to the scope that executes them: the module when they sit
    outside any function, otherwise the innermost enclosing function.
    Relative imports are not reported.

    Raises:
        SyntaxError: if *source* is not valid Python.
    """
    tree = ast.parse(source)

    module_level: list[tuple[int, str, str]] = [
        (node.lineno, name, _one_line(node))
        for node in _iter_scope_imports(tree.body)
        for name in _imported_names(node)
    ]

    function_level: list[tuple[int, str, str, str]] = []
    for func in ast.walk(tree):
        if not isinstance(func, (ast.FunctionDef, ast.AsyncFunctionDef)):
            continue
        for node in _iter_scope_imports(func.body):
            for name in _imported_names(node):
                function_level.append((node.lineno, func.name, name, _one_line(node)))

    return module_level, function_level


def render_report(source_path: Path) -> str:
    """Read *source_path* and return a plain-text import report.

    The report lists module-level imports first, then function-level
    imports grouped by function name (names sorted alphabetically).
    """
    source = source_path.read_text(encoding="utf-8", errors="replace")
    module_level, function_level = classify_imports(source)

    lines: list[str] = [
        f"Audit of {source_path}",
        "=" * 80,
        "",
        f"MODULE-LEVEL imports: {len(module_level)} (these run on the main thread's import chain)",
        "-" * 80,
    ]
    for lineno, name, stmt in module_level:
        lines.append(f" L{lineno:>5} {name:<40} {stmt[:60]}")
    lines.append("")
    lines.append(f"FUNCTION-LEVEL imports: {len(function_level)} (potentially feature-gated)")
    lines.append("-" * 80)
    if function_level:
        # Group by bare function name; same-named functions share a group.
        by_function: dict[str, list[tuple[int, str, str]]] = {}
        for lineno, fname, name, stmt in function_level:
            by_function.setdefault(fname, []).append((lineno, name, stmt))
        for fname in sorted(by_function):
            entries = by_function[fname]
            lines.append(f" {fname} ({len(entries)} imports)")
            for lineno, name, stmt in entries:
                lines.append(f" L{lineno:>5} {name:<40} {stmt[:60]}")
    else:
        lines.append(" (none)")
    lines.append("")
    return "\n".join(lines)


def main(argv: list[str]) -> int:
    """CLI entry point: print the import report for the file in ``argv[1]``.

    Returns 0 on success and 2 for a usage error, a missing/non-file path,
    or a source file that cannot be parsed.
    """
    if len(argv) < 2:
        print("usage: audit_gui2_imports.py <path/to/file.py>", file=sys.stderr)
        return 2
    path = Path(argv[1])
    # is_file() rather than exists(): a directory would pass exists() and
    # then crash inside read_text().
    if not path.is_file():
        print(f"file not found: {path}", file=sys.stderr)
        return 2
    try:
        report = render_report(path)
    except SyntaxError as exc:
        print(f"cannot parse {path}: {exc}", file=sys.stderr)
        return 2
    print(report)
    return 0


if __name__ == "__main__":
    raise SystemExit(main(sys.argv))
--entry=sloppy.py +""" + +import argparse +import ast +import sys +from dataclasses import dataclass +from pathlib import Path + + +STDLIB = set(getattr(sys, "stdlib_module_names", set()) or set()) +LEAN_ALLOWLIST: set[str] = { + "imgui_bundle", + "defer", + "defer.sugar", + "src.imgui_scopes", + "src.theme_2", + "src.theme_models", + "src.paths", + "src.models", + "src.events", + "src.config", +} + + +@dataclass(frozen=True) +class Violation: + file: Path + lineno: int + module: str + statement: str + + def render(self) -> str: + return f" {self.file}:L{self.lineno} {self.module:<40} {self.statement[:80]}" + + +def _top_module(import_name: str) -> str: + return import_name.split(".")[0] + + +def _collect_top_level_imports(path: Path) -> list[tuple[int, str, str]]: + try: + source = path.read_text(encoding="utf-8", errors="replace") + except OSError: + return [] + try: + tree = ast.parse(source, filename=str(path)) + except SyntaxError: + return [] + results: list[tuple[int, str, str]] = [] + for node in tree.body: + results.extend(_walk_imports(node)) + return results + + +def _walk_imports(node: ast.AST) -> list[tuple[int, str, str]]: + if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): + return [] + if isinstance(node, ast.Import): + stmt = ast.unparse(node).strip() + return [(node.lineno, alias.name, stmt) for alias in node.names] + if isinstance(node, ast.ImportFrom): + if node.level and node.level > 0: + return [] + if not node.module: + return [] + stmt = ast.unparse(node).strip() + return [(node.lineno, node.module, stmt)] + results: list[tuple[int, str, str]] = [] + for child in ast.iter_child_nodes(node): + results.extend(_walk_imports(child)) + return results + + +def _resolve_local(import_name: str, root: Path) -> Path | None: + parts = import_name.split(".") + base = root.joinpath(*parts[:-1]) if len(parts) > 1 else root + candidate_py = base / f"{parts[-1]}.py" + if candidate_py.is_file(): + return candidate_py + candidate_pkg = base / 
parts[-1] / "__init__.py" + if candidate_pkg.is_file(): + return candidate_pkg + return None + + +def _walk_import_graph(entry: Path, root: Path) -> list[Path]: + visited: set[Path] = set() + queue: list[Path] = [entry.resolve()] + while queue: + current = queue.pop(0) + if current in visited: + continue + visited.add(current) + for _lineno, name, _stmt in _collect_top_level_imports(current): + resolved = _resolve_local(name, root) + if resolved is not None: + queue.append(resolved) + return sorted(visited) + + +def _is_allowed(module: str) -> bool: + if module in STDLIB: + return True + if module in LEAN_ALLOWLIST: + return True + top = _top_module(module) + if top in STDLIB or top in LEAN_ALLOWLIST: + return True + return False + + +def audit(root: Path, entry: Path) -> list[Violation]: + entry = entry.resolve() + root = root.resolve() + if not entry.is_file(): + raise FileNotFoundError(f"entry not found: {entry}") + graph = _walk_import_graph(entry, root) + violations: list[Violation] = [] + for path in graph: + for lineno, name, stmt in _collect_top_level_imports(path): + if _is_allowed(name): + continue + violations.append(Violation( + file=path.relative_to(root), + lineno=lineno, + module=name, + statement=stmt, + )) + return violations + + +def main(argv: list[str]) -> int: + ap = argparse.ArgumentParser(description="Audit main-thread import graph for heavy modules") + ap.add_argument("--root", default=".", help="project root (default: cwd)") + ap.add_argument("--entry", default="sloppy.py", help="entry point file (default: sloppy.py)") + ap.add_argument("--verbose", action="store_true", help="print the import graph + each file's imports") + args = ap.parse_args(argv[1:]) + + root = Path(args.root).resolve() + entry = (root / args.entry).resolve() + try: + graph = _walk_import_graph(entry, root) + except FileNotFoundError as e: + print(f"error: {e}", file=sys.stderr) + return 2 + + if args.verbose: + print(f"# import graph from {entry.relative_to(root)} 
({len(graph)} files reachable)") + for path in graph: + rel = path.relative_to(root) + imports = _collect_top_level_imports(path) + if not imports: + continue + print(f"\n## {rel}") + for lineno, name, stmt in imports: + mark = "OK " if _is_allowed(name) else "BAD" + print(f" [{mark}] L{lineno:>4} {name:<40} {stmt[:60]}") + + try: + violations = audit(root, entry) + except FileNotFoundError as e: + print(f"error: {e}", file=sys.stderr) + return 2 + + if not violations: + print(f"OK: {len(graph)} files in main-thread import graph; no heavy top-level imports.") + return 0 + + print(f"FAIL: {len(violations)} heavy top-level import(s) in main-thread import graph:") + for v in violations: + print(v.render()) + return 1 + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv)) diff --git a/tests/test_audit_main_thread_imports.py b/tests/test_audit_main_thread_imports.py new file mode 100644 index 00000000..9446d029 --- /dev/null +++ b/tests/test_audit_main_thread_imports.py @@ -0,0 +1,143 @@ +"""Tests for scripts/audit_main_thread_imports.py. + +TDD Red phase: tests the audit logic in isolation. The full CLI is also +exercised via subprocess in test_audit_cli_subprocess (kept simple so it +doesn't depend on the actual codebase). 
import subprocess
import sys
import textwrap
from pathlib import Path

ROOT = Path(__file__).resolve().parent.parent
SCRIPT = ROOT / "scripts" / "audit_main_thread_imports.py"
assert SCRIPT.exists(), f"audit script missing: {SCRIPT}"


def _run_audit_on_fixture(tmp_path: Path, source: str) -> subprocess.CompletedProcess:
    """Write *source* (dedented) to ``tmp_path/entry.py`` and run the audit
    CLI on it with ``tmp_path`` as the project root."""
    entry = tmp_path / "entry.py"
    entry.write_text(textwrap.dedent(source), encoding="utf-8")
    return subprocess.run(
        [sys.executable, str(SCRIPT), "--root", str(tmp_path), "--entry", str(entry)],
        capture_output=True,
        text=True,
        timeout=30,
    )


def test_audit_cli_help_exits_zero() -> None:
    """``--help`` exits 0 and prints usage text."""
    res = subprocess.run(
        [sys.executable, str(SCRIPT), "--help"],
        capture_output=True,
        text=True,
        timeout=10,
    )
    assert res.returncode == 0
    assert "audit" in res.stdout.lower() or "usage" in res.stdout.lower()


def test_audit_passes_on_clean_stdlib_only(tmp_path: Path) -> None:
    """An entry that imports only stdlib modules passes."""
    res = _run_audit_on_fixture(
        tmp_path,
        """\
        import os
        import sys
        import json
        from pathlib import Path
        """,
    )
    assert res.returncode == 0, f"unexpected failure: {res.stdout}\n{res.stderr}"


def test_audit_fails_on_heavy_third_party_top_level(tmp_path: Path) -> None:
    """A top-level third-party import is reported."""
    res = _run_audit_on_fixture(
        tmp_path,
        """\
        import os
        import anthropic
        """,
    )
    assert res.returncode != 0, f"should have failed but exited 0: {res.stdout}"
    assert "anthropic" in res.stdout


def test_audit_fails_on_google_genai_top_level(tmp_path: Path) -> None:
    """``from google import genai`` is reported."""
    res = _run_audit_on_fixture(
        tmp_path,
        """\
        from google import genai
        """,
    )
    assert res.returncode != 0
    assert "google" in res.stdout


def test_audit_walks_transitive_imports(tmp_path: Path) -> None:
    """Heavy imports in a locally imported module are reported with its file."""
    (tmp_path / "leaky.py").write_text("import requests\n", encoding="utf-8")
    res = _run_audit_on_fixture(
        tmp_path,
        """\
        import os
        from pathlib import Path
        import leaky
        """,
    )
    assert res.returncode != 0
    assert "requests" in res.stdout
    assert "leaky.py" in res.stdout


def test_audit_passes_on_nested_function_level_imports(tmp_path: Path) -> None:
    """Imports inside function bodies are not audited."""
    res = _run_audit_on_fixture(
        tmp_path,
        """\
        import os

        def lazy():
            import anthropic
            return anthropic
        """,
    )
    assert res.returncode == 0, f"function-level imports should be ignored: {res.stdout}\n{res.stderr}"


def test_audit_fails_on_import_inside_if_branch(tmp_path: Path) -> None:
    """Imports inside a module-level ``if`` branch are flagged."""
    res = _run_audit_on_fixture(
        tmp_path,
        """\
        import os
        if True:
            from google import genai
        """,
    )
    assert res.returncode != 0, f"if-branch imports should be flagged: {res.stdout}\n{res.stderr}"
    assert "google" in res.stdout


def test_audit_fails_on_import_inside_try_block(tmp_path: Path) -> None:
    """Imports inside a module-level ``try`` block are flagged."""
    res = _run_audit_on_fixture(
        tmp_path,
        """\
        import os
        try:
            import requests
        except ImportError:
            pass
        """,
    )
    assert res.returncode != 0
    assert "requests" in res.stdout


def test_audit_reports_file_line_for_violation(tmp_path: Path) -> None:
    """The violation line names the file and the 1-based line number."""
    res = _run_audit_on_fixture(
        tmp_path,
        """\
        import os
        import fastapi
        """,
    )
    assert res.returncode != 0
    # A bare '"L" in stdout' check is vacuous because failure output starts
    # with "FAIL:". Pin the actual file:line location instead.
    assert "entry.py:L2" in res.stdout
    assert "fastapi" in res.stdout