#!/usr/bin/env python
"""
benchmark cold-start import time for every top-level import in src/*.py and simulation/*.py.

spawns a fresh python subprocess per import, mimicking the cold start of sloppy.py,
and prints a sorted, color-coded listing with outliers highlighted.

usage: uv run python scripts/benchmark_imports.py [--runs N] [--timeout SEC] [--top N]
"""

import argparse
import ast
import os
import subprocess
import sys
import time
from collections import defaultdict
from pathlib import Path
from statistics import median
from typing import Iterable, Mapping, Sequence

GREEN = "\033[32m"
YELLOW = "\033[33m"
RED = "\033[31m"
BOLD = "\033[1m"
DIM = "\033[2m"
RESET = "\033[0m"

DEFAULT_SCAN_DIRS = ("./src", "./simulation")
DEFAULT_RUNS = 3
DEFAULT_TIMEOUT = 30
DEFAULT_TOP = 10
DEFAULT_SLOW_MS = 200.0
DEFAULT_MODERATE_MS = 50.0

# sentinel duration reported by measure_import when no run produced a timing
FAILED = float("inf")


def gather_imports(scan_dirs: Iterable[str]) -> dict[str, list[str]]:
    """Collect module-level absolute imports from every ``*.py`` under *scan_dirs*.

    Only statements directly in the module body are considered (imports nested
    in functions, classes, ``if`` or ``try`` blocks are not visited). Relative
    imports and ``__future__`` are skipped. Files that cannot be read or parsed
    are silently ignored.

    Returns a mapping of dotted module name -> sorted list of files importing it.
    """
    imports: dict[str, set[str]] = defaultdict(set)
    for scan_dir in scan_dirs:
        for py_file in Path(scan_dir).rglob("*.py"):
            try:
                tree = ast.parse(py_file.read_text(encoding="utf-8", errors="replace"))
            except (SyntaxError, OSError):
                continue
            for node in tree.body:
                if isinstance(node, ast.Import):
                    for alias in node.names:
                        if alias.name == "__future__":
                            continue
                        imports[alias.name].add(str(py_file))
                elif isinstance(node, ast.ImportFrom):
                    # level != 0 means a relative import; those cannot be
                    # imported by name from a bare subprocess.
                    if not node.module or node.level != 0:
                        continue
                    if node.module == "__future__":
                        continue
                    imports[node.module].add(str(py_file))
    return {k: sorted(v) for k, v in imports.items()}


def measure_import(module: str, sys_path: list[str], runs: int, timeout: int) -> tuple[float, str]:
    """Time ``__import__(module)`` in fresh interpreter subprocesses.

    Each run starts ``sys.executable -c <script>``; the script inserts every
    entry of *sys_path* at position 0 of ``sys.path`` (so later entries end up
    ahead of earlier ones), imports *module*, and prints the elapsed seconds.

    Returns ``(median_seconds, "ok")`` when at least one run succeeded, else
    ``(FAILED, <last error summary>)``.
    """
    # Build the one-liner from a list so an empty sys_path does not produce
    # an empty statement (";;"), which is a SyntaxError.
    statements = ["import sys, time"]
    statements.extend(f"sys.path.insert(0, {p!r})" for p in sys_path)
    statements.extend(
        [
            "t=time.perf_counter()",
            f"__import__({module!r})",
            "d=time.perf_counter()-t",
            # Blank print() guarantees the timing sits on its own final line
            # even if the imported module wrote to stdout without a newline.
            "print()",
            "print(d)",
        ]
    )
    script = ";".join(statements)

    times: list[float] = []
    last_err = "no runs"
    for _ in range(runs):
        try:
            result = subprocess.run(
                [sys.executable, "-c", script],
                capture_output=True,
                text=True,
                timeout=timeout,
            )
        except subprocess.TimeoutExpired:
            last_err = f"timeout>{timeout}s"
            continue
        if result.returncode != 0:
            err_lines = (result.stderr or "").strip().splitlines()
            last_err = (err_lines[-1] if err_lines else "non-zero exit")[:120]
            continue
        stdout = (result.stdout or "").strip()
        out_lines = stdout.splitlines()
        try:
            # Only the last line is ours; anything before it was printed by
            # the module being imported.
            times.append(float(out_lines[-1]))
        except (IndexError, ValueError):
            last_err = f"parse: {stdout[:80]}"
    if not times:
        return (FAILED, last_err)
    return (median(times), "ok")


def color_for(t: float, slow_ms: float, moderate_ms: float) -> str:
    """Return the ANSI color prefix for a duration *t* given in seconds.

    Failed measurements (``inf``) are dim, durations strictly above *slow_ms*
    are red, strictly above *moderate_ms* yellow, everything else green.
    """
    if t == FAILED:
        return DIM
    if t * 1000 > slow_ms:
        return RED
    if t * 1000 > moderate_ms:
        return YELLOW
    return GREEN


def resolve_use_color(mode: str, environ: Mapping[str, str], is_tty: bool) -> bool:
    """Decide whether ANSI colors should be emitted.

    An explicit *mode* of ``"always"`` or ``"never"`` wins over the environment.
    In ``"auto"`` mode ``FORCE_COLOR`` (1/true/yes) enables color, otherwise any
    non-empty ``NO_COLOR`` disables it, otherwise color follows *is_tty*.
    """
    if mode == "always":
        return True
    if mode == "never":
        return False
    if environ.get("FORCE_COLOR", "").strip().lower() in ("1", "true", "yes"):
        return True
    # NO_COLOR convention: present and non-empty, regardless of its value.
    if environ.get("NO_COLOR", ""):
        return False
    return is_tty


def disable_colors() -> None:
    """Blank out the module-level ANSI constants so all output is plain text."""
    global GREEN, YELLOW, RED, BOLD, DIM, RESET
    GREEN = YELLOW = RED = BOLD = DIM = RESET = ""


def nearest_rank(sorted_values: Sequence[float], percent: int) -> float:
    """Return the nearest-rank *percent*-th percentile of ascending *sorted_values*.

    Uses integer arithmetic for ``ceil(n * percent / 100)`` to avoid float
    rounding. Returns ``0.0`` for an empty sequence.
    """
    if not sorted_values:
        return 0.0
    count = len(sorted_values)
    rank = -(-count * percent // 100)  # ceiling division
    index = min(max(rank - 1, 0), count - 1)
    return sorted_values[index]


def main() -> int:
    """Parse CLI options, benchmark every discovered import, and print the report.

    Returns the process exit code (always 0; argument errors exit via argparse).
    """
    ap = argparse.ArgumentParser(description="Benchmark cold-start import times for src/ and simulation/ files")
    ap.add_argument("--runs", type=int, default=DEFAULT_RUNS,
                    help=f"subprocess runs per import (default {DEFAULT_RUNS})")
    ap.add_argument("--timeout", type=int, default=DEFAULT_TIMEOUT,
                    help=f"per-subprocess timeout in seconds (default {DEFAULT_TIMEOUT})")
    ap.add_argument("--top", type=int, default=DEFAULT_TOP,
                    help=f"top-N recommendations to list (default {DEFAULT_TOP})")
    ap.add_argument("--slow-ms", type=float, default=DEFAULT_SLOW_MS,
                    help=f"slow threshold in ms (default {DEFAULT_SLOW_MS})")
    ap.add_argument("--moderate-ms", type=float, default=DEFAULT_MODERATE_MS,
                    help=f"moderate threshold in ms (default {DEFAULT_MODERATE_MS})")
    ap.add_argument("--no-color", action="store_true",
                    help="disable ANSI color output (deprecated, prefer --color=never)")
    ap.add_argument("--color", choices=("auto", "always", "never"), default="auto",
                    help="color output mode (default auto: TTY only)")
    ap.add_argument("--scan-dir", action="append", default=None,
                    help=f"directory to scan (repeatable; replaces the defaults {', '.join(DEFAULT_SCAN_DIRS)})")
    args = ap.parse_args()

    # Reject values that would silently produce an all-FAIL or oddly sliced report.
    if args.runs < 1:
        ap.error("--runs must be >= 1")
    if args.timeout < 1:
        ap.error("--timeout must be >= 1")
    if args.top < 0:
        ap.error("--top must be >= 0")

    color_mode = "never" if args.no_color else args.color
    stdout_is_tty = sys.stdout.isatty()
    if not resolve_use_color(color_mode, os.environ, stdout_is_tty):
        disable_colors()

    project_root = os.path.abspath(".")
    thirdparty = os.path.join(project_root, "thirdparty")
    sys_path = [project_root, thirdparty]

    scan_dirs: tuple[str, ...] = tuple(args.scan_dir) if args.scan_dir else DEFAULT_SCAN_DIRS

    print(f"{BOLD}scanning imports in: {', '.join(scan_dirs)}{RESET}")
    print(f"project root: {project_root}")
    print(f"sys.path: {sys_path}\n")

    imports = gather_imports(scan_dirs)
    print(f"found {len(imports)} unique importable module paths. benchmarking ({args.runs} runs each, timeout {args.timeout}s)...\n")

    started = time.perf_counter()
    results: list[tuple[str, float, str, int]] = []
    total = len(imports)
    # Overwrite the progress line in place only on a terminal; piped or logged
    # output gets one line per module instead of carriage-return garbage.
    progress_end = "\r" if stdout_is_tty else "\n"
    for i, module in enumerate(sorted(imports), 1):
        t, status = measure_import(module, sys_path, args.runs, args.timeout)
        n = len(imports[module])
        results.append((module, t, status, n))
        ms = f"{t*1000:8.2f}ms" if t != FAILED else " FAIL"
        col = color_for(t, args.slow_ms, args.moderate_ms)
        print(f" [{i:>3}/{total}] {module:<42} {col}{ms:<12}{RESET} ({n} files) {DIM}{status}{RESET}", end=progress_end)
    print()

    # Slowest first; failed measurements sink to the bottom.
    results.sort(key=lambda r: (r[1] == FAILED, -r[1] if r[1] != FAILED else 0))

    valid = sorted(t for _, t, _, _ in results if t != FAILED and t > 0)
    med = median(valid) if valid else 0.0
    p90 = nearest_rank(valid, 90)
    total_elapsed = time.perf_counter() - started

    bar = "=" * 110
    print(f"\n{BOLD}{bar}{RESET}")
    print(f"{BOLD}import time rankings (cold start, sorted slowest first){RESET}")
    print(f"thresholds: {RED}red > {args.slow_ms:.0f}ms{RESET} {YELLOW}yellow > {args.moderate_ms:.0f}ms{RESET} {GREEN}green <= {args.moderate_ms:.0f}ms{RESET}")
    print(f"stats: median={med*1000:.1f}ms p90={p90*1000:.1f}ms n={len(valid)} ok, {total - len(valid)} failed benchmark wall={total_elapsed:.1f}s")
    print(f"{BOLD}{bar}{RESET}\n")

    print(f"{'module':<44} {'time':>12} {'files':>6} {'rank':>5} status")
    print("-" * 95)
    for rank, (mod, t, status, n) in enumerate(results, 1):
        col = color_for(t, args.slow_ms, args.moderate_ms)
        time_s = f"{t*1000:9.2f}ms" if t != FAILED else " --"
        print(f"{col}{mod:<44} {time_s:>12} {n:>6} {rank:>5} {status}{RESET}")

    top_n = [(m, t) for m, t, _, _ in results if t != FAILED and t > args.slow_ms / 1000.0][:args.top]
    if top_n:
        # The filter is strictly greater-than, matching the "red >" legend above.
        print(f"\n{BOLD}top {len(top_n)} candidates for lazy / deferred loading (> {args.slow_ms:.0f}ms):{RESET}")
        for m, t in top_n:
            print(f" {RED}->{RESET} {m:<44} {t*1000:8.2f}ms")

    failed = [(m, status) for m, t, status, _ in results if t == FAILED]
    if failed:
        print(f"\n{DIM}failed imports ({len(failed)}):{RESET}")
        for m, status in failed:
            print(f" {DIM}{m:<44} {status}{RESET}")

    return 0


if __name__ == "__main__":
    raise SystemExit(main())