diff --git a/scripts/benchmark_imports.py b/scripts/benchmark_imports.py
new file mode 100644
index 00000000..81d35873
--- /dev/null
+++ b/scripts/benchmark_imports.py
@@ -0,0 +1,194 @@
+#!/usr/bin/env python
+"""
+benchmark cold-start import time for every top-level import in the .py files found recursively under src/ and simulation/.
+
+spawns a fresh python subprocess per import, mimicking the cold start of sloppy.py,
+and prints a sorted, color-coded listing with outliers highlighted.
+
+usage: uv run python scripts/benchmark_imports.py [--runs N] [--timeout SEC] [--top N]
+"""
+
+import argparse
+import ast
+import os
+import subprocess
+import sys
+import time
+from collections import defaultdict
+from pathlib import Path
+from statistics import median
+from typing import Iterable
+
+GREEN = "\033[32m"  # ANSI SGR escape sequences; main() rebinds all six to "" when color is off
+YELLOW = "\033[33m"
+RED = "\033[31m"
+BOLD = "\033[1m"
+DIM = "\033[2m"
+RESET = "\033[0m"  # emitted after colored text to end the styling
+# defaults for the command-line options parsed in main()
+DEFAULT_SCAN_DIRS = ("./src", "./simulation")  # scanned when no --scan-dir is given
+DEFAULT_RUNS = 3  # subprocess runs per module
+DEFAULT_TIMEOUT = 30  # seconds, per subprocess
+DEFAULT_TOP = 10  # maximum number of lazy-loading candidates listed
+DEFAULT_SLOW_MS = 200.0  # an import slower than this is colored red
+DEFAULT_MODERATE_MS = 50.0  # an import slower than this (but not slow) is colored yellow
+
+
+def gather_imports(scan_dirs: Iterable[str]) -> dict[str, list[str]]:
+ """Map each absolute module imported at the top level of a .py file under
+ `scan_dirs` (searched recursively) to the sorted list of files importing it.
+ Unreadable or unparsable files, relative imports and `__future__` are skipped."""
+ imports: dict[str, set[str]] = defaultdict(set)
+ for scan_dir in scan_dirs:
+  for py_file in Path(scan_dir).rglob("*.py"):
+   try:
+    tree = ast.parse(py_file.read_text(encoding="utf-8", errors="replace"))
+   except (SyntaxError, ValueError, OSError):  # ValueError: NUL bytes in the source on Python < 3.12
+    continue
+   for node in tree.body:  # module-level statements only; nested imports are not visited
+    names: list[str] = []
+    if isinstance(node, ast.Import):
+     names = [alias.name for alias in node.names]
+    elif isinstance(node, ast.ImportFrom) and node.module and node.level == 0:
+     names = [node.module]
+    for name in names:
+     if name != "__future__":
+      imports[name].add(str(py_file))
+ return {k: sorted(v) for k, v in imports.items()}
+
+
+def measure_import(module: str, sys_path: list[str], runs: int, timeout: int) -> tuple[float, str]:
+ """Time `__import__(module)` in `runs` fresh interpreter subprocesses.
+
+ `sys_path` is prepended to the child's sys.path in list order (first entry is searched first).
+ Returns (median seconds of the successful runs, "ok"), or (inf, last failure reason) if no run
+ succeeded. A run fails on timeout (`timeout` seconds), non-zero exit or unparsable output."""
+ times: list[float] = []
+ last_err = "no runs"
+ # one slice assignment instead of chained insert(0, ...) calls: keeps list order and is valid for []
+ script = (
+  f"import sys, time;sys.path[:0]={list(sys_path)!r};"
+  f"t=time.perf_counter();__import__({module!r});print(time.perf_counter()-t)"
+ )
+ for _ in range(runs):
+  try:
+   # errors="replace": undecodable bytes in the child's output must not abort the whole benchmark
+   result = subprocess.run([sys.executable, "-c", script], capture_output=True, text=True, errors="replace", timeout=timeout)
+  except subprocess.TimeoutExpired:
+   last_err = f"timeout>{timeout}s"
+   continue
+  if result.returncode != 0:
+   err_lines = (result.stderr or "").strip().splitlines()
+   last_err = (err_lines[-1] if err_lines else "non-zero exit")[:120]
+   continue
+  stdout = (result.stdout or "").strip()
+  try:
+   # the module may print while importing, so only the last line is the timing
+   times.append(float(stdout.splitlines()[-1]))
+  except (IndexError, ValueError):
+   last_err = f"parse: {stdout[:80]}"
+ if not times:
+  return (float("inf"), last_err)
+ return (median(times), "ok")
+
+
+def color_for(t: float, slow_ms: float, moderate_ms: float) -> str:
+ """Return the color code for an import time `t` in seconds; inf (failed import) maps to DIM."""
+ if t == float("inf"):
+  return DIM
+ elapsed_ms = t * 1000
+ if elapsed_ms > slow_ms:
+  return RED
+ return YELLOW if elapsed_ms > moderate_ms else GREEN
+
+
+def main() -> int:
+ """Parse the command line, benchmark every import found in the scan dirs and print the report.
+
+ Color is decided by --color/--no-color first, then the FORCE_COLOR and NO_COLOR environment
+ variables, then whether stdout is a TTY. Always returns 0, which is used as the exit status."""
+ ap = argparse.ArgumentParser(description="Benchmark cold-start import times for src/ and simulation/ files")
+ ap.add_argument("--runs", type=int, default=DEFAULT_RUNS, help=f"subprocess runs per import (default {DEFAULT_RUNS})")
+ ap.add_argument("--timeout", type=int, default=DEFAULT_TIMEOUT, help=f"per-subprocess timeout in seconds (default {DEFAULT_TIMEOUT})")
+ ap.add_argument("--top", type=int, default=DEFAULT_TOP, help=f"top-N recommendations to list (default {DEFAULT_TOP})")
+ ap.add_argument("--slow-ms", type=float, default=DEFAULT_SLOW_MS, help=f"slow threshold in ms (default {DEFAULT_SLOW_MS})")
+ ap.add_argument("--moderate-ms", type=float, default=DEFAULT_MODERATE_MS, help=f"moderate threshold in ms (default {DEFAULT_MODERATE_MS})")
+ ap.add_argument("--no-color", action="store_true", help="disable ANSI color output (deprecated, prefer --color=never)")
+ ap.add_argument("--color", choices=("auto", "always", "never"), default="auto", help="color output mode (default auto: TTY only)")
+ ap.add_argument("--scan-dir", action="append", default=None, help="directory to scan instead of the defaults (repeatable)")
+ args = ap.parse_args()
+
+ color_mode = "never" if args.no_color else args.color
+ if color_mode != "auto":
+  use_color = color_mode == "always"  # an explicit flag beats any environment variable
+ elif os.environ.get("FORCE_COLOR", "").strip().lower() in ("1", "true", "yes"):
+  use_color = True
+ elif os.environ.get("NO_COLOR"):  # no-color.org: any non-empty value disables color
+  use_color = False
+ else:
+  use_color = sys.stdout.isatty()
+ if not use_color:
+  global GREEN, YELLOW, RED, BOLD, DIM, RESET
+  GREEN = YELLOW = RED = BOLD = DIM = RESET = ""
+
+ project_root = os.path.abspath(".")
+ thirdparty = os.path.join(project_root, "thirdparty")
+ sys_path = [project_root, thirdparty]
+ scan_dirs: tuple[str, ...] = tuple(args.scan_dir) if args.scan_dir else DEFAULT_SCAN_DIRS
+ print(f"{BOLD}scanning imports in: {', '.join(scan_dirs)}{RESET}")
+ print(f"project root: {project_root}")
+ print(f"sys.path: {sys_path}\n")
+
+ imports = gather_imports(scan_dirs)
+ print(f"found {len(imports)} unique importable module paths. benchmarking ({args.runs} runs each, timeout {args.timeout}s)...\n")
+
+ started = time.perf_counter()
+ results: list[tuple[str, float, str, int]] = []
+ total = len(imports)
+ for i, module in enumerate(sorted(imports), 1):
+  t, status = measure_import(module, sys_path, args.runs, args.timeout)
+  n = len(imports[module])
+  results.append((module, t, status, n))
+  ms = f"{t*1000:8.2f}ms" if t != float("inf") else "    FAIL"
+  col = color_for(t, args.slow_ms, args.moderate_ms)
+  # flush explicitly: "\r" does not flush a line-buffered stdout, so the progress line would stay hidden
+  print(f"  [{i:>3}/{total}] {module:<42} {col}{ms:<12}{RESET}  ({n} files)  {DIM}{status}{RESET}", end="\r", flush=True)
+ print()
+
+ # slowest first, failed imports (inf) at the end
+ results.sort(key=lambda r: (r[1] == float("inf"), -r[1] if r[1] != float("inf") else 0))
+ valid = sorted(t for _, t, _, _ in results if t != float("inf") and t > 0)
+ med = median(valid) if valid else 0.0
+ p90 = valid[int(len(valid) * 0.9)] if len(valid) >= 10 else (valid[-1] if valid else 0.0)
+ total_elapsed = time.perf_counter() - started
+ failed = [(m, status) for m, t, status, _ in results if t == float("inf")]
+
+ bar = "=" * 110
+ print(f"\n{BOLD}{bar}{RESET}")
+ print(f"{BOLD}import time rankings (cold start, sorted slowest first){RESET}")
+ print(f"thresholds: {RED}red > {args.slow_ms:.0f}ms{RESET}   {YELLOW}yellow > {args.moderate_ms:.0f}ms{RESET}   {GREEN}green <= {args.moderate_ms:.0f}ms{RESET}")
+ print(f"stats: median={med*1000:.1f}ms   p90={p90*1000:.1f}ms   n={len(valid)} ok, {len(failed)} failed   benchmark wall={total_elapsed:.1f}s")
+ print(f"{BOLD}{bar}{RESET}\n")
+
+ print(f"{'module':<44} {'time':>12}  {'files':>6}  {'rank':>5}  status")
+ print("-" * 95)
+ for rank, (mod, t, status, n) in enumerate(results, 1):
+  col = color_for(t, args.slow_ms, args.moderate_ms)
+  time_s = f"{t*1000:9.2f}ms" if t != float("inf") else "       --"
+  print(f"{col}{mod:<44} {time_s:>12}  {n:>6}  {rank:>5}  {status}{RESET}")
+
+ top_n = [(m, t) for m, t, _, _ in results if t != float("inf") and t > args.slow_ms / 1000.0][:args.top]
+ if top_n:
+  print(f"\n{BOLD}top {len(top_n)} candidates for lazy / deferred loading (> {args.slow_ms:.0f}ms):{RESET}")
+  for m, t in top_n:
+   print(f"  {RED}->{RESET} {m:<44} {t*1000:8.2f}ms")
+
+ if failed:
+  print(f"\n{DIM}failed imports ({len(failed)}):{RESET}")
+  for m, status in failed:
+   print(f"  {DIM}{m:<44} {status}{RESET}")
+
+ return 0
+
+
+if __name__ == "__main__":
+ sys.exit(main())  # script entry point: main()'s return value becomes the exit status