feat(run_tests_batched): add --plan and --audit modes (Phase 1 stub)

2026-06-08 00:50:37 -04:00
parent 29ac64adc6
commit 57285d048b
1 changed files with 43 additions and 99 deletions
@@ -1,109 +1,53 @@
-"""Run the test suite in alphabetical batches of 32 files.
-
-Behavior:
-  - Per-batch subprocess timeout of 180s. Exceeding the timeout counts
-    as a batch failure (the watchdog in tests/conftest.py bounds the
-    actual pytest hang at 30s, but the outer timeout is the
-    runner-level safety net).
-  - Per-batch elapsed time reported in the header line.
-  - pytest's own exit-code based failure detection (subprocess
-    CalledProcessError) is preserved for batches that finish but
-    contain test failures.
-  - Final summary lists all files in any failed batch (per file, not
-    per batch, so the user can re-run individual files).
-
-Usage:
-  uv run python scripts/run_tests_batched.py
-  uv run python scripts/run_tests_batched.py --batch-size 16
-  uv run python scripts/run_tests_batched.py --timeout 300
-"""
-
-from __future__ import annotations
-
 import argparse
-import os
-import subprocess
 import sys
-import time
+from pathlib import Path

+sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "tests"))

-def run_tests(batch_size: int, timeout: int) -> int:
- test_dir: str = "tests"
- if not os.path.isdir(test_dir):
-  print(f"ERROR: '{test_dir}' directory not found", file=sys.stderr)
-  return 2
- test_files: list[str] = sorted(
-  f for f in os.listdir(test_dir)
-  if f.startswith("test_") and f.endswith(".py")
- )
- if not test_files:
-  print(f"ERROR: no test files found in '{test_dir}'", file=sys.stderr)
-  return 2
- batches: list[list[str]] = [
-  test_files[i : i + batch_size] for i in range(0, len(test_files), batch_size)
- ]
- failed_files: list[str] = []
- batch_timings: list[float] = []
+from categorizer import categorize_all
+from batcher import plan

- print(f"Starting test execution of {len(test_files)} files in {len(batches)} batches of {batch_size} (timeout {timeout}s per batch)...")
- print()
-
- for batch_idx, batch in enumerate(batches, start=1):
-  cmd: list[str] = ["uv", "run", "pytest", "--maxfail=10"] + [
-   os.path.join(test_dir, f) for f in batch
-  ]
-  print(f"Batch {batch_idx}/{len(batches)} ({len(batch)} files):")
-  start: float = time.perf_counter()
-  try:
-   subprocess.run(cmd, check=True, timeout=timeout)
-  except subprocess.TimeoutExpired:
-   elapsed: float = time.perf_counter() - start
-   batch_timings.append(elapsed)
-   print(f"  >>> Batch {batch_idx} TIMED OUT after {elapsed:.1f}s (limit {timeout}s)")
-   failed_files.extend(batch)
-  except subprocess.CalledProcessError as e:
-   elapsed = time.perf_counter() - start
-   batch_timings.append(elapsed)
-   print(f"  >>> Batch {batch_idx} FAILED after {elapsed:.1f}s (pytest exit {e.returncode})")
-   failed_files.extend(batch)
-  else:
-   elapsed = time.perf_counter() - start
-   batch_timings.append(elapsed)
-   print(f"  >>> Batch {batch_idx} passed in {elapsed:.1f}s")
-  print()
-
- print("=" * 70)
- if failed_files:
-  failed_batch_count: int = sum(
-   1 for batch in batches if any(f in failed_files for f in batch)
-  )
-  print(f"Total batches: {len(batches)}; failed batches: {failed_batch_count}")
-  print(f"Failed files: {len(failed_files)}")
-  print("=" * 70)
-  for f in failed_files:
-   print(f" - {f}")
-  print("=" * 70)
-  return 1
- total_time: float = sum(batch_timings)
- avg: float = total_time / max(len(batch_timings), 1)
- print(f"All {len(batches)} batches passed in {total_time:.1f}s (avg {avg:.1f}s per batch)")
- print("=" * 70)
+def _print_plan(records, options) -> int:
+ """Print a one-line summary of every planned batch; run nothing.
+
+ The batches come from ``plan`` (imported from ``batcher``), called with
+ ``records`` plus two keyword flags derived from ``options``:
+ ``include_opt_in`` is forwarded unchanged and ``xdist`` is the negation
+ of ``options.no_xdist``.
+
+ Args:
+  records: Forwarded unchanged to ``plan``. When called from ``main``
+   this is the value returned by ``categorize_all``.
+  options: Object exposing ``include_opt_in`` and ``no_xdist``
+   attributes. When called from ``main`` this is the parsed argparse
+   namespace.
+
+ Returns:
+  Always 0.
+ """
+ batches = plan(records, include_opt_in=options.include_opt_in, xdist=not options.no_xdist)
+ for b in batches:
+  # A truthy skip_reason marks the batch as SKIP; otherwise it is RUN.
+  status = "SKIP" if b.skip_reason else "RUN"
+  print(f"[{status}] {b.label}: {len(b.files)} files, est {b.estimated_seconds:.1f}s, args={b.pytest_args}")
+  if b.skip_reason:
+   # The reason goes on its own indented line below the summary line.
+   print(f"    reason: {b.skip_reason}")
 return 0

+def _print_audit(records, strict: bool) -> int:
+ """List the records whose ``source`` is ``"auto"`` and optionally gate on them.
+
+ Prints a count of the matching records, then one line per record
+ showing its ``filename``, ``fixture_class.value``, ``subsystems`` and
+ ``batch_group``.
+
+ Args:
+  records: Iterable of objects exposing the attributes named above.
+   When called from ``main`` this is the value returned by
+   ``categorize_all``.
+  strict: When true, additionally report every ``"auto"`` record that
+   has more than one entry in ``subsystems`` and fail if any exist.
+
+ Returns:
+  1 if ``strict`` is true and at least one ``"auto"`` record has more
+  than one subsystem; otherwise 0.
+ """
+ auto = [r for r in records if r.source == "auto"]
+ print(f"Auto-inferred (unclassified) records: {len(auto)}")
+ for r in auto:
+  print(f" {r.filename}: fc={r.fixture_class.value}, subs={r.subsystems}, bg={r.batch_group}")
+ if strict:
+  # Only the auto-inferred records are checked, not the full record list.
+  bad = [r for r in auto if len(r.subsystems) > 1]
+  if bad:
+   print(f"STRICT: {len(bad)} auto-inferred files have multiple subsystems (probably cross-cutting):")
+   for r in bad:
+    print(f" {r.filename}: subs={r.subsystems}")
+   # Non-zero status so a caller using this as a gate sees a failure.
+   return 1
+ return 0

-def main() -> None:
- ap: argparse.ArgumentParser = argparse.ArgumentParser(description=__doc__.split("\n\n")[0])
- ap.add_argument("--batch-size", type=int, default=32, help="tests per batch (default: 32)")
- ap.add_argument("--timeout", type=int, default=180, help="seconds per batch (default: 180)")
- args: argparse.Namespace = ap.parse_args()
- if args.batch_size <= 0:
-  print("ERROR: --batch-size must be positive", file=sys.stderr)
-  sys.exit(2)
- if args.timeout <= 0:
-  print("ERROR: --timeout must be positive", file=sys.stderr)
-  sys.exit(2)
- sys.exit(run_tests(args.batch_size, args.timeout))
-
+def main() -> int:
+ """Parse the command line and dispatch to the audit or plan printer.
+
+ Records are built by ``categorize_all`` from ``--tests-dir`` and
+ ``--registry`` before the mode is inspected, so that call runs even
+ when neither ``--plan`` nor ``--audit`` is given.
+
+ Mode selection:
+  * ``--audit`` is checked first and therefore wins when both
+    ``--audit`` and ``--plan`` are passed; ``--strict`` is forwarded
+    to ``_print_audit``.
+  * ``--plan`` hands the whole namespace to ``_print_plan``, which
+    reads ``include_opt_in`` and ``no_xdist`` from it.
+  * With neither flag, a stub notice is printed and 0 is returned.
+
+ NOTE(review): ``--tiers`` is accepted but not read anywhere in this
+ function; confirm whether a later phase is meant to consume it.
+
+ Returns:
+  The status returned by the selected printer, or 0 for the stub
+  path. The module entry point passes this value to ``sys.exit``.
+ """
+ p = argparse.ArgumentParser()
+ p.add_argument("--tests-dir", default="tests")
+ p.add_argument("--registry", default="tests/test_categories.toml")
+ p.add_argument("--tiers", default="1,2,3,H")
+ p.add_argument("--include-opt-in", action="store_true")
+ p.add_argument("--no-xdist", action="store_true")
+ p.add_argument("--plan", action="store_true")
+ p.add_argument("--audit", action="store_true")
+ p.add_argument("--strict", action="store_true")
+ options = p.parse_args()
+ records = categorize_all(Path(options.tests_dir), Path(options.registry))
+ # --audit is tested before --plan, so it takes precedence if both are set.
+ if options.audit:
+  return _print_audit(records, strict=options.strict)
+ if options.plan:
+  return _print_plan(records, options)
+ print("Phase 1 stub: no actual test execution yet. Use --plan or --audit.")
+ return 0

 if __name__ == "__main__":
- main()
+ # Use the integer returned by main() as the process exit status.
+ sys.exit(main())