"""Plan and audit the categorized test suite.

Phase 1 of the batched runner: this script can print the batch plan
(``--plan``) or list the test files whose category was auto-inferred
(``--audit``). It does not execute any tests yet.

Usage:
    uv run python scripts/run_tests_batched.py --plan
    uv run python scripts/run_tests_batched.py --plan --include-opt-in --no-xdist
    uv run python scripts/run_tests_batched.py --audit --strict
"""

import argparse
import sys
from pathlib import Path

# Put <repo>/tests on sys.path before the imports below so they can resolve
# (presumably categorizer.py and batcher.py live there -- confirm).
sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "tests"))

from categorizer import categorize_all  # noqa: E402  (needs the sys.path tweak above)
from batcher import plan  # noqa: E402


def _print_plan(records, options: argparse.Namespace) -> int:
    """Print one summary line per planned batch.

    Args:
        records: Categorized test records as returned by ``categorize_all``;
            passed through to ``plan`` unchanged.
        options: Parsed command-line options. Only ``include_opt_in`` and
            ``no_xdist`` are read here.

    Returns:
        Always 0; printing the plan cannot fail on its own.
    """
    batches = plan(records, include_opt_in=options.include_opt_in, xdist=not options.no_xdist)
    for b in batches:
        # A batch with a non-empty skip_reason is reported but marked SKIP.
        status = "SKIP" if b.skip_reason else "RUN"
        print(f"[{status}] {b.label}: {len(b.files)} files, est {b.estimated_seconds:.1f}s, args={b.pytest_args}")
        if b.skip_reason:
            print(f" reason: {b.skip_reason}")
    return 0


def _print_audit(records, strict: bool) -> int:
    """List every record whose ``source`` is ``"auto"``.

    Args:
        records: Categorized test records as returned by ``categorize_all``.
        strict: When true, auto-inferred records that list more than one
            subsystem are reported and make the audit fail.

    Returns:
        1 if ``strict`` is set and at least one auto-inferred record has
        multiple subsystems, otherwise 0.
    """
    auto = [r for r in records if r.source == "auto"]
    print(f"Auto-inferred (unclassified) records: {len(auto)}")
    for r in auto:
        print(f" {r.filename}: fc={r.fixture_class.value}, subs={r.subsystems}, bg={r.batch_group}")
    if strict:
        bad = [r for r in auto if len(r.subsystems) > 1]
        if bad:
            print(f"STRICT: {len(bad)} auto-inferred files have multiple subsystems (probably cross-cutting):")
            for r in bad:
                print(f" {r.filename}: subs={r.subsystems}")
            return 1
    return 0


def main() -> int:
    """Parse the command line and run the requested mode.

    ``--audit`` takes precedence over ``--plan`` when both are given.

    Returns:
        Process exit status: the status of the selected mode, or 2 when the
        tests directory is missing or no mode was selected.
    """
    p = argparse.ArgumentParser(
        description="Plan and audit the categorized test suite (Phase 1: no test execution yet)."
    )
    p.add_argument("--tests-dir", default="tests", help="directory handed to categorize_all (default: tests)")
    p.add_argument(
        "--registry",
        default="tests/test_categories.toml",
        help="registry file handed to categorize_all (default: tests/test_categories.toml)",
    )
    # NOTE(review): --tiers is parsed but not read anywhere in this script yet.
    p.add_argument("--tiers", default="1,2,3,H", help="comma-separated tier list; accepted but not used yet")
    p.add_argument("--include-opt-in", action="store_true", help="pass include_opt_in=True to the planner")
    p.add_argument("--no-xdist", action="store_true", help="pass xdist=False to the planner")
    p.add_argument("--plan", action="store_true", help="print the batch plan and exit")
    p.add_argument("--audit", action="store_true", help="list auto-inferred records and exit")
    p.add_argument(
        "--strict",
        action="store_true",
        help="with --audit: exit 1 if an auto-inferred file has multiple subsystems",
    )
    options = p.parse_args()

    # Fail early with a clear message instead of handing a bad path to
    # categorize_all.
    tests_dir = Path(options.tests_dir)
    if not tests_dir.is_dir():
        print(f"ERROR: '{tests_dir}' directory not found", file=sys.stderr)
        return 2

    records = categorize_all(tests_dir, Path(options.registry))
    if options.audit:
        return _print_audit(records, strict=options.strict)
    if options.plan:
        return _print_plan(records, options)

    # No tests were run, so do not exit 0: a caller that treats this script
    # as "run the test suite" must not see a false success.
    print("Phase 1 stub: no actual test execution yet. Use --plan or --audit.", file=sys.stderr)
    return 2


if __name__ == "__main__":
    sys.exit(main())