"""Run the test suite in alphabetical batches of 32 files. Behavior: - Per-batch subprocess timeout of 180s. Exceeding the timeout counts as a batch failure (the watchdog in tests/conftest.py bounds the actual pytest hang at 30s, but the outer timeout is the runner-level safety net). - Per-batch elapsed time reported in the header line. - pytest's own exit-code based failure detection (subprocess CalledProcessError) is preserved for batches that finish but contain test failures. - Final summary lists all files in any failed batch (per file, not per batch, so the user can re-run individual files). Usage: uv run python scripts/run_tests_batched.py uv run python scripts/run_tests_batched.py --batch-size 16 uv run python scripts/run_tests_batched.py --timeout 300 """ from __future__ import annotations import argparse import os import subprocess import sys import time def run_tests(batch_size: int, timeout: int) -> int: test_dir: str = "tests" if not os.path.isdir(test_dir): print(f"ERROR: '{test_dir}' directory not found", file=sys.stderr) return 2 test_files: list[str] = sorted( f for f in os.listdir(test_dir) if f.startswith("test_") and f.endswith(".py") ) if not test_files: print(f"ERROR: no test files found in '{test_dir}'", file=sys.stderr) return 2 batches: list[list[str]] = [ test_files[i : i + batch_size] for i in range(0, len(test_files), batch_size) ] failed_files: list[str] = [] batch_timings: list[float] = [] print(f"Starting test execution of {len(test_files)} files in {len(batches)} batches of {batch_size} (timeout {timeout}s per batch)...") print() for batch_idx, batch in enumerate(batches, start=1): cmd: list[str] = ["uv", "run", "pytest", "--maxfail=10"] + [ os.path.join(test_dir, f) for f in batch ] print(f"Batch {batch_idx}/{len(batches)} ({len(batch)} files):") start: float = time.perf_counter() try: subprocess.run(cmd, check=True, timeout=timeout) except subprocess.TimeoutExpired: elapsed: float = time.perf_counter() - start batch_timings.append(elapsed) print(f" >>> Batch {batch_idx} TIMED OUT after {elapsed:.1f}s (limit {timeout}s)") failed_files.extend(batch) except subprocess.CalledProcessError as e: elapsed = time.perf_counter() - start batch_timings.append(elapsed) print(f" >>> Batch {batch_idx} FAILED after {elapsed:.1f}s (pytest exit {e.returncode})") failed_files.extend(batch) else: elapsed = time.perf_counter() - start batch_timings.append(elapsed) print(f" >>> Batch {batch_idx} passed in {elapsed:.1f}s") print() print("=" * 70) if failed_files: failed_batch_count: int = sum( 1 for batch in batches if any(f in failed_files for f in batch) ) print(f"Total batches: {len(batches)}; failed batches: {failed_batch_count}") print(f"Failed files: {len(failed_files)}") print("=" * 70) for f in failed_files: print(f" - {f}") print("=" * 70) return 1 total_time: float = sum(batch_timings) avg: float = total_time / max(len(batch_timings), 1) print(f"All {len(batches)} batches passed in {total_time:.1f}s (avg {avg:.1f}s per batch)") print("=" * 70) return 0 def main() -> None: ap: argparse.ArgumentParser = argparse.ArgumentParser(description=__doc__.split("\n\n")[0]) ap.add_argument("--batch-size", type=int, default=32, help="tests per batch (default: 32)") ap.add_argument("--timeout", type=int, default=180, help="seconds per batch (default: 180)") args: argparse.Namespace = ap.parse_args() if args.batch_size <= 0: print("ERROR: --batch-size must be positive", file=sys.stderr) sys.exit(2) if args.timeout <= 0: print("ERROR: --timeout must be positive", file=sys.stderr) sys.exit(2) sys.exit(run_tests(args.batch_size, args.timeout)) if __name__ == "__main__": main()