Private
Public Access
0
0

feat(run_tests_batched): add --plan and --audit modes (Phase 1 stub)

This commit is contained in:
2026-06-08 00:50:37 -04:00
parent 29ac64adc6
commit 57285d048b
+43 -99
View File
@@ -1,109 +1,53 @@
"""Run the test suite in alphabetical batches of 32 files.
Behavior:
- Per-batch subprocess timeout of 180s. Exceeding the timeout counts
as a batch failure (the watchdog in tests/conftest.py bounds the
actual pytest hang at 30s, but the outer timeout is the
runner-level safety net).
- Per-batch elapsed time reported in each batch's result line.
- pytest's own exit-code based failure detection (subprocess
CalledProcessError) is preserved for batches that finish but
contain test failures.
- Final summary lists all files in any failed batch (per file, not
per batch, so the user can re-run individual files).
Usage:
uv run python scripts/run_tests_batched.py
uv run python scripts/run_tests_batched.py --batch-size 16
uv run python scripts/run_tests_batched.py --timeout 300
"""
from __future__ import annotations

import argparse
import os
import subprocess
import sys
import time
from pathlib import Path

# The project-local modules imported below are resolved through this path
# entry (the repository's tests/ directory), so it must run before them.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "tests"))

from batcher import plan  # noqa: E402
from categorizer import categorize_all  # noqa: E402
def run_tests(batch_size: int, timeout: int) -> int:
    """Run every ``tests/test_*.py`` file through pytest in fixed-size batches.

    Files are read from the ``tests`` directory relative to the current
    working directory, sorted by name, and passed ``batch_size`` at a time
    to ``uv run pytest --maxfail=10``.

    Args:
        batch_size: Maximum number of test files per pytest invocation.
            A value of zero makes ``range`` raise ``ValueError``.
        timeout: Wall-clock limit, in seconds, for each pytest subprocess.

    Returns:
        ``0`` when every batch passed, ``1`` when at least one batch failed
        or timed out, and ``2`` when the ``tests`` directory is missing or
        contains no ``test_*.py`` files.
    """
    test_dir = "tests"
    if not os.path.isdir(test_dir):
        print(f"ERROR: '{test_dir}' directory not found", file=sys.stderr)
        return 2

    test_files = sorted(
        name
        for name in os.listdir(test_dir)
        if name.startswith("test_") and name.endswith(".py")
    )
    if not test_files:
        print(f"ERROR: no test files found in '{test_dir}'", file=sys.stderr)
        return 2

    batches = [
        test_files[i : i + batch_size]
        for i in range(0, len(test_files), batch_size)
    ]
    failed_files: list[str] = []
    failed_batch_count = 0
    batch_timings: list[float] = []

    print(f"Starting test execution of {len(test_files)} files in {len(batches)} batches of {batch_size} (timeout {timeout}s per batch)...")
    print()

    for batch_idx, batch in enumerate(batches, start=1):
        cmd = ["uv", "run", "pytest", "--maxfail=10"]
        cmd += [os.path.join(test_dir, name) for name in batch]
        print(f"Batch {batch_idx}/{len(batches)} ({len(batch)} files):")

        timed_out = False
        exit_code = None
        start = time.perf_counter()
        try:
            # check=True turns a non-zero pytest exit into CalledProcessError.
            subprocess.run(cmd, check=True, timeout=timeout)
        except subprocess.TimeoutExpired:
            timed_out = True
        except subprocess.CalledProcessError as exc:
            exit_code = exc.returncode
        elapsed = time.perf_counter() - start
        batch_timings.append(elapsed)

        if timed_out:
            print(f" >>> Batch {batch_idx} TIMED OUT after {elapsed:.1f}s (limit {timeout}s)")
        elif exit_code is not None:
            print(f" >>> Batch {batch_idx} FAILED after {elapsed:.1f}s (pytest exit {exit_code})")
        else:
            print(f" >>> Batch {batch_idx} passed in {elapsed:.1f}s")

        if timed_out or exit_code is not None:
            # The whole batch is reported, so each file can be re-run alone.
            failed_batch_count += 1
            failed_files.extend(batch)
        print()

    print("=" * 70)
    if failed_files:
        print(f"Total batches: {len(batches)}; failed batches: {failed_batch_count}")
        print(f"Failed files: {len(failed_files)}")
        print("=" * 70)
        for name in failed_files:
            print(f" - {name}")
        print("=" * 70)
        return 1

    total_time = sum(batch_timings)
    # max(..., 1) guards the division when there were no batches at all.
    avg = total_time / max(len(batch_timings), 1)
    print(f"All {len(batches)} batches passed in {total_time:.1f}s (avg {avg:.1f}s per batch)")
    print("=" * 70)
    return 0
def _print_plan(records, options) -> int:
    """Print one summary line per planned batch and return 0.

    Args:
        records: Categorized test records, passed unchanged to ``plan``.
        options: Parsed command-line namespace; ``include_opt_in`` and
            ``no_xdist`` are the attributes read here.

    Returns:
        Always ``0``.
    """
    batches = plan(
        records,
        include_opt_in=options.include_opt_in,
        xdist=not options.no_xdist,
    )
    for b in batches:
        status = "SKIP" if b.skip_reason else "RUN"
        print(f"[{status}] {b.label}: {len(b.files)} files, est {b.estimated_seconds:.1f}s, args={b.pytest_args}")
        if b.skip_reason:
            print(f" reason: {b.skip_reason}")
    return 0
def _print_audit(records, strict: bool) -> int:
auto = [r for r in records if r.source == "auto"]
print(f"Auto-inferred (unclassified) records: {len(auto)}")
for r in auto:
print(f" {r.filename}: fc={r.fixture_class.value}, subs={r.subsystems}, bg={r.batch_group}")
if strict:
bad = [r for r in auto if len(r.subsystems) > 1]
if bad:
print(f"STRICT: {len(bad)} auto-inferred files have multiple subsystems (probably cross-cutting):")
for r in bad:
print(f" {r.filename}: subs={r.subsystems}")
return 1
return 0
def main() -> None:
    """Parse ``--batch-size`` / ``--timeout`` and exit with ``run_tests``' status.

    Prints an error to stderr and exits with status 2 when either option is
    not positive; otherwise calls ``sys.exit`` with the value returned by
    ``run_tests``.

    NOTE(review): a second ``main`` is defined further down in this file and
    rebinds the name, so this version is not the one the ``__main__`` guard
    ends up calling -- confirm which entry point is intended.
    """
    # __doc__ is None when the module has no docstring (or under -OO), so
    # fall back to an empty description instead of raising AttributeError.
    summary = (__doc__ or "").split("\n\n")[0]
    ap = argparse.ArgumentParser(description=summary)
    ap.add_argument("--batch-size", type=int, default=32, help="tests per batch (default: 32)")
    ap.add_argument("--timeout", type=int, default=180, help="seconds per batch (default: 180)")
    args = ap.parse_args()

    if args.batch_size <= 0:
        print("ERROR: --batch-size must be positive", file=sys.stderr)
        sys.exit(2)
    if args.timeout <= 0:
        print("ERROR: --timeout must be positive", file=sys.stderr)
        sys.exit(2)

    sys.exit(run_tests(args.batch_size, args.timeout))
def main() -> int:
    """Parse the command line and dispatch to the audit or plan printer.

    Records are loaded with ``categorize_all(tests_dir, registry)``.
    ``--audit`` is checked before ``--plan``, so it wins when both are given.
    With neither flag a notice is printed and no tests are executed.

    Returns:
        The status returned by ``_print_audit`` or ``_print_plan``, or ``0``
        on the stub path.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--tests-dir", default="tests")
    p.add_argument("--registry", default="tests/test_categories.toml")
    # NOTE(review): --tiers is parsed but not read anywhere in this function.
    p.add_argument("--tiers", default="1,2,3,H")
    p.add_argument("--include-opt-in", action="store_true")
    p.add_argument("--no-xdist", action="store_true")
    p.add_argument("--plan", action="store_true")
    p.add_argument("--audit", action="store_true")
    p.add_argument("--strict", action="store_true")
    options = p.parse_args()

    records = categorize_all(Path(options.tests_dir), Path(options.registry))
    if options.audit:
        return _print_audit(records, strict=options.strict)
    if options.plan:
        return _print_plan(records, options)

    print("Phase 1 stub: no actual test execution yet. Use --plan or --audit.")
    return 0
if __name__ == "__main__":
    # Call main() exactly once and use its return value as the exit status.
    sys.exit(main())