# manual_slop/scripts/run_tests_batched.py

"""Run the test suite in alphabetical batches (64 files per batch by default).

Behavior:
  - Per-batch subprocess timeout (--timeout, 1000s by default).
    Exceeding the timeout counts as a batch failure (the watchdog in
    tests/conftest.py bounds the actual pytest hang at 30s, but the
    outer timeout is the runner-level safety net).
  - Per-batch elapsed time reported in the result line printed after
    each batch finishes.
  - pytest's own exit-code based failure detection (subprocess
    CalledProcessError) is preserved for batches that finish but
    contain test failures.
  - Final summary lists all files in any failed batch (per file, not
    per batch, so the user can re-run individual files).

Usage:
  uv run python scripts/run_tests_batched.py
  uv run python scripts/run_tests_batched.py --batch-size 16
  uv run python scripts/run_tests_batched.py --timeout 300
"""

from __future__ import annotations

import argparse
import os
import subprocess
import sys
import time


def run_tests(batch_size: int, timeout: int) -> int:
 """Run every tests/test_*.py file through pytest in fixed-size batches.

 Files directly inside ``tests`` (relative to the current working
 directory) are sorted by name and split into consecutive groups of
 ``batch_size``. Each group is run in its own subprocess as
 ``uv run pytest --maxfail=10 <files>``.

 Args:
  batch_size: Maximum number of test files per pytest invocation.
   Must be positive.
  timeout: Wall-clock limit, in seconds, for each batch subprocess.

 Returns:
  0 if every batch passed; 1 if any batch failed or timed out; 2 if
  the ``tests`` directory is missing, holds no ``test_*.py`` files, or
  the ``uv`` executable could not be launched.
 """
 test_dir: str = "tests"
 if not os.path.isdir(test_dir):
  print(f"ERROR: '{test_dir}' directory not found", file=sys.stderr)
  return 2
 test_files: list[str] = sorted(
  f for f in os.listdir(test_dir)
  if f.startswith("test_") and f.endswith(".py")
 )
 if not test_files:
  print(f"ERROR: no test files found in '{test_dir}'", file=sys.stderr)
  return 2
 batches: list[list[str]] = [
  test_files[i : i + batch_size] for i in range(0, len(test_files), batch_size)
 ]
 failed_files: list[str] = []
 failed_batches: int = 0
 batch_timings: list[float] = []

 print(f"Starting test execution of {len(test_files)} files in {len(batches)} batches of {batch_size} (timeout {timeout}s per batch)...")
 print()

 for batch_idx, batch in enumerate(batches, start=1):
  cmd: list[str] = ["uv", "run", "pytest", "--maxfail=10"]
  cmd.extend(os.path.join(test_dir, f) for f in batch)
  print(f"Batch {batch_idx}/{len(batches)} ({len(batch)} files):")
  # The child writes straight to the inherited stdout; flush our buffered
  # header first so it is not printed after pytest's output when piped.
  sys.stdout.flush()

  # None means the batch passed; otherwise (verdict, detail) for the report.
  failure: tuple[str, str] | None = None
  start: float = time.perf_counter()
  try:
   subprocess.run(cmd, check=True, timeout=timeout)
  except FileNotFoundError:
   # Without the launcher no batch can run, so this is a setup error,
   # not a test failure.
   print(f"ERROR: '{cmd[0]}' executable not found; is it installed and on PATH?", file=sys.stderr)
   return 2
  except subprocess.TimeoutExpired:
   failure = ("TIMED OUT", f"(limit {timeout}s)")
  except subprocess.CalledProcessError as e:
   # check=True turns any non-zero pytest exit status into this exception.
   failure = ("FAILED", f"(pytest exit {e.returncode})")
  elapsed: float = time.perf_counter() - start
  batch_timings.append(elapsed)

  if failure is None:
   print(f"  >>> Batch {batch_idx} passed in {elapsed:.1f}s")
  else:
   verdict, detail = failure
   print(f"  >>> Batch {batch_idx} {verdict} after {elapsed:.1f}s {detail}")
   failed_batches += 1
   # Report per file so individual files can be re-run.
   failed_files.extend(batch)
  print()

 print("=" * 70)
 if failed_files:
  print(f"Total batches: {len(batches)}; failed: {failed_batches}")
  print(f"Failed files: {len(failed_files)}")
  print("=" * 70)
  for f in failed_files:
   print(f" - {f}")
  return 1
 total_time: float = sum(batch_timings)
 avg: float = total_time / max(len(batch_timings), 1)
 print(f"All {len(batches)} batches passed in {total_time:.1f}s (avg {avg:.1f}s per batch)")
 print("=" * 70)
 return 0


def main() -> None:
 """Parse command-line options, validate them, and run the batched tests.

 Always terminates via ``sys.exit``: with status 2 when ``--batch-size``
 or ``--timeout`` is not positive, otherwise with the status returned by
 ``run_tests``.
 """
 # __doc__ is None when docstrings are stripped (python -OO); fall back to
 # an empty description instead of crashing on None.split().
 summary: str = (__doc__ or "").split("\n\n")[0]
 ap: argparse.ArgumentParser = argparse.ArgumentParser(description=summary)
 # %(default)s keeps the help text in sync with the actual default values.
 ap.add_argument("--batch-size", type=int, default=64, help="test files per batch (default: %(default)s)")
 ap.add_argument("--timeout", type=int, default=1000, help="seconds per batch (default: %(default)s)")
 args: argparse.Namespace = ap.parse_args()
 if args.batch_size <= 0:
  print("ERROR: --batch-size must be positive", file=sys.stderr)
  sys.exit(2)
 if args.timeout <= 0:
  print("ERROR: --timeout must be positive", file=sys.stderr)
  sys.exit(2)
 sys.exit(run_tests(args.batch_size, args.timeout))


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
 main()