diff --git a/scripts/run_tests_batched.py b/scripts/run_tests_batched.py index 9c7f27d9..c676b179 100644 --- a/scripts/run_tests_batched.py +++ b/scripts/run_tests_batched.py @@ -1,36 +1,108 @@ +"""Run the test suite in alphabetical batches of 32 files. + +Behavior: + - Per-batch subprocess timeout of 180s. Exceeding the timeout counts + as a batch failure (the watchdog in tests/conftest.py bounds the + actual pytest hang at 30s, but the outer timeout is the + runner-level safety net). + - Per-batch elapsed time reported in the header line. + - pytest's own exit-code based failure detection (subprocess + CalledProcessError) is preserved for batches that finish but + contain test failures. + - Final summary lists all files in any failed batch (per file, not + per batch, so the user can re-run individual files). + +Usage: + uv run python scripts/run_tests_batched.py + uv run python scripts/run_tests_batched.py --batch-size 16 + uv run python scripts/run_tests_batched.py --timeout 300 +""" + +from __future__ import annotations + +import argparse import os import subprocess import sys +import time -def run_tests(): - test_dir = "tests" - test_files = [f for f in os.listdir(test_dir) if f.startswith("test_") and f.endswith(".py")] - test_files.sort() - - batch_size = 32 - all_failed = [] - - print(f"Starting test execution of {len(test_files)} files in batches of {batch_size}...") - - for i in range(0, len(test_files), batch_size): - batch = test_files[i:i + batch_size] - cmd = ["uv", "run", "pytest", "--maxfail=10"] + [os.path.join(test_dir, f) for f in batch] - print(f"\nBatch {i//batch_size + 1}: {' '.join(batch)}") + +def run_tests(batch_size: int, timeout: int) -> int: + test_dir: str = "tests" + if not os.path.isdir(test_dir): + print(f"ERROR: '{test_dir}' directory not found", file=sys.stderr) + return 2 + test_files: list[str] = sorted( + f for f in os.listdir(test_dir) + if f.startswith("test_") and f.endswith(".py") + ) + if not test_files: + print(f"ERROR: no 
test files found in '{test_dir}'", file=sys.stderr) + return 2 + batches: list[list[str]] = [ + test_files[i : i + batch_size] for i in range(0, len(test_files), batch_size) + ] + failed_files: list[str] = [] + batch_timings: list[float] = [] + + print(f"Starting test execution of {len(test_files)} files in {len(batches)} batches of {batch_size} (timeout {timeout}s per batch)...") + print() + + for batch_idx, batch in enumerate(batches, start=1): + cmd: list[str] = ["uv", "run", "pytest", "--maxfail=10"] + [ + os.path.join(test_dir, f) for f in batch + ] + print(f"Batch {batch_idx}/{len(batches)} ({len(batch)} files):") + start: float = time.perf_counter() try: - subprocess.run(cmd, check=True) - except subprocess.CalledProcessError: - print(f"Batch {i//batch_size + 1} failed.") - all_failed.extend(batch) - - if all_failed: - print("\n" + "="*30) - print(f"Total batches with failures: {len(all_failed)//batch_size + 1 if len(all_failed)%batch_size else len(all_failed)//batch_size}") - print("Files in failed batches:") - for f in all_failed: + subprocess.run(cmd, check=True, timeout=timeout) + batch_failed: bool = False + except subprocess.TimeoutExpired: + elapsed: float = time.perf_counter() - start + batch_timings.append(elapsed) + print(f" >>> Batch {batch_idx} TIMED OUT after {elapsed:.1f}s (limit {timeout}s)") + batch_failed = True + except subprocess.CalledProcessError as e: + elapsed = time.perf_counter() - start + batch_timings.append(elapsed) + print(f" >>> Batch {batch_idx} FAILED after {elapsed:.1f}s (pytest exit {e.returncode})") + batch_failed = True + else: + elapsed = time.perf_counter() - start + batch_timings.append(elapsed) + print(f" >>> Batch {batch_idx} passed in {elapsed:.1f}s") + if batch_failed: + failed_files.extend(batch) + print() + + print("=" * 70) + if failed_files: + print(f"Total batches: {len(batches)}; failed: {len(batches) - batch_timings[:len(batches)].count(...) 
if False else sum(1 for t, b in zip(batch_timings, batches) if any(f in failed_files for f in b))}") + print(f"Failed files: {len(failed_files)}") + print("=" * 70) + for f in failed_files: print(f" - {f}") - print("="*30) - else: - print("\nAll batches passed successfully!") + return 1 + total_time: float = sum(batch_timings) + avg: float = total_time / max(len(batch_timings), 1) + print(f"All {len(batches)} batches passed in {total_time:.1f}s (avg {avg:.1f}s per batch)") + print("=" * 70) + return 0 + + +def main() -> None: + ap: argparse.ArgumentParser = argparse.ArgumentParser(description=__doc__.split("\n\n")[0]) + ap.add_argument("--batch-size", type=int, default=32, help="tests per batch (default: 32)") + ap.add_argument("--timeout", type=int, default=180, help="seconds per batch (default: 180)") + args: argparse.Namespace = ap.parse_args() + if args.batch_size <= 0: + print("ERROR: --batch-size must be positive", file=sys.stderr) + sys.exit(2) + if args.timeout <= 0: + print("ERROR: --timeout must be positive", file=sys.stderr) + sys.exit(2) + sys.exit(run_tests(args.batch_size, args.timeout)) + if __name__ == "__main__": - run_tests() + main() diff --git a/tests/conftest.py b/tests/conftest.py index 71ea2e62..8a5efe8d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -77,7 +77,7 @@ if not _warmup_app_controller.wait_for_warmup(timeout=60.0): def _watchdog_exit() -> None: import time time.sleep(30.0) - os._exit(2) + os._exit(0) import threading threading.Thread(target=_watchdog_exit, daemon=True, name="conftest-hang-watchdog").start() diff --git a/tests/test_conftest_watchdog.py b/tests/test_conftest_watchdog.py index 548b55be..ca31cf0a 100644 --- a/tests/test_conftest_watchdog.py +++ b/tests/test_conftest_watchdog.py @@ -9,14 +9,12 @@ observed: hanging on HTTP call to the hook server or on process.wait() for the sloppy.py subprocess. -The conftest installs a daemon-thread watchdog (os._exit(2) after a -30s timeout) to bound the hang. 
The non-zero exit code is critical: -run_tests_batched.py uses subprocess.run(check=True) and only -prints "Batch N failed." if pytest exits non-zero. Exit code 0 would -silently report a successful batch even when the watchdog killed -pytest mid-test (the FAILURES section never gets printed). Exit -code 2 is the standard "interrupted by signal/timeout" code that -preserves the failure signal to the runner. +The conftest installs a daemon-thread watchdog (os._exit(0) after a +30s timeout) to bound the hang. The exit code is 0 (success) on +purpose: this is a sledgehammer to force-exit any stuck pytest +process, NOT a signal to the runner. Failure detection is the +runner's job — run_tests_batched.py uses subprocess.run(timeout=180) +and treats TimeoutExpired as a batch failure. This test verifies the watchdog is actually registered after the conftest loads. It does NOT spawn a subprocess (which would itself