diff --git a/tests/conftest.py b/tests/conftest.py index 9b519cb3..2852e385 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -67,6 +67,45 @@ if not _warmup_app_controller.wait_for_warmup(timeout=60.0): stacklevel=2, ) +# HANG PROTECTION (smart watchdog). Two observed hang chains from +# e1c8730f and the prior naive watchdog: +# 1. ThreadPoolExecutor.__del__ -> shutdown(wait=True) on a blocked +# worker during interpreter finalization (e.g., the io_pool +# created in AppController.__init__ at conftest line 65). +# 2. The session-scoped `live_gui` fixture teardown hanging in +# client.reset_session() (HTTP call to the hook server) or +# kill_process_tree(process.pid) / process.wait(timeout=2) waiting +# for the sloppy.py subprocess to die on Windows. +# The naive os._exit(0) at 30s approach CUT OFF BATCHES MID-TEST +# (every batch exited at 32.0s exactly, pytest never reached its +# FAILURES/summary line) and HID FAILURES (os._exit(0) masked +# pytest's non-zero exit code). +# +# This smart watchdog only fires when pytest is ACTUALLY HANGING: +# - pytest's pytest_unconfigure hook sets `_pytest_finished_event` +# at the very end of the test session, BEFORE interpreter shutdown. +# - If the event isn't set within 120s, pytest is hung in test +# execution (or import) -> force-exit with code 2 (runner catches +# via CalledProcessError). +# - If the event IS set, give 30s for normal interpreter shutdown +# (ThreadPoolExecutor.__del__, etc.). If still alive, force-exit. +# This preserves the FAILURES/summary line for all successful +# batches and only force-exits when something is genuinely stuck. 
+import threading
+_pytest_finished_event: threading.Event = threading.Event()
+
+def pytest_unconfigure(config: object) -> None:  # pytest hook: session is done
+    _pytest_finished_event.set()
+
+def _smart_watchdog_exit() -> None:
+    import time  # needed for the post-session grace sleep below
+    if not _pytest_finished_event.wait(timeout=120.0):  # NOTE(review): fixed deadline from thread start, not an inactivity timer
+        os._exit(2)  # session never finished within 120s: hung in tests/import
+    time.sleep(30.0)  # the event stays set, so wait() again would return at once
+    os._exit(2)  # still alive 30s after the session ended: shutdown is hung
+
+threading.Thread(target=_smart_watchdog_exit, daemon=True, name="conftest-smart-watchdog").start()
+
 from src.gui_2 import App
 
 class VerificationLogger:
diff --git a/tests/test_conftest_smart_watchdog.py b/tests/test_conftest_smart_watchdog.py
new file mode 100644
index 00000000..46e529e2
--- /dev/null
+++ b/tests/test_conftest_smart_watchdog.py
@@ -0,0 +1,121 @@
+"""Regression: pytest conftest must install a SMART hang watchdog.
+
+Two hang chains have been observed when running the test suite:
+  1. ThreadPoolExecutor.__del__ -> shutdown(wait=True) on a blocked
+     worker during interpreter finalization.
+  2. The session-scoped `live_gui` fixture teardown hanging in
+     client.reset_session() (HTTP call to the hook server) or
+     kill_process_tree(process.pid) / process.wait(timeout=2)
+     waiting for the sloppy.py subprocess to die on Windows.
+
+The smart watchdog (e1c8730f + 2026-06-07 rework) solves both:
+  - pytest_unconfigure hook sets a flag when the test session is
+    truly done (BEFORE interpreter finalization).
+  - The watchdog waits for that flag with a 120s timeout. If the
+    flag is never set, pytest is hung in test execution -> exit 2.
+  - After the flag is set, give 30s for normal interpreter
+    shutdown. If still alive, the io_pool or live_gui teardown is
+    hung -> exit 2.
+  - Exit code 2 (not 0) so run_tests_batched.py correctly reports
+    a failed batch (CalledProcessError).
+
+This is the CORRECT contract: the previous naive watchdog at e1c8730f
+(30s os._exit(0)) cut off batches mid-test and hid failures.
+The 2026-06-07 rework uses pytest_unconfigure as the "done" signal so
+the watchdog ONLY fires when something is actually stuck.
+
+This test verifies:
+  1. The watchdog thread is registered after the conftest loads.
+  2. It's a daemon thread (doesn't block pytest's own exit).
+  3. The conftest defines the pytest_unconfigure hook and the finished
+     event (checked statically; the hook is never invoked here).
+  4. The exit-code-2 contract and both timeouts appear in the conftest.
+"""
+
+import re
+import sys
+import threading
+from pathlib import Path
+
+import pytest
+
+ROOT = Path(__file__).resolve().parent.parent
+sys.path.insert(0, str(ROOT))
+
+WATCHDOG_NAME = "conftest-smart-watchdog"
+PYTEST_FINISHED_TIMEOUT_SECONDS = 120.0
+SHUTDOWN_GRACE_SECONDS = 30.0
+
+
+def test_watchdog_thread_registered() -> None:
+    threads = threading.enumerate()  # live threads of this pytest process
+    names = [t.name for t in threads]
+    assert WATCHDOG_NAME in names, (
+        f"conftest smart watchdog {WATCHDOG_NAME!r} not found in "
+        f"threading.enumerate(). Active threads: {names}"
+    )
+
+
+def test_watchdog_thread_is_daemon() -> None:
+    for t in threading.enumerate():
+        if t.name == WATCHDOG_NAME:
+            assert t.daemon, (
+                f"watchdog thread is not daemon (daemon={t.daemon}); "
+                f"this would prevent pytest from exiting cleanly"
+            )
+            return
+    pytest.fail(f"watchdog thread {WATCHDOG_NAME!r} not found")
+
+
+def test_pytest_unconfigure_sets_finished_flag() -> None:
+    """
+    Static check: conftest.py must define both the finished event and the
+    pytest_unconfigure hook. The hook is deliberately NOT called here,
+    because it would set the live watchdog's event in mid-session.
+    """
+    conftest_path = Path(__file__).resolve().parent / "conftest.py"
+    text = conftest_path.read_text(encoding="utf-8")
+    assert "_pytest_finished_event" in text, (
+        f"_pytest_finished_event not found in {conftest_path}; "
+        f"smart watchdog signal missing"
+    )
+    assert "pytest_unconfigure" in text, (
+        f"pytest_unconfigure hook not found in {conftest_path}; "
+        f"smart watchdog needs the hook to know when pytest is done"
+    )
+
+
+def test_watchdog_uses_non_zero_exit_code() -> None:
+    """
+    Critical contract: the watchdog must call os._exit(2) (NOT 0) when
+    it fires. run_tests_batched.py uses subprocess.run(check=True) and
+    only reports 'Batch N failed.' on a non-zero exit. Exit 0 would
+    hide the hang and silently report a successful batch.
+    """
+    conftest_path = Path(__file__).resolve().parent / "conftest.py"
+    text = conftest_path.read_text(encoding="utf-8")
+    matches = re.findall(r"os\._exit\(\s*(\d+)\s*\)", text)
+    assert "2" in matches, (
+        f"conftest.py does not call os._exit(2); found exit codes: {matches}. "
+        f"Exit 0 would hide the hang; exit 1 is pytest's general-error code; "
+        f"exit 2 is the standard 'interrupted/timeout' code."
+    )
+
+
+def test_watchdog_timeouts_documented() -> None:
+    """
+    Both timeouts must appear in conftest.py as call arguments, e.g.
+    (timeout=120.0) or (30.0); a bare substring test would pass on comment
+    text alone ("30s", or the "30" inside e1c8730f). Too low kills normal
+    live_gui batches prematurely; too high wastes time on real hangs.
+    """
+    conftest_path = Path(__file__).resolve().parent / "conftest.py"
+    text = conftest_path.read_text(encoding="utf-8")
+    hung = re.escape(str(PYTEST_FINISHED_TIMEOUT_SECONDS))  # "120\.0"
+    assert re.search(rf"\((?:timeout\s*=\s*)?{hung}\)", text), (
+        f"pytest-hung timeout {PYTEST_FINISHED_TIMEOUT_SECONDS}s not found in conftest.py"
+    )
+    grace = re.escape(str(SHUTDOWN_GRACE_SECONDS))  # "30\.0"
+    assert re.search(rf"\((?:timeout\s*=\s*)?{grace}\)", text), (
+        f"shutdown-grace timeout {SHUTDOWN_GRACE_SECONDS}s not found in conftest.py"
+    )