44b0b5d4ee
Re-add hang protection after the user's run showed pytest hanging in interpreter shutdown (ThreadPoolExecutor.__del__ / live_gui teardown) after Batch 1 completed successfully. The previous naive watchdog (e1c8730f, 30s os._exit(0)) cut off batches mid-test; the immediate removal (4103c08e) let real hangs wait 1000s for the runner's subprocess timeout. This SMART watchdog only fires when pytest is ACTUALLY hanging: - pytest_unconfigure hook sets _pytest_finished_event when the test session is done (BEFORE interpreter finalization). - Watchdog waits for the event with 120s timeout: * If not set in 120s: pytest is hung in test execution -> os._exit(2). * If set: pytest finished cleanly; give 30s for normal interpreter shutdown (ThreadPoolExecutor.__del__, etc.). * If still alive after grace: io_pool / live_gui teardown is hung -> os._exit(2). - Exit code 2 (not 0) so run_tests_batched.py correctly reports a failed batch (CalledProcessError). The 0 in the previous version masked hangs and hid test failures. Contract: - Normal batch (35s execution, 2s shutdown): pytest_unconfigure fires at 35s, watchdog's first wait returns immediately, 30s grace elapses without fire, pytest exits with 0. Runner: passed. - Hung batch: pytest_unconfigure never fires, watchdog fires os._exit(2) at 120s. Runner: failed. - Hung shutdown (io_pool.__del__ blocks): pytest_unconfigure fires, 30s grace elapses, watchdog fires os._exit(2). Runner: failed. 5 new tests in tests/test_conftest_smart_watchdog.py: - test_watchdog_thread_registered: daemon thread named conftest-smart-watchdog - test_watchdog_thread_is_daemon: doesn't block pytest exit - test_pytest_unconfigure_sets_finished_flag: hook exists in conftest - test_watchdog_uses_non_zero_exit_code: os._exit(2) is used - test_watchdog_timeouts_documented: 120s and 30s are present
122 lines
4.5 KiB
Python
122 lines
4.5 KiB
Python
"""Regression: pytest conftest must install a SMART hang watchdog.
|
|
|
|
Two hang chains have been observed when running the test suite:
|
|
1. ThreadPoolExecutor.__del__ -> shutdown(wait=True) on a blocked
|
|
worker during interpreter finalization.
|
|
2. The session-scoped `live_gui` fixture teardown hanging in
|
|
client.reset_session() (HTTP call to the hook server) or
|
|
kill_process_tree(process.pid) / process.wait(timeout=2)
|
|
waiting for the sloppy.py subprocess to die on Windows.
|
|
|
|
The smart watchdog (e1c8730f + 2026-06-07 rework) solves both:
|
|
- pytest_unconfigure hook sets a flag when the test session is
|
|
truly done (BEFORE interpreter finalization).
|
|
- The watchdog waits for that flag with a 120s timeout. If the
|
|
flag is never set, pytest is hung in test execution -> exit 2.
|
|
- After the flag is set, give 30s for normal interpreter
|
|
shutdown. If still alive, the io_pool or live_gui teardown is
|
|
hung -> exit 2.
|
|
- Exit code 2 (not 0) so run_tests_batched.py correctly reports
|
|
a failed batch (CalledProcessError).
|
|
|
|
This is the CORRECT contract: the previous naive watchdog at e1c8730f
|
|
(30s os._exit(0)) cut off batches mid-test and hid failures. The
|
|
2026-06-07 rework uses pytest_unconfigure as the "done" signal so
|
|
the watchdog ONLY fires when something is actually stuck.
|
|
|
|
This test module verifies:
  1. The watchdog thread is registered after the conftest loads.
  2. It's a daemon thread (doesn't block pytest's own exit).
  3. The conftest contains the pytest_unconfigure hook and the
     finished flag it sets (so the watchdog's first wait returns
     immediately on clean exit).
  4. The exit-code-2 contract is present in the conftest
     (an os._exit(2) call appears in its source).
  5. The 120s pytest-hung timeout and the 30s shutdown-grace timeout
     both appear in the conftest.
|
|
"""
|
|
|
|
import re
|
|
import sys
|
|
import threading
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
# Parent of the directory that contains this test file.
ROOT: Path = Path(__file__).resolve().parent.parent
# Prepended (not appended) so it wins over any same-named installed package;
# presumably this makes the project's own modules importable from the tests.
sys.path.insert(0, str(ROOT))

# Thread name the tests look for in threading.enumerate().
WATCHDOG_NAME: str = "conftest-smart-watchdog"
# How long the watchdog waits for the "pytest finished" flag before it
# treats the run as hung in test execution.
PYTEST_FINISHED_TIMEOUT_SECONDS: float = 120.0
# Extra time allowed for interpreter shutdown once the flag has been set.
SHUTDOWN_GRACE_SECONDS: float = 30.0
|
|
|
|
|
|
def test_watchdog_thread_registered() -> None:
    """Check that a live thread carries the watchdog's name.

    The failure message lists every thread name that was seen, so a
    renamed or missing watchdog is easy to spot in the test output.
    """
    active_names = [thread.name for thread in threading.enumerate()]
    failure_message = (
        f"conftest smart watchdog {WATCHDOG_NAME!r} not found in "
        f"threading.enumerate(). Active threads: {active_names}"
    )
    assert WATCHDOG_NAME in active_names, failure_message
|
|
|
|
|
|
def test_watchdog_thread_is_daemon() -> None:
    """Check that the watchdog thread is a daemon thread.

    Only the first live thread named WATCHDOG_NAME is inspected. The
    test fails outright when no such thread exists.
    """
    watchdog = next(
        (thread for thread in threading.enumerate() if thread.name == WATCHDOG_NAME),
        None,
    )
    if watchdog is None:
        pytest.fail(f"watchdog thread {WATCHDOG_NAME!r} not found")

    # A non-daemon watchdog would keep the interpreter alive on its own.
    assert watchdog.daemon, (
        f"watchdog thread is not daemon (daemon={watchdog.daemon}); "
        "this would prevent pytest from exiting cleanly"
    )
|
|
|
|
|
|
def test_pytest_unconfigure_sets_finished_flag() -> None:
    """Check that conftest.py mentions the hook and the finished event.

    This is a source-text check only: it reads the conftest.py that sits
    next to this file and requires both ``_pytest_finished_event`` and
    ``pytest_unconfigure`` to appear in it. The hook is not invoked and
    the event is not inspected at runtime.
    """
    conftest_path = Path(__file__).resolve().parent / "conftest.py"
    source = conftest_path.read_text(encoding="utf-8")

    # (required identifier, failure message), checked in this order.
    required_markers = (
        (
            "_pytest_finished_event",
            f"_pytest_finished_event not found in {conftest_path}; "
            "smart watchdog signal missing",
        ),
        (
            "pytest_unconfigure",
            f"pytest_unconfigure hook not found in {conftest_path}; "
            "smart watchdog needs the hook to know when pytest is done",
        ),
    )
    for marker, failure_message in required_markers:
        assert marker in source, failure_message
|
|
|
|
|
|
def test_watchdog_uses_non_zero_exit_code() -> None:
    """Check that conftest.py contains an ``os._exit(2)`` call.

    The module docstring explains the contract: the watchdog must exit
    with code 2 rather than 0 so that the batch runner reports a hung
    batch as failed instead of silently passing it.

    This is a source-text scan. It collects every integer literal passed
    to ``os._exit(...)`` in conftest.py and requires ``2`` to be among
    them; it does not forbid other exit codes from appearing.
    """
    conftest_path = Path(__file__).resolve().parent / "conftest.py"
    source = conftest_path.read_text(encoding="utf-8")

    exit_call = re.compile(r"os\._exit\(\s*(\d+)\s*\)")
    exit_codes = [call.group(1) for call in exit_call.finditer(source)]

    assert "2" in exit_codes, (
        f"conftest.py does not call os._exit(2); found exit codes: {exit_codes}. "
        "Exit 0 would hide the hang; exit 1 is pytest's general-error code; "
        "exit 2 is the standard 'interrupted/timeout' code."
    )
|
|
|
|
|
|
def _mentions_whole_number(text: str, value: int) -> bool:
    """Return True when *value* occurs in *text* as a standalone number.

    The look-arounds reject hits that are only part of a longer numeric
    literal, so 30 is not satisfied by 300, 1300, 0.30 or 30_000, while
    30, 30.0 and 30s still count.
    """
    pattern = rf"(?<![\d.]){value}(?!_?\d)"
    return re.search(pattern, text) is not None


def test_watchdog_timeouts_documented() -> None:
    """Check that conftest.py mentions both watchdog timeouts.

    The integer forms of PYTEST_FINISHED_TIMEOUT_SECONDS (120) and
    SHUTDOWN_GRACE_SECONDS (30) must each appear in the conftest.py next
    to this file. If the values drift too low, normal batches with
    live_gui tests get killed prematurely; if too high, real hangs
    waste time.

    Each value must appear as a standalone number. A plain substring
    test would also accept "30" inside "300" or "1300", letting this
    test pass after the real timeout was changed or removed.
    """
    conftest_path = Path(__file__).resolve().parent / "conftest.py"
    text = conftest_path.read_text(encoding="utf-8")

    assert _mentions_whole_number(text, int(PYTEST_FINISHED_TIMEOUT_SECONDS)), (
        f"pytest-hung timeout {PYTEST_FINISHED_TIMEOUT_SECONDS}s not "
        f"found in conftest.py"
    )
    assert _mentions_whole_number(text, int(SHUTDOWN_GRACE_SECONDS)), (
        f"shutdown-grace timeout {SHUTDOWN_GRACE_SECONDS}s not found in "
        f"conftest.py"
    )
|