170 lines
6.6 KiB
Python
170 lines
6.6 KiB
Python
"""Regression: pytest conftest must install a signal-based hang watchdog.

Two hang chains have been observed when running the test suite:
  1. ThreadPoolExecutor.__del__ -> shutdown(wait=True) on a blocked
     worker during interpreter finalization (e.g., the io_pool
     created in AppController.__init__ at conftest line ~65).
  2. The session-scoped `live_gui` fixture teardown hanging in
     client.reset_session() (HTTP call to the hook server) or
     kill_process_tree(process.pid) / process.wait(timeout=2) waiting
     for the sloppy.py subprocess to die on Windows.

The signal-based watchdog (2026-06-07 final form) works as follows:
  - pytest_terminal_summary hook (primary) sets _pytest_finished_event
    AFTER the test summary has been printed to the terminal. This is
    when the test session is logically done: the user can see
    "241 passed, 1 skipped in 32.30s" but the process is still alive
    in shutdown. The signal lets us know "the test work is finished,
    anything beyond this is shutdown garbage."
  - pytest_unconfigure hook (fallback) sets the same event in case
    pytest_terminal_summary is bypassed (e.g., a crash mid-summary).
  - Smart watchdog waits for the event with a 300s timeout. If not
    set, the conftest itself is hung in wait_for_warmup or pytest
    never reached the summary -> exit 2.
  - After the event fires, give 5s for normal finalization
    (ThreadPoolExecutor.__del__, etc.), then os._exit(0). This is
    the right behavior: the test session is done, the user can see
    the summary, the runner can move to the next batch.
  - Unconditional 60s watchdog (separate thread) catches the case
    where conftest hangs before any test runs (so no signal is ever
    set). Fires os._exit(2) after 60s regardless of state.

Why the previous attempts failed:
  - e1c8730f: 30s os._exit(0) cut off batches mid-test. os._exit(0)
    masked pytest's exit code so failures were hidden.
  - 719c5e27: changed to os._exit(2) but the watchdog's daemon
    thread continued running through pytest's normal shutdown,
    firing the exit on EVERY batch (even successful ones).
  - 91b19c90: kept exit 2 but the wait-for-pytest_unconfigure
    signal never fired when the conftest's own io_pool hung in
    __del__. The unconditional 90s sledgehammer fired for all
    batches.
  - 44b0b5d4: switched to pytest_unconfigure as signal. Still
    hung because pytest_unconfigure doesn't fire if io_pool hangs.
  - 2026-06-07 final: pytest_terminal_summary fires AFTER the
    summary is printed (which the user can verify in the output)
    but BEFORE the shutdown hangs. This is the right signal.

This test verifies:
  1. The watchdog thread is registered after the conftest loads.
  2. It's a daemon thread (doesn't block pytest's own exit).
  3. pytest_terminal_summary is used as the primary signal source.
  4. pytest_unconfigure is used as a fallback signal source.
  5. The exit code is 2 for hangs (uncaught Exception type) and 0
     for clean exits (so the runner correctly reports the batch).
"""
|
|
|
|
import re
|
|
import sys
|
|
import threading
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
# Parent of the directory that contains this test module.
ROOT: Path = Path(__file__).resolve().parent.parent
# Prepend it so it is searched before every other sys.path entry.
sys.path.insert(0, str(ROOT))
|
|
|
|
# Thread name the watchdog tests look up in threading.enumerate().
WATCHDOG_NAME: str = "conftest-smart-watchdog"
# Timeout value that test_watchdog_timeouts_documented expects to find in
# conftest.py.
# NOTE(review): the module docstring describes a 300s wait for the finished
# event, but this value is 600 -- confirm which one matches conftest.py.
PYTEST_FINISHED_TIMEOUT_SECONDS: float = 600.0
# Grace-period value that the same test expects to find in conftest.py.
SHUTDOWN_GRACE_SECONDS: float = 5.0
|
|
|
|
|
|
def test_watchdog_thread_registered() -> None:
    """Check that a live thread named ``WATCHDOG_NAME`` currently exists.

    The names of all threads reported by ``threading.enumerate()`` are
    collected; the failure message lists them to make a missing watchdog
    easy to diagnose.
    """
    live_names = [thread.name for thread in threading.enumerate()]
    assert WATCHDOG_NAME in live_names, (
        f"conftest smart watchdog {WATCHDOG_NAME!r} not found in "
        f"threading.enumerate(). Active threads: {live_names}"
    )
|
|
|
|
|
|
def test_watchdog_thread_is_daemon() -> None:
    """Check that the watchdog thread is flagged as a daemon thread.

    Only the first live thread whose name equals ``WATCHDOG_NAME`` is
    inspected. The test fails outright when no such thread exists.
    """
    watchdog = next(
        (
            thread
            for thread in threading.enumerate()
            if thread.name == WATCHDOG_NAME
        ),
        None,
    )
    if watchdog is None:
        pytest.fail(f"watchdog thread {WATCHDOG_NAME!r} not found")
    assert watchdog.daemon, (
        f"watchdog thread is not daemon (daemon={watchdog.daemon}); "
        "this would prevent pytest from exiting cleanly"
    )
|
|
|
|
|
|
def test_terminal_summary_hook_sets_finished_event() -> None:
    """Check conftest.py defines the primary "session finished" signal.

    Reads the conftest.py that sits next to this file as text and requires
    both a ``def pytest_terminal_summary`` definition and a
    ``_pytest_finished_event.set()`` call to be present.

    Matching on the ``def`` (rather than the bare hook name) keeps a
    comment or docstring that merely mentions the hook from satisfying the
    check, and mirrors how test_unconfigure_hook_is_fallback_signal looks
    for its hook.

    This is a textual check only: it does not prove that the ``set()``
    call is located inside the hook body.
    """
    conftest_path = Path(__file__).resolve().parent / "conftest.py"
    text = conftest_path.read_text(encoding="utf-8")
    assert "def pytest_terminal_summary" in text, (
        f"pytest_terminal_summary hook not found in {conftest_path}; "
        "this is the primary signal source"
    )
    assert "_pytest_finished_event.set()" in text, (
        f"_pytest_finished_event.set() not found in {conftest_path}"
    )
|
|
|
|
|
|
def test_unconfigure_hook_is_fallback_signal() -> None:
    """Check conftest.py defines the fallback signal hook.

    The conftest.py next to this file is read as text and must contain a
    ``def pytest_unconfigure`` definition. Per the module docstring this
    hook is the fallback used when pytest_terminal_summary is bypassed.
    """
    conftest_path = Path(__file__).resolve().with_name("conftest.py")
    source = conftest_path.read_text(encoding="utf-8")
    hook_defined = "def pytest_unconfigure" in source
    assert hook_defined, (
        f"pytest_unconfigure hook not found in {conftest_path}"
    )
|
|
|
|
|
|
def test_clean_exit_uses_zero_exit_code() -> None:
    """Check conftest.py contains an ``os._exit(0)`` call.

    Intended behavior being pinned: once the finished signal has fired and
    the grace period has elapsed, the watchdog ends the process with exit
    code 0 so the runner sees a clean exit instead of a shutdown hang. The
    unconditional watchdog (a separate thread) keeps using exit code 2 for
    true hangs.

    This is a textual check only; it looks for the literal call anywhere
    in the conftest.py next to this file.

    NOTE(review): the original description says the watchdog forces exit 0
    even when pytest's own exit code would have been 1. The module
    docstring lists "masked pytest's exit code" as a flaw of an earlier
    attempt -- confirm that forcing 0 here is intended.
    """
    conftest_path = Path(__file__).resolve().with_name("conftest.py")
    source = conftest_path.read_text(encoding="utf-8")
    calls_clean_exit = "os._exit(0)" in source
    assert calls_clean_exit, (
        "conftest.py should call os._exit(0) after the signal fires + "
        "grace elapses; the test session is done at that point and the "
        "runner should see a clean exit"
    )
|
|
|
|
|
|
def test_hang_uses_nonzero_exit_code() -> None:
    """Check conftest.py contains an ``os._exit(2)`` call.

    Exit code 2 is what the unconditional watchdog is expected to use when
    it fires. This is a textual check against the conftest.py next to this
    file; it does not verify which code path makes the call.
    """
    conftest_path = Path(__file__).resolve().with_name("conftest.py")
    source = conftest_path.read_text(encoding="utf-8")
    calls_hang_exit = "os._exit(2)" in source
    assert calls_hang_exit, (
        "conftest.py should call os._exit(2) for true hangs"
    )
|
|
|
|
|
|
def _has_numeric_literal(text: str, value: float) -> bool:
    """Return True if *text* contains ``int(value)`` as a standalone number.

    "Standalone" means the digits are not part of a longer number: they are
    not preceded by a digit or a dot, and not followed by a digit, an
    underscore digit-separator, or a non-zero fractional part. An optional
    all-zero fraction is accepted, so both ``5`` and ``5.0`` match for a
    value of 5, while ``65``, ``0.5``, ``5.5`` and ``5_000`` do not.
    """
    whole = int(value)
    pattern = rf"(?<![\d.]){whole}(?:\.0+)?(?![\d_]|\.\d)"
    return re.search(pattern, text) is not None


def test_watchdog_timeouts_documented() -> None:
    """Check conftest.py mentions both watchdog timeout values.

    The conftest.py next to this file is read as text and must contain
    ``PYTEST_FINISHED_TIMEOUT_SECONDS`` and ``SHUTDOWN_GRACE_SECONDS`` as
    standalone numeric literals (integer form, optionally with a ``.0``
    fraction).

    A plain substring test is not enough here: a one-digit value such as
    ``5`` occurs inside almost any file (``65``, ``0.5``, ``15`` ...), so
    the check would pass regardless of the configured timeout.

    If the timeouts drift too low, normal batches with live_gui tests get
    killed prematurely. If too high, real hangs waste time.
    """
    conftest_path = Path(__file__).resolve().parent / "conftest.py"
    text = conftest_path.read_text(encoding="utf-8")
    assert _has_numeric_literal(text, PYTEST_FINISHED_TIMEOUT_SECONDS), (
        f"pytest-hung timeout {PYTEST_FINISHED_TIMEOUT_SECONDS}s not "
        "found in conftest.py"
    )
    assert _has_numeric_literal(text, SHUTDOWN_GRACE_SECONDS), (
        f"shutdown-grace timeout {SHUTDOWN_GRACE_SECONDS}s not found in "
        "conftest.py"
    )
|