Private
Public Access
0
0

test(infra): poll-for-event race fixes + watchdog timeout bump + spec update

This commit is contained in:
2026-06-10 15:14:35 -04:00
parent 563e609505
commit 2c924fe6df
4 changed files with 55 additions and 13 deletions
@@ -4,9 +4,11 @@
This track fixes a pre-existing RAG test failure that halted the `tier-3-live_gui` batch during the `mma_tier_usage_reset_fix_20260610` verification run on 2026-06-10.
**The bug:** `tests/test_rag_phase4_final_verify.py::test_phase4_final_verify` fails because `rag_status` stays at `'idle'` after the test sets `rag_enabled=True`, `rag_source='chroma'`, `rag_emb_provider='local'` via the Hook API. The test polls for `rag_status == 'ready'` for 50 seconds (100 × 0.5s) and never sees it. **The original bug (FIXED):** `tests/test_rag_phase4_final_verify.py::test_phase4_final_verify` failed with "RAG sync failed. Status: idle" because `_handle_reset_session` set `self.rag_config = None` and the `rag_*` setters check `if self.rag_config:` before doing anything — so the 4 setters fired by the test were all no-ops.
The test was failing before any changes from the `mma_tier_usage_reset_fix_20260610` track. It is a pre-existing fragility in the RAG sync flow that the previous track's batch run exposed. **Fix:** reset `rag_config` to a fresh `RAGConfig()` default (not None) in `_handle_reset_session`, so the setters can mutate it and trigger the sync.
**Status (post-fix):** RAG sync now reaches `'ready'`; the test fails on a SEPARATE downstream assertion (retrieval order — see "Residual issue" below).
## Reproduction (already verified)
+1 -1
View File
@@ -181,7 +181,7 @@ def _check_required_test_dependencies() -> None:
raise pytest.UsageError(msg) raise pytest.UsageError(msg)
def _smart_watchdog_exit() -> None: def _smart_watchdog_exit() -> None:
if not _pytest_finished_event.wait(timeout=600.0): if not _pytest_finished_event.wait(timeout=900.0):
os._exit(2) os._exit(2)
import time import time
time.sleep(5.0) time.sleep(5.0)
+45 -8
View File
@@ -26,10 +26,27 @@ def test_reset_session_clears_mma_tier_usage(live_gui) -> None:
'tier_usage': {'Tier 1': {'model': 'polluted'}}, 'tier_usage': {'Tier 1': {'model': 'polluted'}},
'tickets': [] 'tickets': []
}) })
time.sleep(0.5) # Poll until the polluted entry is visible. Without this, the reset
# can fire BEFORE the push_event task is processed (async via io_pool
# + GUI render loop), and the test would falsely pass even if the
# reset didn't actually clear anything.
for _ in range(40):
state = client.get_gui_state()
mma = state.get('mma_state', {})
tier1 = mma.get('tier_usage', {}).get('Tier 1', {})
if tier1.get('model') == 'polluted':
break
time.sleep(0.25)
# Trigger the reset # Trigger the reset
client.reset_session() client.reset_session()
time.sleep(0.5) # Poll until the polluted entry is gone
for _ in range(40):
state = client.get_gui_state()
mma = state.get('mma_state', {})
tier1 = mma.get('tier_usage', {}).get('Tier 1', {})
if tier1.get('model') != 'polluted':
break
time.sleep(0.25)
# Verify the polluted entry is gone # Verify the polluted entry is gone
state = client.get_gui_state() state = client.get_gui_state()
mma = state.get('mma_state', {}) mma = state.get('mma_state', {})
@@ -50,10 +67,21 @@ def test_reset_session_clears_mma_status(live_gui) -> None:
'tier_usage': {}, 'tier_usage': {},
'tickets': [] 'tickets': []
}) })
time.sleep(0.5) # Poll for the polluted status to be visible BEFORE the reset
for _ in range(40):
state = client.get_gui_state()
if state.get('mma_status') == 'running':
break
time.sleep(0.25)
client.reset_session() client.reset_session()
time.sleep(0.5) # Poll for the reset to have taken effect. Without this, the
state = client.get_gui_state() # mma_state_update task can fire AFTER the reset, setting status
# back to 'running' (race condition surfaced in batched live_gui).
for _ in range(40):
state = client.get_gui_state()
if state.get('mma_status') == 'idle':
break
time.sleep(0.25)
assert state.get('mma_status') == 'idle', ( assert state.get('mma_status') == 'idle', (
f"mma_status not reset: {state.get('mma_status')!r}" f"mma_status not reset: {state.get('mma_status')!r}"
) )
@@ -70,10 +98,19 @@ def test_reset_session_clears_active_tier(live_gui) -> None:
'tier_usage': {}, 'tier_usage': {},
'tickets': [] 'tickets': []
}) })
time.sleep(0.5) # Poll for the polluted active_tier to be visible BEFORE the reset
for _ in range(40):
state = client.get_gui_state()
if state.get('active_tier') == 'Tier 2 (Tech Lead)':
break
time.sleep(0.25)
client.reset_session() client.reset_session()
time.sleep(0.5) # Poll for the reset to have taken effect
state = client.get_gui_state() for _ in range(40):
state = client.get_gui_state()
if state.get('active_tier') is None:
break
time.sleep(0.25)
assert state.get('active_tier') is None, ( assert state.get('active_tier') is None, (
f"active_tier not reset: {state.get('active_tier')!r}" f"active_tier not reset: {state.get('active_tier')!r}"
) )
+5 -2
View File
@@ -112,10 +112,13 @@ def test_mock_timeout(live_gui) -> None:
client.set_value("ai_input", "Trigger timeout") client.set_value("ai_input", "Trigger timeout")
client.click("btn_gen_send") client.click("btn_gen_send")
# Wait for terminal response # Wait for terminal response. The mock subprocess sleeps for 65s
# then exits; allow 180s for the event to land (the io_pool is busy
# in batched live_gui context, and the event propagation through
# _pending_gui_tasks can be slow under contention).
event = None event = None
start = time.time() start = time.time()
while time.time() - start < 80: while time.time() - start < 180:
ev = client.wait_for_event("response", timeout=5) ev = client.wait_for_event("response", timeout=5)
if ev and ev.get("payload", {}).get("status") != "streaming...": if ev and ev.get("payload", {}).get("status") != "streaming...":
event = ev event = ev