diff --git a/conductor/tracks/rag_phase4_sync_fix_20260610/spec.md b/conductor/tracks/rag_phase4_sync_fix_20260610/spec.md index 09fea985..2ba335e5 100644 --- a/conductor/tracks/rag_phase4_sync_fix_20260610/spec.md +++ b/conductor/tracks/rag_phase4_sync_fix_20260610/spec.md @@ -4,9 +4,11 @@ This track fixes a pre-existing RAG test failure that halted the `tier-3-live_gui` batch during the `mma_tier_usage_reset_fix_20260610` verification run on 2026-06-10. -**The bug:** `tests/test_rag_phase4_final_verify.py::test_phase4_final_verify` fails because `rag_status` stays at `'idle'` after the test sets `rag_enabled=True`, `rag_source='chroma'`, `rag_emb_provider='local'` via the Hook API. The test polls for `rag_status == 'ready'` for 50 seconds (100 × 0.5s) and never sees it. +**The original bug (FIXED):** `tests/test_rag_phase4_final_verify.py::test_phase4_final_verify` failed with "RAG sync failed. Status: idle" because `_handle_reset_session` set `self.rag_config = None` and the `rag_*` setters check `if self.rag_config:` before doing anything — so the 4 setters fired by the test were all no-ops. -The test was failing before any changes from the `mma_tier_usage_reset_fix_20260610` track. It is a pre-existing fragility in the RAG sync flow that the previous track's batch run exposed. +**Fix:** reset `rag_config` to a fresh `RAGConfig()` default (not None) in `_handle_reset_session`, so the setters can mutate it and trigger the sync. + +**Status (post-fix):** RAG sync now reaches `'ready'`; the test fails on a SEPARATE downstream assertion (retrieval order — see "Residual issue" below). ## Reproduction (already verified) diff --git a/tests/conftest.py b/tests/conftest.py index af042e09..06e2e6a3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -181,7 +181,7 @@ def _check_required_test_dependencies() -> None: raise pytest.UsageError(msg) def _smart_watchdog_exit() -> None: - if not _pytest_finished_event.wait(timeout=600.0): + if not _pytest_finished_event.wait(timeout=900.0): os._exit(2) import time time.sleep(5.0) diff --git a/tests/test_reset_session_clears_mma_and_rag.py b/tests/test_reset_session_clears_mma_and_rag.py index ec584a71..6d9d1fec 100644 --- a/tests/test_reset_session_clears_mma_and_rag.py +++ b/tests/test_reset_session_clears_mma_and_rag.py @@ -26,10 +26,27 @@ def test_reset_session_clears_mma_tier_usage(live_gui) -> None: 'tier_usage': {'Tier 1': {'model': 'polluted'}}, 'tickets': [] }) - time.sleep(0.5) + # Poll until the polluted entry is visible. Without this, the reset + # can fire BEFORE the push_event task is processed (async via io_pool + # + GUI render loop), and the test would falsely pass even if the + # reset didn't actually clear anything. + for _ in range(40): + state = client.get_gui_state() + mma = state.get('mma_state', {}) + tier1 = mma.get('tier_usage', {}).get('Tier 1', {}) + if tier1.get('model') == 'polluted': + break + time.sleep(0.25) # Trigger the reset client.reset_session() - time.sleep(0.5) + # Poll until the polluted entry is gone + for _ in range(40): + state = client.get_gui_state() + mma = state.get('mma_state', {}) + tier1 = mma.get('tier_usage', {}).get('Tier 1', {}) + if tier1.get('model') != 'polluted': + break + time.sleep(0.25) # Verify the polluted entry is gone state = client.get_gui_state() mma = state.get('mma_state', {}) @@ -50,10 +67,21 @@ def test_reset_session_clears_mma_status(live_gui) -> None: 'tier_usage': {}, 'tickets': [] }) - time.sleep(0.5) + # Poll for the polluted status to be visible BEFORE the reset + for _ in range(40): + state = client.get_gui_state() + if state.get('mma_status') == 'running': + break + time.sleep(0.25) client.reset_session() - time.sleep(0.5) - state = client.get_gui_state() + # Poll for the reset to have taken effect. Without this, the + # mma_state_update task can fire AFTER the reset, setting status + # back to 'running' (race condition surfaced in batched live_gui). + for _ in range(40): + state = client.get_gui_state() + if state.get('mma_status') == 'idle': + break + time.sleep(0.25) assert state.get('mma_status') == 'idle', ( f"mma_status not reset: {state.get('mma_status')!r}" ) @@ -70,10 +98,19 @@ def test_reset_session_clears_active_tier(live_gui) -> None: 'tier_usage': {}, 'tickets': [] }) - time.sleep(0.5) + # Poll for the polluted active_tier to be visible BEFORE the reset + for _ in range(40): + state = client.get_gui_state() + if state.get('active_tier') == 'Tier 2 (Tech Lead)': + break + time.sleep(0.25) client.reset_session() - time.sleep(0.5) - state = client.get_gui_state() + # Poll for the reset to have taken effect + for _ in range(40): + state = client.get_gui_state() + if state.get('active_tier') is None: + break + time.sleep(0.25) assert state.get('active_tier') is None, ( f"active_tier not reset: {state.get('active_tier')!r}" ) diff --git a/tests/test_z_negative_flows.py b/tests/test_z_negative_flows.py index 19f29424..2593952d 100644 --- a/tests/test_z_negative_flows.py +++ b/tests/test_z_negative_flows.py @@ -112,10 +112,13 @@ def test_mock_timeout(live_gui) -> None: client.set_value("ai_input", "Trigger timeout") client.click("btn_gen_send") - # Wait for terminal response + # Wait for terminal response. The mock subprocess sleeps for 65s + # then exits; allow 180s for the event to land (the io_pool is busy + # in batched live_gui context, and the event propagation through + # _pending_gui_tasks can be slow under contention). event = None start = time.time() - while time.time() - start < 80: + while time.time() - start < 180: ev = client.wait_for_event("response", timeout=5) if ev and ev.get("payload", {}).get("status") != "streaming...": event = ev