test(audit): fix critical test suite deadlocks and write exhaustive architectural report
- Fix 'Triple Bingo' history synchronization explosion during streaming
- Implement stateless event buffering in ApiHookClient to prevent dropped events
- Ensure 'tool_execution' events emit consistently across all LLM providers
- Add hard timeouts to all background thread wait() conditions
- Add thorough teardown cleanup to conftest.py's reset_ai_client fixture
- Write highly detailed report_gemini.md exposing asyncio lifecycle flaws
This commit is contained in:
@@ -56,6 +56,28 @@ class VerificationLogger:
|
||||
f.write(f"{status} {self.test_name} ({result_msg})\n\n")
|
||||
print(f"[FINAL] {self.test_name}: {status} - {result_msg}")
|
||||
|
||||
@pytest.fixture(autouse=True)
def reset_ai_client() -> Generator[None, None, None]:
    """
    Autouse fixture that puts ai_client and mcp_client into a known state.

    Before the test: resets the ai_client session, sets the three
    module-level callbacks to None, clears ai_client.events, selects the
    "gemini" provider with the "gemini-2.5-flash-lite" model, and calls
    mcp_client.configure with two empty lists.

    After the test: resets the ai_client session once more.
    """
    import ai_client
    import mcp_client

    ai_client.reset_session()

    # Drop any callbacks so nothing carries over from an earlier test.
    for hook_name in (
        "confirm_and_run_callback",
        "comms_log_callback",
        "tool_log_callback",
    ):
        setattr(ai_client, hook_name, None)

    # Remove every registered event listener.
    ai_client.events.clear()

    # Return to the default provider/model pair.
    ai_client.set_provider("gemini", "gemini-2.5-flash-lite")

    # Reset MCP client state by configuring it with two empty lists.
    mcp_client.configure([], [])

    yield

    ai_client.reset_session()
|
||||
|
||||
@pytest.fixture
|
||||
def vlogger(request) -> VerificationLogger:
|
||||
"""Fixture to provide a VerificationLogger instance to a test."""
|
||||
@@ -109,8 +131,8 @@ def mock_app() -> Generator[App, None, None]:
|
||||
app = App()
|
||||
yield app
|
||||
if hasattr(app, 'controller'):
|
||||
app.controller.stop_services()
|
||||
if hasattr(app, 'shutdown'):
|
||||
app.controller.shutdown()
|
||||
elif hasattr(app, 'shutdown'):
|
||||
app.shutdown()
|
||||
|
||||
@pytest.fixture
|
||||
@@ -142,7 +164,7 @@ def app_instance() -> Generator[App, None, None]:
|
||||
yield app
|
||||
# Cleanup: Ensure background threads and asyncio loop are stopped
|
||||
if hasattr(app, 'controller'):
|
||||
app.controller.stop_services()
|
||||
app.controller.shutdown()
|
||||
|
||||
if hasattr(app, 'shutdown'):
|
||||
app.shutdown()
|
||||
@@ -209,10 +231,13 @@ def live_gui() -> Generator[tuple[subprocess.Popen, str], None, None]:
|
||||
|
||||
# Check if already running (shouldn't be)
|
||||
try:
|
||||
resp = requests.get("http://127.0.0.1:8999/status", timeout=0.1)
|
||||
already_up = resp.status_code == 200
|
||||
except: already_up = False
|
||||
diag.log_state("Hook Server Port 8999", "Down", "UP" if already_up else "Down")
|
||||
resp = requests.get("http://127.0.0.1:8999/status", timeout=0.5)
|
||||
if resp.status_code == 200:
|
||||
print("[Fixture] WARNING: Hook Server already up on port 8999. Test state might be polluted.")
|
||||
# Optionally try to reset it
|
||||
try: requests.post("http://127.0.0.1:8999/api/gui", json={"action": "click", "item": "btn_reset"}, timeout=1)
|
||||
except: pass
|
||||
except: pass
|
||||
|
||||
print(f"\n[Fixture] Starting {gui_script} --enable-test-hooks in {temp_workspace}...")
|
||||
os.makedirs("logs", exist_ok=True)
|
||||
|
||||
@@ -52,7 +52,6 @@ def test_tools_sim_live(live_gui: Any) -> None:
|
||||
sim.run() # Ensure history is updated via the async queue
|
||||
time.sleep(2)
|
||||
sim.teardown()
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_execution_sim_live(live_gui: Any) -> None:
|
||||
"""Run the Execution & Modals simulation against a live GUI."""
|
||||
@@ -60,7 +59,11 @@ def test_execution_sim_live(live_gui: Any) -> None:
|
||||
assert client.wait_for_server(timeout=10)
|
||||
sim = ExecutionSimulation(client)
|
||||
sim.setup("LiveExecutionSim")
|
||||
# Enable manual approval to test modals
|
||||
client.set_value('manual_approve', True)
|
||||
client.set_value('current_provider', 'gemini_cli')
|
||||
client.set_value('gcli_path', f'"{sys.executable}" "{os.path.abspath("tests/mock_gemini_cli.py")}"')
|
||||
sim.run()
|
||||
time.sleep(2)
|
||||
sim.teardown()
|
||||
|
||||
|
||||
@@ -56,7 +56,8 @@ def test_gemini_cli_parameter_resilience(live_gui: Any) -> None:
|
||||
"""
|
||||
client = ApiHookClient("http://127.0.0.1:8999")
|
||||
client.click("btn_reset")
|
||||
time.sleep(1.5)
|
||||
time.sleep(1.0)
|
||||
|
||||
client.set_value("auto_add_history", True)
|
||||
client.set_value("manual_approve", True)
|
||||
client.select_list_item("proj_files", "manual_slop")
|
||||
@@ -130,7 +131,8 @@ def test_gemini_cli_loop_termination(live_gui: Any) -> None:
|
||||
"""
|
||||
client = ApiHookClient("http://127.0.0.1:8999")
|
||||
client.click("btn_reset")
|
||||
time.sleep(1.5)
|
||||
time.sleep(1.0)
|
||||
|
||||
client.set_value("auto_add_history", True)
|
||||
client.set_value("manual_approve", True)
|
||||
client.select_list_item("proj_files", "manual_slop")
|
||||
|
||||
@@ -13,7 +13,8 @@ def test_gemini_cli_full_integration(live_gui: Any) -> None:
|
||||
client = ApiHookClient("http://127.0.0.1:8999")
|
||||
# 0. Reset session and enable history
|
||||
client.click("btn_reset")
|
||||
time.sleep(1.5)
|
||||
time.sleep(1.0)
|
||||
|
||||
client.set_value("auto_add_history", True)
|
||||
client.set_value("manual_approve", True)
|
||||
# Switch to manual_slop project explicitly
|
||||
@@ -80,7 +81,8 @@ def test_gemini_cli_rejection_and_history(live_gui: Any) -> None:
|
||||
client = ApiHookClient("http://127.0.0.1:8999")
|
||||
# 0. Reset session
|
||||
client.click("btn_reset")
|
||||
time.sleep(1.5)
|
||||
time.sleep(1.0)
|
||||
|
||||
client.set_value("auto_add_history", True)
|
||||
client.set_value("manual_approve", True)
|
||||
client.select_list_item("proj_files", "manual_slop")
|
||||
|
||||
@@ -68,3 +68,11 @@ def test_visual_mma_components(live_gui):
|
||||
assert tickets[1]['status'] == "running"
|
||||
|
||||
print("Visual MMA component verification PASSED.")
|
||||
|
||||
# Clean up the pending modal to prevent polluting subsequent tests
|
||||
print("Cleaning up pending MMA modal...")
|
||||
client.post_gui({
|
||||
"action": "click",
|
||||
"item": "btn_approve_mma_step"
|
||||
})
|
||||
time.sleep(0.5)
|
||||
|
||||
Reference in New Issue
Block a user