test(audit): fix critical test suite deadlocks and write exhaustive architectural report

- Fix 'Triple Bingo' history synchronization explosion during streaming

- Implement stateless event buffering in ApiHookClient to prevent dropped events

- Ensure 'tool_execution' events emit consistently across all LLM providers

- Add hard timeouts to all background thread wait() conditions

- Add thorough teardown cleanup to conftest.py's reset_ai_client fixture

- Write highly detailed report_gemini.md exposing asyncio lifecycle flaws
This commit is contained in:
2026-03-05 01:42:47 -05:00
parent bfdbd43785
commit 35480a26dc
15 changed files with 715 additions and 481 deletions

View File

@@ -56,6 +56,28 @@ class VerificationLogger:
f.write(f"{status} {self.test_name} ({result_msg})\n\n")
print(f"[FINAL] {self.test_name}: {status} - {result_msg}")
@pytest.fixture(autouse=True)
def reset_ai_client() -> Generator[None, None, None]:
    """
    Autouse fixture that resets the ai_client global state before each test.
    This is critical for preventing state pollution between tests.
    """
    import ai_client
    import mcp_client
    # Start from a clean session so nothing leaks in from a prior test.
    ai_client.reset_session()
    # Null out every module-level callback so no handler carries over.
    for callback_name in (
        "confirm_and_run_callback",
        "comms_log_callback",
        "tool_log_callback",
    ):
        setattr(ai_client, callback_name, None)
    # Drop any event listeners registered by earlier tests.
    ai_client.events.clear()
    # Restore the default provider/model pairing.
    ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
    # Wipe MCP client configuration (no servers, no tools).
    mcp_client.configure([], [])
    yield
    # Post-test: reset the session again so teardown-side state is cleared too.
    ai_client.reset_session()
@pytest.fixture
def vlogger(request) -> VerificationLogger:
"""Fixture to provide a VerificationLogger instance to a test."""
@@ -109,8 +131,8 @@ def mock_app() -> Generator[App, None, None]:
app = App()
yield app
if hasattr(app, 'controller'):
app.controller.stop_services()
if hasattr(app, 'shutdown'):
app.controller.shutdown()
elif hasattr(app, 'shutdown'):
app.shutdown()
@pytest.fixture
@@ -142,7 +164,7 @@ def app_instance() -> Generator[App, None, None]:
yield app
# Cleanup: Ensure background threads and asyncio loop are stopped
if hasattr(app, 'controller'):
app.controller.stop_services()
app.controller.shutdown()
if hasattr(app, 'shutdown'):
app.shutdown()
@@ -209,10 +231,13 @@ def live_gui() -> Generator[tuple[subprocess.Popen, str], None, None]:
# Check if already running (shouldn't be)
try:
resp = requests.get("http://127.0.0.1:8999/status", timeout=0.1)
already_up = resp.status_code == 200
except: already_up = False
diag.log_state("Hook Server Port 8999", "Down", "UP" if already_up else "Down")
resp = requests.get("http://127.0.0.1:8999/status", timeout=0.5)
if resp.status_code == 200:
print("[Fixture] WARNING: Hook Server already up on port 8999. Test state might be polluted.")
# Optionally try to reset it
try: requests.post("http://127.0.0.1:8999/api/gui", json={"action": "click", "item": "btn_reset"}, timeout=1)
except: pass
except: pass
print(f"\n[Fixture] Starting {gui_script} --enable-test-hooks in {temp_workspace}...")
os.makedirs("logs", exist_ok=True)

View File

@@ -52,7 +52,6 @@ def test_tools_sim_live(live_gui: Any) -> None:
sim.run() # Ensure history is updated via the async queue
time.sleep(2)
sim.teardown()
@pytest.mark.integration
def test_execution_sim_live(live_gui: Any) -> None:
"""Run the Execution & Modals simulation against a live GUI."""
@@ -60,7 +59,11 @@ def test_execution_sim_live(live_gui: Any) -> None:
assert client.wait_for_server(timeout=10)
sim = ExecutionSimulation(client)
sim.setup("LiveExecutionSim")
# Enable manual approval to test modals
client.set_value('manual_approve', True)
client.set_value('current_provider', 'gemini_cli')
client.set_value('gcli_path', f'"{sys.executable}" "{os.path.abspath("tests/mock_gemini_cli.py")}"')
sim.run()
time.sleep(2)
sim.teardown()

View File

@@ -56,7 +56,8 @@ def test_gemini_cli_parameter_resilience(live_gui: Any) -> None:
"""
client = ApiHookClient("http://127.0.0.1:8999")
client.click("btn_reset")
time.sleep(1.5)
time.sleep(1.0)
client.set_value("auto_add_history", True)
client.set_value("manual_approve", True)
client.select_list_item("proj_files", "manual_slop")
@@ -130,7 +131,8 @@ def test_gemini_cli_loop_termination(live_gui: Any) -> None:
"""
client = ApiHookClient("http://127.0.0.1:8999")
client.click("btn_reset")
time.sleep(1.5)
time.sleep(1.0)
client.set_value("auto_add_history", True)
client.set_value("manual_approve", True)
client.select_list_item("proj_files", "manual_slop")

View File

@@ -13,7 +13,8 @@ def test_gemini_cli_full_integration(live_gui: Any) -> None:
client = ApiHookClient("http://127.0.0.1:8999")
# 0. Reset session and enable history
client.click("btn_reset")
time.sleep(1.5)
time.sleep(1.0)
client.set_value("auto_add_history", True)
client.set_value("manual_approve", True)
# Switch to manual_slop project explicitly
@@ -80,7 +81,8 @@ def test_gemini_cli_rejection_and_history(live_gui: Any) -> None:
client = ApiHookClient("http://127.0.0.1:8999")
# 0. Reset session
client.click("btn_reset")
time.sleep(1.5)
time.sleep(1.0)
client.set_value("auto_add_history", True)
client.set_value("manual_approve", True)
client.select_list_item("proj_files", "manual_slop")

View File

@@ -68,3 +68,11 @@ def test_visual_mma_components(live_gui):
assert tickets[1]['status'] == "running"
print("Visual MMA component verification PASSED.")
# Clean up the pending modal to prevent polluting subsequent tests
print("Cleaning up pending MMA modal...")
client.post_gui({
"action": "click",
"item": "btn_approve_mma_step"
})
time.sleep(0.5)