test(audit): fix critical test suite deadlocks and write exhaustive architectural report

- Fix 'Triple Bingo' history synchronization explosion during streaming

- Implement stateless event buffering in ApiHookClient to prevent dropped events

- Ensure 'tool_execution' events emit consistently across all LLM providers

- Add hard timeouts to all background thread wait() conditions

- Add thorough teardown cleanup to conftest.py's reset_ai_client fixture

- Write highly detailed report_gemini.md exposing asyncio lifecycle flaws
This commit is contained in:
2026-03-05 01:42:47 -05:00
parent bfdbd43785
commit 35480a26dc
15 changed files with 715 additions and 481 deletions

View File

@@ -56,6 +56,28 @@ class VerificationLogger:
f.write(f"{status} {self.test_name} ({result_msg})\n\n")
print(f"[FINAL] {self.test_name}: {status} - {result_msg}")
@pytest.fixture(autouse=True)
def reset_ai_client() -> Generator[None, None, None]:
    """
    Autouse fixture that resets the ai_client global state before each test.
    This is critical for preventing state pollution between tests.
    """
    import ai_client
    import mcp_client
    # Start from a clean session so nothing leaks in from a prior test.
    ai_client.reset_session()
    # Null out every module-level callback so no handler carries over.
    for callback_name in (
        "confirm_and_run_callback",
        "comms_log_callback",
        "tool_log_callback",
    ):
        setattr(ai_client, callback_name, None)
    # Drop any event listeners registered by earlier tests.
    ai_client.events.clear()
    # Restore the default provider/model pairing.
    ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
    # Wipe MCP client configuration (no servers, no tools).
    mcp_client.configure([], [])
    yield
    # Post-test: reset the session again so teardown-side state is cleared too.
    ai_client.reset_session()
@pytest.fixture
def vlogger(request) -> VerificationLogger:
"""Fixture to provide a VerificationLogger instance to a test."""
@@ -109,8 +131,8 @@ def mock_app() -> Generator[App, None, None]:
app = App()
yield app
if hasattr(app, 'controller'):
app.controller.stop_services()
if hasattr(app, 'shutdown'):
app.controller.shutdown()
elif hasattr(app, 'shutdown'):
app.shutdown()
@pytest.fixture
@@ -142,7 +164,7 @@ def app_instance() -> Generator[App, None, None]:
yield app
# Cleanup: Ensure background threads and asyncio loop are stopped
if hasattr(app, 'controller'):
app.controller.stop_services()
app.controller.shutdown()
if hasattr(app, 'shutdown'):
app.shutdown()
@@ -209,10 +231,13 @@ def live_gui() -> Generator[tuple[subprocess.Popen, str], None, None]:
# Check if already running (shouldn't be)
try:
resp = requests.get("http://127.0.0.1:8999/status", timeout=0.1)
already_up = resp.status_code == 200
except: already_up = False
diag.log_state("Hook Server Port 8999", "Down", "UP" if already_up else "Down")
resp = requests.get("http://127.0.0.1:8999/status", timeout=0.5)
if resp.status_code == 200:
print("[Fixture] WARNING: Hook Server already up on port 8999. Test state might be polluted.")
# Optionally try to reset it
try: requests.post("http://127.0.0.1:8999/api/gui", json={"action": "click", "item": "btn_reset"}, timeout=1)
except: pass
except: pass
print(f"\n[Fixture] Starting {gui_script} --enable-test-hooks in {temp_workspace}...")
os.makedirs("logs", exist_ok=True)

View File

@@ -52,7 +52,6 @@ def test_tools_sim_live(live_gui: Any) -> None:
sim.run() # Ensure history is updated via the async queue
time.sleep(2)
sim.teardown()
@pytest.mark.integration
def test_execution_sim_live(live_gui: Any) -> None:
"""Run the Execution & Modals simulation against a live GUI."""
@@ -60,7 +59,11 @@ def test_execution_sim_live(live_gui: Any) -> None:
assert client.wait_for_server(timeout=10)
sim = ExecutionSimulation(client)
sim.setup("LiveExecutionSim")
# Enable manual approval to test modals
client.set_value('manual_approve', True)
client.set_value('current_provider', 'gemini_cli')
client.set_value('gcli_path', f'"{sys.executable}" "{os.path.abspath("tests/mock_gemini_cli.py")}"')
sim.run()
time.sleep(2)
sim.teardown()

View File

@@ -56,7 +56,8 @@ def test_gemini_cli_parameter_resilience(live_gui: Any) -> None:
"""
client = ApiHookClient("http://127.0.0.1:8999")
client.click("btn_reset")
time.sleep(1.5)
time.sleep(1.0)
client.set_value("auto_add_history", True)
client.set_value("manual_approve", True)
client.select_list_item("proj_files", "manual_slop")
@@ -130,7 +131,8 @@ def test_gemini_cli_loop_termination(live_gui: Any) -> None:
"""
client = ApiHookClient("http://127.0.0.1:8999")
client.click("btn_reset")
time.sleep(1.5)
time.sleep(1.0)
client.set_value("auto_add_history", True)
client.set_value("manual_approve", True)
client.select_list_item("proj_files", "manual_slop")

View File

@@ -13,7 +13,8 @@ def test_gemini_cli_full_integration(live_gui: Any) -> None:
client = ApiHookClient("http://127.0.0.1:8999")
# 0. Reset session and enable history
client.click("btn_reset")
time.sleep(1.5)
time.sleep(1.0)
client.set_value("auto_add_history", True)
client.set_value("manual_approve", True)
# Switch to manual_slop project explicitly
@@ -80,7 +81,8 @@ def test_gemini_cli_rejection_and_history(live_gui: Any) -> None:
client = ApiHookClient("http://127.0.0.1:8999")
# 0. Reset session
client.click("btn_reset")
time.sleep(1.5)
time.sleep(1.0)
client.set_value("auto_add_history", True)
client.set_value("manual_approve", True)
client.select_list_item("proj_files", "manual_slop")

View File

@@ -68,3 +68,11 @@ def test_visual_mma_components(live_gui):
assert tickets[1]['status'] == "running"
print("Visual MMA component verification PASSED.")
# Clean up the pending modal to prevent polluting subsequent tests
print("Cleaning up pending MMA modal...")
client.post_gui({
"action": "click",
"item": "btn_approve_mma_step"
})
time.sleep(0.5)