test(audit): fix critical test suite deadlocks and write exhaustive architectural report

- Fix 'Triple Bingo' history synchronization explosion during streaming

- Implement stateless event buffering in ApiHookClient to prevent dropped events

- Ensure 'tool_execution' events emit consistently across all LLM providers

- Add hard timeouts to all background thread wait() conditions

- Add thorough teardown cleanup to conftest.py's reset_ai_client fixture

- Write highly detailed report_gemini.md exposing asyncio lifecycle flaws
This commit is contained in:
2026-03-05 01:42:47 -05:00
parent bfdbd43785
commit 35480a26dc
15 changed files with 715 additions and 481 deletions

View File

@@ -289,9 +289,9 @@ def reset_session() -> None:
_gemini_cache = None
_gemini_cache_md_hash = None
_gemini_cache_created_at = None
if _gemini_cli_adapter:
_gemini_cli_adapter.session_id = None
_gemini_cli_adapter = None
_anthropic_client = None
with _anthropic_history_lock:
_anthropic_history = []
_deepseek_client = None
@@ -724,6 +724,7 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str,
name, args = fc.name, dict(fc.args)
out = ""
tool_executed = False
events.emit("tool_execution", payload={"status": "started", "tool": name, "args": args, "round": r_idx})
if name == TOOL_NAME and pre_tool_callback:
scr = cast(str, args.get("script", ""))
_append_comms("OUT", "tool_call", {"name": TOOL_NAME, "script": scr})
@@ -735,7 +736,6 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str,
tool_executed = True
if not tool_executed:
events.emit("tool_execution", payload={"status": "started", "tool": name, "args": args, "round": r_idx})
if name and name in mcp_client.TOOL_NAMES:
_append_comms("OUT", "tool_call", {"name": name, "args": args})
if name in mcp_client.MUTATING_TOOLS and pre_tool_callback:
@@ -840,6 +840,7 @@ def _send_gemini_cli(md_content: str, user_message: str, base_dir: str,
call_id = cast(str, fc.get("id"))
out = ""
tool_executed = False
events.emit("tool_execution", payload={"status": "started", "tool": name, "args": args, "round": r_idx})
if name == TOOL_NAME and pre_tool_callback:
scr = cast(str, args.get("script", ""))
_append_comms("OUT", "tool_call", {"name": TOOL_NAME, "id": call_id, "script": scr})
@@ -851,8 +852,8 @@ def _send_gemini_cli(md_content: str, user_message: str, base_dir: str,
tool_executed = True
if not tool_executed:
events.emit("tool_execution", payload={"status": "started", "tool": name, "args": args, "round": r_idx})
if name in mcp_client.TOOL_NAMES:
if name and name in mcp_client.TOOL_NAMES:
_append_comms("OUT", "tool_call", {"name": name, "id": call_id, "args": args})
if name in mcp_client.MUTATING_TOOLS and pre_tool_callback:
desc = f"# MCP MUTATING TOOL: {name}\n" + "\n".join(f"# {k}: {repr(v)}" for k, v in args.items())
@@ -1181,6 +1182,7 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item
b_input = cast(dict[str, Any], getattr(block, "input"))
output = ""
tool_executed = False
events.emit("tool_execution", payload={"status": "started", "tool": b_name, "args": b_input, "round": round_idx})
if b_name == TOOL_NAME and pre_tool_callback:
script = cast(str, b_input.get("script", ""))
_append_comms("OUT", "tool_call", {"name": TOOL_NAME, "id": b_id, "script": script})
@@ -1192,8 +1194,8 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item
tool_executed = True
if not tool_executed:
events.emit("tool_execution", payload={"status": "started", "tool": b_name, "args": b_input, "round": round_idx})
if b_name and b_name in mcp_client.TOOL_NAMES:
if name and name in mcp_client.TOOL_NAMES:
_append_comms("OUT", "tool_call", {"name": b_name, "id": b_id, "args": b_input})
if b_name in mcp_client.MUTATING_TOOLS and pre_tool_callback:
desc = f"# MCP MUTATING TOOL: {b_name}\n" + "\n".join(f"# {k}: {repr(v)}" for k, v in b_input.items())
@@ -1225,10 +1227,7 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item
"tool_use_id": b_id,
"content": truncated,
})
if not tool_executed:
events.emit("tool_execution", payload={"status": "completed", "tool": b_name, "result": output, "round": round_idx})
else:
events.emit("tool_execution", payload={"status": "completed", "tool": b_name, "result": output, "round": round_idx})
events.emit("tool_execution", payload={"status": "completed", "tool": b_name, "result": output, "round": round_idx})
if _cumulative_tool_bytes > _MAX_TOOL_OUTPUT_BYTES:
tool_results.append({
"type": "text",
@@ -1417,6 +1416,7 @@ def _send_deepseek(md_content: str, user_message: str, base_dir: str,
tool_args = {}
tool_output = ""
tool_executed = False
events.emit("tool_execution", payload={"status": "started", "tool": tool_name, "args": tool_args, "round": round_idx})
if tool_name == TOOL_NAME and pre_tool_callback:
script = cast(str, tool_args.get("script", ""))
_append_comms("OUT", "tool_call", {"name": TOOL_NAME, "id": tool_id, "script": script})
@@ -1428,7 +1428,6 @@ def _send_deepseek(md_content: str, user_message: str, base_dir: str,
tool_executed = True
if not tool_executed:
events.emit("tool_execution", payload={"status": "started", "tool": tool_name, "args": tool_args, "round": round_idx})
if tool_name in mcp_client.TOOL_NAMES:
_append_comms("OUT", "tool_call", {"name": tool_name, "id": tool_id, "args": tool_args})
if tool_name in mcp_client.MUTATING_TOOLS and pre_tool_callback: