checkpoint: massive refactor

2026-02-28 09:06:45 -05:00
parent f2512c30e9
commit d36632c21a
149 changed files with 16255 additions and 17722 deletions

View File

@@ -15,82 +15,76 @@ import ai_client
@pytest.fixture(autouse=True)
def reset_ai_client():
"""Reset ai_client global state between every test to prevent state pollution."""
ai_client.reset_session()
# Default to a safe model
ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
yield
"""Reset ai_client global state between every test to prevent state pollution."""
ai_client.reset_session()
# Default to a safe model
ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
yield
def kill_process_tree(pid):
"""Robustly kills a process and all its children."""
if pid is None:
return
try:
print(f"[Fixture] Attempting to kill process tree for PID {pid}...")
if os.name == 'nt':
# /F is force, /T is tree (includes children)
subprocess.run(["taskkill", "/F", "/T", "/PID", str(pid)],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
check=False)
else:
# On Unix, kill the process group
os.killpg(os.getpgid(pid), signal.SIGKILL)
print(f"[Fixture] Process tree {pid} killed.")
except Exception as e:
print(f"[Fixture] Error killing process tree {pid}: {e}")
"""Robustly kills a process and all its children."""
if pid is None:
return
try:
print(f"[Fixture] Attempting to kill process tree for PID {pid}...")
if os.name == 'nt':
# /F is force, /T is tree (includes children)
subprocess.run(["taskkill", "/F", "/T", "/PID", str(pid)],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
check=False)
else:
# On Unix, kill the process group
os.killpg(os.getpgid(pid), signal.SIGKILL)
print(f"[Fixture] Process tree {pid} killed.")
except Exception as e:
print(f"[Fixture] Error killing process tree {pid}: {e}")
@pytest.fixture(scope="session")
def live_gui():
"""
"""
Session-scoped fixture that starts gui_2.py with --enable-test-hooks.
"""
gui_script = "gui_2.py"
print(f"\n[Fixture] Starting {gui_script} --enable-test-hooks...")
os.makedirs("logs", exist_ok=True)
log_file = open(f"logs/{gui_script.replace('.', '_')}_test.log", "w", encoding="utf-8")
process = subprocess.Popen(
["uv", "run", "python", "-u", gui_script, "--enable-test-hooks"],
stdout=log_file,
stderr=log_file,
text=True,
creationflags=subprocess.CREATE_NEW_PROCESS_GROUP if os.name == 'nt' else 0
)
max_retries = 15 # Slightly more time for gui_2
ready = False
print(f"[Fixture] Waiting up to {max_retries}s for Hook Server on port 8999...")
start_time = time.time()
while time.time() - start_time < max_retries:
try:
response = requests.get("http://127.0.0.1:8999/status", timeout=0.5)
if response.status_code == 200:
ready = True
print(f"[Fixture] GUI Hook Server for {gui_script} is ready after {round(time.time() - start_time, 2)}s.")
break
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
if process.poll() is not None:
print(f"[Fixture] {gui_script} process died unexpectedly during startup.")
break
time.sleep(0.5)
if not ready:
print(f"[Fixture] TIMEOUT/FAILURE: Hook server for {gui_script} failed to respond.")
kill_process_tree(process.pid)
pytest.fail(f"Failed to start {gui_script} with test hooks.")
try:
yield process, gui_script
finally:
print(f"\n[Fixture] Finally block triggered: Shutting down {gui_script}...")
# Reset the GUI state before shutting down
try:
client = ApiHookClient()
client.reset_session()
time.sleep(0.5)
except: pass
kill_process_tree(process.pid)
log_file.close()
gui_script = "gui_2.py"
print(f"\n[Fixture] Starting {gui_script} --enable-test-hooks...")
os.makedirs("logs", exist_ok=True)
log_file = open(f"logs/{gui_script.replace('.', '_')}_test.log", "w", encoding="utf-8")
process = subprocess.Popen(
["uv", "run", "python", "-u", gui_script, "--enable-test-hooks"],
stdout=log_file,
stderr=log_file,
text=True,
creationflags=subprocess.CREATE_NEW_PROCESS_GROUP if os.name == 'nt' else 0
)
max_retries = 15 # Slightly more time for gui_2
ready = False
print(f"[Fixture] Waiting up to {max_retries}s for Hook Server on port 8999...")
start_time = time.time()
while time.time() - start_time < max_retries:
try:
response = requests.get("http://127.0.0.1:8999/status", timeout=0.5)
if response.status_code == 200:
ready = True
print(f"[Fixture] GUI Hook Server for {gui_script} is ready after {round(time.time() - start_time, 2)}s.")
break
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
if process.poll() is not None:
print(f"[Fixture] {gui_script} process died unexpectedly during startup.")
break
time.sleep(0.5)
if not ready:
print(f"[Fixture] TIMEOUT/FAILURE: Hook server for {gui_script} failed to respond.")
kill_process_tree(process.pid)
pytest.fail(f"Failed to start {gui_script} with test hooks.")
try:
yield process, gui_script
finally:
print(f"\n[Fixture] Finally block triggered: Shutting down {gui_script}...")
# Reset the GUI state before shutting down
try:
client = ApiHookClient()
client.reset_session()
time.sleep(0.5)
except: pass
kill_process_tree(process.pid)
log_file.close()

tests/mock_alias_tool.py Normal file
View File

@@ -0,0 +1,21 @@
import sys, json, os, subprocess
prompt = sys.stdin.read()
if '"role": "tool"' in prompt:
print(json.dumps({"type": "message", "role": "assistant", "content": "Tool worked!"}), flush=True)
print(json.dumps({"type": "result", "stats": {"total_tokens": 20}}), flush=True)
else:
# We must call the bridge to trigger the GUI approval!
tool_call = {"name": "list_directory", "input": {"dir_path": "."}}
bridge_cmd = [sys.executable, "C:/projects/manual_slop/scripts/cli_tool_bridge.py"]
proc = subprocess.Popen(bridge_cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, text=True)
stdout, _ = proc.communicate(input=json.dumps(tool_call))
# Even if bridge says allow, we emit the tool_use to the adapter
print(json.dumps({"type": "message", "role": "assistant", "content": "I will list the directory."}), flush=True)
print(json.dumps({
"type": "tool_use",
"name": "list_directory",
"id": "alias_call",
"args": {"dir_path": "."}
}), flush=True)
print(json.dumps({"type": "result", "stats": {"total_tokens": 10}}), flush=True)

View File

@@ -4,104 +4,92 @@ import subprocess
import os
def main():
    # Debug log to stderr
    sys.stderr.write(f"DEBUG: mock_gemini_cli called with args: {sys.argv}\n")
    sys.stderr.write(f"DEBUG: GEMINI_CLI_HOOK_CONTEXT: {os.environ.get('GEMINI_CLI_HOOK_CONTEXT')}\n")
    # Read prompt from stdin
    try:
        # On Windows, stdin might be closed or behave weirdly if not handled
        prompt = sys.stdin.read()
    except EOFError:
        prompt = ""
    sys.stderr.write(f"DEBUG: Received prompt via stdin ({len(prompt)} chars)\n")
    sys.stderr.flush()
    # Skip management commands
    if len(sys.argv) > 1 and sys.argv[1] in ["mcp", "extensions", "skills", "hooks"]:
        return
    # If the prompt contains tool results, provide final answer
    if '"role": "tool"' in prompt or '"tool_call_id"' in prompt:
        print(json.dumps({
            "type": "message",
            "role": "assistant",
            "content": "I have processed the tool results. Everything looks good!"
        }), flush=True)
        print(json.dumps({
            "type": "result",
            "status": "success",
            "stats": {"total_tokens": 100, "input_tokens": 80, "output_tokens": 20},
            "session_id": "mock-session-final"
        }), flush=True)
        return
    # Default flow: simulate a tool call
    bridge_path = os.path.abspath("scripts/cli_tool_bridge.py")
    # Using format that bridge understands
    bridge_tool_call = {
        "name": "read_file",
        "input": {"path": "test.txt"}
    }
    sys.stderr.write(f"DEBUG: Calling bridge at {bridge_path}\n")
    sys.stderr.flush()
    try:
        # CRITICAL: Use the current process environment to ensure GEMINI_CLI_HOOK_CONTEXT is passed
        process = subprocess.Popen(
            [sys.executable, bridge_path],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            env=os.environ
        )
        stdout, stderr = process.communicate(input=json.dumps(bridge_tool_call))
        sys.stderr.write(f"DEBUG: Bridge stdout: {stdout}\n")
        sys.stderr.write(f"DEBUG: Bridge stderr: {stderr}\n")
        decision_data = json.loads(stdout.strip())
        decision = decision_data.get("decision")
    except Exception as e:
        sys.stderr.write(f"DEBUG: Bridge failed: {e}\n")
        decision = "deny"
    if decision == "allow":
        # Simulate REAL CLI field names for adapter normalization test
        print(json.dumps({
            "type": "tool_use",
            "tool_name": "read_file",
            "tool_id": "call_123",
            "parameters": {"path": "test.txt"}
        }), flush=True)
        print(json.dumps({
            "type": "message",
            "role": "assistant",
            "content": "I am reading the file now..."
        }), flush=True)
        print(json.dumps({
            "type": "result",
            "status": "success",
            "stats": {"total_tokens": 50, "input_tokens": 40, "output_tokens": 10},
            "session_id": "mock-session-123"
        }), flush=True)
    else:
        print(json.dumps({
            "type": "message",
            "role": "assistant",
            "content": f"Tool execution was denied. Decision: {decision}"
        }), flush=True)
        print(json.dumps({
            "type": "result",
            "status": "success",
            "stats": {"total_tokens": 10, "input_tokens": 10, "output_tokens": 0},
            "session_id": "mock-session-denied"
        }), flush=True)
if __name__ == "__main__":
    main()

View File

@@ -15,6 +15,8 @@ output_dir = "./md_gen"
base_dir = "."
paths = []
[files.tier_assignments]
[screenshots]
base_dir = "."
paths = []

View File

@@ -10,5 +10,5 @@ auto_add = true
[discussions.main]
git_commit = ""
last_updated = "2026-02-27T18:56:53"
last_updated = "2026-02-28T07:35:03"
history = []

View File

@@ -15,6 +15,8 @@ output_dir = "./md_gen"
base_dir = "."
paths = []
[files.tier_assignments]
[screenshots]
base_dir = "."
paths = []

View File

@@ -6,10 +6,10 @@ roles = [
"Reasoning",
]
history = []
active = "TestDisc_1772236592"
active = "TestDisc_1772282083"
auto_add = true
[discussions.TestDisc_1772236592]
[discussions.TestDisc_1772282083]
git_commit = ""
last_updated = "2026-02-27T18:56:46"
last_updated = "2026-02-28T07:34:56"
history = []

View File

@@ -15,6 +15,8 @@ output_dir = "./md_gen"
base_dir = "."
paths = []
[files.tier_assignments]
[screenshots]
base_dir = "."
paths = []

View File

@@ -10,5 +10,5 @@ auto_add = true
[discussions.main]
git_commit = ""
last_updated = "2026-02-27T18:57:53"
last_updated = "2026-02-28T07:35:49"
history = []

View File

@@ -15,6 +15,8 @@ output_dir = "./md_gen"
base_dir = "."
paths = []
[files.tier_assignments]
[screenshots]
base_dir = "."
paths = []

View File

@@ -10,5 +10,5 @@ auto_add = true
[discussions.main]
git_commit = ""
last_updated = "2026-02-27T18:57:10"
last_updated = "2026-02-28T07:35:20"
history = []

View File

@@ -18,7 +18,5 @@ history = [
[discussions.AutoDisc]
git_commit = ""
last_updated = "2026-02-27T23:54:05"
history = [
"@2026-02-27T19:08:37\nSystem:\n[PERFORMANCE ALERT] Frame time high: 62.2ms. Please consider optimizing recent changes or reducing load.",
]
last_updated = "2026-02-28T07:34:41"
history = []

View File

@@ -8,5 +8,5 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
import ai_client
def test_agent_capabilities_listing():
    # Verify that the agent exposes its available tools correctly
    pass

View File

@@ -9,14 +9,14 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from ai_client import set_agent_tools, _build_anthropic_tools
def test_set_agent_tools():
    # Correct usage: pass a dict
    agent_tools = {"read_file": True, "list_directory": False}
    set_agent_tools(agent_tools)
def test_build_anthropic_tools_conversion():
    # _build_anthropic_tools takes no arguments and uses the global _agent_tools
    # We set a tool to True and check if it appears in the output
    set_agent_tools({"read_file": True})
    anthropic_tools = _build_anthropic_tools()
    tool_names = [t["name"] for t in anthropic_tools]
    assert "read_file" in tool_names

View File

@@ -3,39 +3,33 @@ from unittest.mock import MagicMock, patch
import ai_client
def test_ai_client_send_gemini_cli():
"""
"""
Verifies that 'ai_client.send' correctly interacts with 'GeminiCliAdapter'
when the 'gemini_cli' provider is specified.
"""
test_message = "Hello, this is a test prompt for the CLI adapter."
test_response = "This is a dummy response from the Gemini CLI."
# Set provider to gemini_cli
ai_client.set_provider("gemini_cli", "gemini-2.5-flash-lite")
# 1. Mock 'ai_client.GeminiCliAdapter' (which we will add)
with patch('ai_client.GeminiCliAdapter') as MockAdapterClass:
mock_adapter_instance = MockAdapterClass.return_value
mock_adapter_instance.send.return_value = {"text": test_response, "tool_calls": []}
mock_adapter_instance.last_usage = {"total_tokens": 100}
mock_adapter_instance.last_latency = 0.5
mock_adapter_instance.session_id = "test-session"
# Verify that 'events' are emitted correctly
with patch.object(ai_client.events, 'emit') as mock_emit:
response = ai_client.send(
md_content="<context></context>",
user_message=test_message,
base_dir="."
)
# Check that the adapter's send method was called.
mock_adapter_instance.send.assert_called()
# Verify that the expected lifecycle events were emitted.
emitted_event_names = [call.args[0] for call in mock_emit.call_args_list]
assert 'request_start' in emitted_event_names
assert 'response_received' in emitted_event_names
# Verify that the combined text returned by the adapter is returned by 'ai_client.send'.
assert response == test_response
test_message = "Hello, this is a test prompt for the CLI adapter."
test_response = "This is a dummy response from the Gemini CLI."
# Set provider to gemini_cli
ai_client.set_provider("gemini_cli", "gemini-2.5-flash-lite")
# 1. Mock 'ai_client.GeminiCliAdapter' (which we will add)
with patch('ai_client.GeminiCliAdapter') as MockAdapterClass:
mock_adapter_instance = MockAdapterClass.return_value
mock_adapter_instance.send.return_value = {"text": test_response, "tool_calls": []}
mock_adapter_instance.last_usage = {"total_tokens": 100}
mock_adapter_instance.last_latency = 0.5
mock_adapter_instance.session_id = "test-session"
# Verify that 'events' are emitted correctly
with patch.object(ai_client.events, 'emit') as mock_emit:
response = ai_client.send(
md_content="<context></context>",
user_message=test_message,
base_dir="."
)
# Check that the adapter's send method was called.
mock_adapter_instance.send.assert_called()
# Verify that the expected lifecycle events were emitted.
emitted_event_names = [call.args[0] for call in mock_emit.call_args_list]
assert 'request_start' in emitted_event_names
assert 'response_received' in emitted_event_names
# Verify that the combined text returned by the adapter is returned by 'ai_client.send'.
assert response == test_response

View File

@@ -3,15 +3,14 @@ from unittest.mock import patch, MagicMock
import ai_client
def test_list_models_gemini_cli():
"""
"""
Verifies that 'ai_client.list_models' correctly returns a list of models
for the 'gemini_cli' provider.
"""
models = ai_client.list_models("gemini_cli")
assert "gemini-3.1-pro-preview" in models
assert "gemini-3-flash-preview" in models
assert "gemini-2.5-pro" in models
assert "gemini-2.5-flash" in models
assert "gemini-2.5-flash-lite" in models
assert len(models) == 5
models = ai_client.list_models("gemini_cli")
assert "gemini-3.1-pro-preview" in models
assert "gemini-3-flash-preview" in models
assert "gemini-2.5-pro" in models
assert "gemini-2.5-flash" in models
assert "gemini-2.5-flash-lite" in models
assert len(models) == 5

View File

@@ -3,22 +3,22 @@ import textwrap
from scripts.ai_style_formatter import format_code
def test_basic_indentation():
    source = textwrap.dedent("""\
def hello():
print("world")
if True:
print("nested")
""")
    expected = (
        "def hello():\n"
        " print(\"world\")\n"
        " if True:\n"
        " print(\"nested\")\n"
    )
    assert format_code(source) == expected
def test_top_level_blank_lines():
    source = textwrap.dedent("""\
def a():
pass
@@ -26,31 +26,31 @@ def test_top_level_blank_lines():
def b():
pass
""")
    expected = (
        "def a():\n"
        " pass\n"
        "\n"
        "def b():\n"
        " pass\n"
    )
    assert format_code(source) == expected
def test_inner_blank_lines():
    source = textwrap.dedent("""\
def a():
print("start")
print("end")
""")
    expected = (
        "def a():\n"
        " print(\"start\")\n"
        " print(\"end\")\n"
    )
    assert format_code(source) == expected
def test_multiline_string_safety():
    source = textwrap.dedent("""\
def a():
'''
This is a multiline
@@ -60,21 +60,20 @@ def test_multiline_string_safety():
'''
pass
""")
    # Note: the indentation of the ''' itself becomes 1 space.
    # The content inside remains exactly as in source.
    # textwrap.dedent will remove the common leading whitespace from the source.
    # The source's ''' is at 4 spaces. Content is at 4 spaces.
    # After dedent:
    # def a():
    # '''
    # This is a...
    result = format_code(source)
    assert " This is a multiline" in result
    assert result.startswith("def a():\n '''")
def test_continuation_indentation():
    source = textwrap.dedent("""\
def long_func(
a,
b
@@ -84,20 +83,20 @@ def test_continuation_indentation():
b
)
""")
    expected = (
        "def long_func(\n"
        " a,\n"
        " b\n"
        "):\n"
        " return (\n"
        " a +\n"
        " b\n"
        " )\n"
    )
    assert format_code(source) == expected
def test_multiple_top_level_definitions():
    source = textwrap.dedent("""\
class MyClass:
def __init__(self):
self.x = 1
@@ -109,14 +108,14 @@ def test_multiple_top_level_definitions():
def top_level():
pass
""")
    expected = (
        "class MyClass:\n"
        " def __init__(self):\n"
        " self.x = 1\n"
        " def method(self):\n"
        " pass\n"
        "\n"
        "def top_level():\n"
        " pass\n"
    )
    assert format_code(source) == expected

View File

@@ -3,127 +3,104 @@ from unittest.mock import MagicMock, patch
import ai_client
class MockUsage:
    def __init__(self):
        self.prompt_token_count = 10
        self.candidates_token_count = 5
        self.total_token_count = 15
        self.cached_content_token_count = 0
class MockPart:
    def __init__(self, text, function_call):
        self.text = text
        self.function_call = function_call
class MockContent:
    def __init__(self, parts):
        self.parts = parts
class MockCandidate:
    def __init__(self, parts):
        self.content = MockContent(parts)
        self.finish_reason = MagicMock()
        self.finish_reason.name = "STOP"
def test_ai_client_event_emitter_exists():
    # This should fail initially because 'events' won't exist on ai_client
    assert hasattr(ai_client, 'events')
def test_event_emission():
    callback = MagicMock()
    ai_client.events.on("test_event", callback)
    ai_client.events.emit("test_event", payload={"data": 123})
    callback.assert_called_once_with(payload={"data": 123})
def test_send_emits_events():
with patch("ai_client._send_gemini") as mock_send_gemini, \
patch("ai_client._send_anthropic") as mock_send_anthropic:
mock_send_gemini.return_value = "gemini response"
start_callback = MagicMock()
response_callback = MagicMock()
ai_client.events.on("request_start", start_callback)
ai_client.events.on("response_received", response_callback)
ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
ai_client.send("context", "message")
# We mocked _send_gemini so it doesn't emit events inside.
# But wait, ai_client.send itself emits request_start and response_received?
# Actually, ai_client.send delegates to _send_gemini.
# Let's mock _gemini_client instead to let _send_gemini run and emit events.
pass
with patch("ai_client._send_gemini") as mock_send_gemini, \
patch("ai_client._send_anthropic") as mock_send_anthropic:
mock_send_gemini.return_value = "gemini response"
start_callback = MagicMock()
response_callback = MagicMock()
ai_client.events.on("request_start", start_callback)
ai_client.events.on("response_received", response_callback)
ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
ai_client.send("context", "message")
# We mocked _send_gemini so it doesn't emit events inside.
# But wait, ai_client.send itself emits request_start and response_received?
# Actually, ai_client.send delegates to _send_gemini.
# Let's mock _gemini_client instead to let _send_gemini run and emit events.
pass
def test_send_emits_events_proper():
with patch("ai_client._ensure_gemini_client"), \
patch("ai_client._gemini_client") as mock_client:
mock_chat = MagicMock()
mock_client.chats.create.return_value = mock_chat
mock_response = MagicMock()
mock_response.candidates = [MockCandidate([MockPart("gemini response", None)])]
mock_response.usage_metadata = MockUsage()
mock_chat.send_message.return_value = mock_response
start_callback = MagicMock()
response_callback = MagicMock()
ai_client.events.on("request_start", start_callback)
ai_client.events.on("response_received", response_callback)
ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
ai_client.send("context", "message")
assert start_callback.called
assert response_callback.called
args, kwargs = start_callback.call_args
assert kwargs['payload']['provider'] == 'gemini'
with patch("ai_client._ensure_gemini_client"), \
patch("ai_client._gemini_client") as mock_client:
mock_chat = MagicMock()
mock_client.chats.create.return_value = mock_chat
mock_response = MagicMock()
mock_response.candidates = [MockCandidate([MockPart("gemini response", None)])]
mock_response.usage_metadata = MockUsage()
mock_chat.send_message.return_value = mock_response
start_callback = MagicMock()
response_callback = MagicMock()
ai_client.events.on("request_start", start_callback)
ai_client.events.on("response_received", response_callback)
ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
ai_client.send("context", "message")
assert start_callback.called
assert response_callback.called
args, kwargs = start_callback.call_args
assert kwargs['payload']['provider'] == 'gemini'
def test_send_emits_tool_events():
    import mcp_client
    with patch("ai_client._ensure_gemini_client"), \
         patch("ai_client._gemini_client") as mock_client, \
         patch("mcp_client.dispatch") as mock_dispatch:
        mock_chat = MagicMock()
        mock_client.chats.create.return_value = mock_chat
        # 1. Setup mock response with a tool call
        mock_fc = MagicMock()
        mock_fc.name = "read_file"
        mock_fc.args = {"path": "test.txt"}
        mock_response_with_tool = MagicMock()
        mock_response_with_tool.candidates = [MockCandidate([MockPart("tool call text", mock_fc)])]
        mock_response_with_tool.usage_metadata = MockUsage()
        # 2. Setup second mock response (final answer)
        mock_response_final = MagicMock()
        mock_response_final.candidates = [MockCandidate([MockPart("final answer", None)])]
        mock_response_final.usage_metadata = MockUsage()
        mock_chat.send_message.side_effect = [mock_response_with_tool, mock_response_final]
        mock_dispatch.return_value = "file content"
        ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
        tool_callback = MagicMock()
        ai_client.events.on("tool_execution", tool_callback)
        ai_client.send("context", "message")
        # Should be called twice: once for 'started', once for 'completed'
        assert tool_callback.call_count == 2
        # Check 'started' call
        args, kwargs = tool_callback.call_args_list[0]
        assert kwargs['payload']['status'] == 'started'
        assert kwargs['payload']['tool'] == 'read_file'
        # Check 'completed' call
        args, kwargs = tool_callback.call_args_list[1]
        assert kwargs['payload']['status'] == 'completed'
        assert kwargs['payload']['result'] == 'file content'

View File

@@ -13,88 +13,84 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from api_hook_client import ApiHookClient
def test_get_status_success(live_gui):
"""
"""
Test that get_status successfully retrieves the server status
when the live GUI is running.
"""
client = ApiHookClient()
status = client.get_status()
assert status == {'status': 'ok'}
client = ApiHookClient()
status = client.get_status()
assert status == {'status': 'ok'}
def test_get_project_success(live_gui):
"""
"""
Test successful retrieval of project data from the live GUI.
"""
client = ApiHookClient()
response = client.get_project()
assert 'project' in response
# We don't assert specific content as it depends on the environment's active project
client = ApiHookClient()
response = client.get_project()
assert 'project' in response
# We don't assert specific content as it depends on the environment's active project
def test_get_session_success(live_gui):
"""
"""
Test successful retrieval of session data.
"""
client = ApiHookClient()
response = client.get_session()
assert 'session' in response
assert 'entries' in response['session']
client = ApiHookClient()
response = client.get_session()
assert 'session' in response
assert 'entries' in response['session']
def test_post_gui_success(live_gui):
"""
"""
Test successful posting of GUI data.
"""
client = ApiHookClient()
gui_data = {'command': 'set_text', 'id': 'some_item', 'value': 'new_text'}
response = client.post_gui(gui_data)
assert response == {'status': 'queued'}
client = ApiHookClient()
gui_data = {'command': 'set_text', 'id': 'some_item', 'value': 'new_text'}
response = client.post_gui(gui_data)
assert response == {'status': 'queued'}
def test_get_performance_success(live_gui):
"""
"""
Test successful retrieval of performance metrics.
"""
client = ApiHookClient()
response = client.get_performance()
assert "performance" in response
client = ApiHookClient()
response = client.get_performance()
assert "performance" in response
def test_unsupported_method_error():
"""
"""
Test that calling an unsupported HTTP method raises a ValueError.
"""
client = ApiHookClient()
with pytest.raises(ValueError, match="Unsupported HTTP method"):
client._make_request('PUT', '/some_endpoint', data={'key': 'value'})
client = ApiHookClient()
with pytest.raises(ValueError, match="Unsupported HTTP method"):
client._make_request('PUT', '/some_endpoint', data={'key': 'value'})
def test_get_text_value():
"""
"""
Test retrieval of string representation using get_text_value.
"""
client = ApiHookClient()
with patch.object(client, 'get_value', return_value=123):
assert client.get_text_value("dummy_tag") == "123"
with patch.object(client, 'get_value', return_value=None):
assert client.get_text_value("dummy_tag") is None
client = ApiHookClient()
with patch.object(client, 'get_value', return_value=123):
assert client.get_text_value("dummy_tag") == "123"
with patch.object(client, 'get_value', return_value=None):
assert client.get_text_value("dummy_tag") is None
def test_get_node_status():
"""
"""
Test retrieval of DAG node status using get_node_status.
"""
client = ApiHookClient()
# When get_value returns a status directly
with patch.object(client, 'get_value', return_value="running"):
assert client.get_node_status("my_node") == "running"
# When get_value returns None and diagnostics provides a nodes dict
with patch.object(client, 'get_value', return_value=None):
with patch.object(client, '_make_request', return_value={'nodes': {'my_node': 'completed'}}):
assert client.get_node_status("my_node") == "completed"
# When get_value returns None and diagnostics provides a direct key
with patch.object(client, 'get_value', return_value=None):
with patch.object(client, '_make_request', return_value={'my_node': 'failed'}):
assert client.get_node_status("my_node") == "failed"
# When neither works
with patch.object(client, 'get_value', return_value=None):
with patch.object(client, '_make_request', return_value={}):
assert client.get_node_status("my_node") is None
client = ApiHookClient()
# When get_value returns a status directly
with patch.object(client, 'get_value', return_value="running"):
assert client.get_node_status("my_node") == "running"
# When get_value returns None and diagnostics provides a nodes dict
with patch.object(client, 'get_value', return_value=None):
with patch.object(client, '_make_request', return_value={'nodes': {'my_node': 'completed'}}):
assert client.get_node_status("my_node") == "completed"
# When get_value returns None and diagnostics provides a direct key
with patch.object(client, 'get_value', return_value=None):
with patch.object(client, '_make_request', return_value={'my_node': 'failed'}):
assert client.get_node_status("my_node") == "failed"
# When neither works
with patch.object(client, 'get_value', return_value=None):
with patch.object(client, '_make_request', return_value={}):
assert client.get_node_status("my_node") is None

View File

@@ -8,68 +8,64 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from api_hook_client import ApiHookClient
def test_api_client_has_extensions():
    client = ApiHookClient()
    # These should fail initially as they are not implemented
    assert hasattr(client, 'select_tab')
    assert hasattr(client, 'select_list_item')
def test_select_tab_integration(live_gui):
    client = ApiHookClient()
    # We'll need to make sure the tags exist in gui_legacy.py
    # For now, this is a placeholder for the integration test
    response = client.select_tab("operations_tabs", "tab_tool")
    assert response == {'status': 'queued'}
def test_select_list_item_integration(live_gui):
    client = ApiHookClient()
    # Assuming 'Default' discussion exists or we can just test that it queues
    response = client.select_list_item("disc_listbox", "Default")
    assert response == {'status': 'queued'}
def test_get_indicator_state_integration(live_gui):
    client = ApiHookClient()
    # thinking_indicator is usually hidden unless AI is running
    response = client.get_indicator_state("thinking_indicator")
    assert 'shown' in response
    assert response['tag'] == "thinking_indicator"
def test_app_processes_new_actions():
    import gui_legacy
    from unittest.mock import MagicMock, patch
    import dearpygui.dearpygui as dpg
    dpg.create_context()
    try:
        with patch('gui_legacy.load_config', return_value={}), \
             patch('gui_legacy.PerformanceMonitor'), \
             patch('gui_legacy.shell_runner'), \
             patch('gui_legacy.project_manager'), \
             patch.object(gui_legacy.App, '_load_active_project'):
            app = gui_legacy.App()
            with patch('dearpygui.dearpygui.set_value') as mock_set_value, \
                 patch('dearpygui.dearpygui.does_item_exist', return_value=True), \
                 patch('dearpygui.dearpygui.get_item_callback') as mock_get_cb:
                # Test select_tab
                app._pending_gui_tasks.append({
                    "action": "select_tab",
                    "tab_bar": "some_tab_bar",
                    "tab": "some_tab"
                })
                app._process_pending_gui_tasks()
                mock_set_value.assert_any_call("some_tab_bar", "some_tab")
                # Test select_list_item
                mock_cb = MagicMock()
                mock_get_cb.return_value = mock_cb
                app._pending_gui_tasks.append({
                    "action": "select_list_item",
                    "listbox": "some_listbox",
                    "item_value": "some_value"
                })
                app._process_pending_gui_tasks()
                mock_set_value.assert_any_call("some_listbox", "some_value")
                mock_cb.assert_called_with("some_listbox", "some_value")
    finally:
        dpg.destroy_context()

View File

@@ -3,24 +3,24 @@ import tree_sitter
from file_cache import ASTParser
def test_ast_parser_initialization():
"""Verify that ASTParser can be initialized with a language string."""
parser = ASTParser("python")
assert parser.language_name == "python"
"""Verify that ASTParser can be initialized with a language string."""
parser = ASTParser("python")
assert parser.language_name == "python"
def test_ast_parser_parse():
"""Verify that the parse method returns a tree_sitter.Tree."""
parser = ASTParser("python")
code = """def example_func():
"""Verify that the parse method returns a tree_sitter.Tree."""
parser = ASTParser("python")
code = """def example_func():
return 42"""
    tree = parser.parse(code)
    assert isinstance(tree, tree_sitter.Tree)
    # Basic check that it parsed something
    assert tree.root_node.type == "module"
def test_ast_parser_get_skeleton_python():
"""Verify that get_skeleton replaces function bodies with '...' while preserving docstrings."""
parser = ASTParser("python")
code = '''
"""Verify that get_skeleton replaces function bodies with '...' while preserving docstrings."""
parser = ASTParser("python")
code = '''
def complex_function(a, b):
"""
This is a docstring.
@@ -36,35 +36,32 @@ class MyClass:
print("doing something")
return None
'''
    skeleton = parser.get_skeleton(code)
    # Check that signatures are preserved
    assert "def complex_function(a, b):" in skeleton
    assert "class MyClass:" in skeleton
    assert "def method_without_docstring(self):" in skeleton
    # Check that docstring is preserved
    assert '"""' in skeleton
    assert "This is a docstring." in skeleton
    assert "It should be preserved." in skeleton
    # Check that bodies are replaced with '...'
    assert "..." in skeleton
    assert "result = a + b" not in skeleton
    assert "return result" not in skeleton
    assert 'print("doing something")' not in skeleton
def test_ast_parser_invalid_language():
"""Verify handling of unsupported or invalid languages."""
# This might raise an error or return a default, depending on implementation
# For now, we expect it to either fail gracefully or raise an exception we can catch
with pytest.raises(Exception):
ASTParser("not-a-language")
"""Verify handling of unsupported or invalid languages."""
# This might raise an error or return a default, depending on implementation
# For now, we expect it to either fail gracefully or raise an exception we can catch
with pytest.raises(Exception):
ASTParser("not-a-language")
def test_ast_parser_get_curated_view():
"""Verify that get_curated_view preserves function bodies with @core_logic or # [HOT]."""
parser = ASTParser("python")
code = '''
"""Verify that get_curated_view preserves function bodies with @core_logic or # [HOT]."""
parser = ASTParser("python")
code = '''
@core_logic
def core_func():
"""Core logic doc."""
@@ -86,20 +83,16 @@ class MyClass:
def core_method(self, x):
print("method preserved", x)
'''
    curated = parser.get_curated_view(code)
    # Check that core_func is preserved
    assert 'print("this should be preserved")' in curated
    assert 'return True' in curated
    # Check that hot_func is preserved
    assert '# [HOT]' in curated
    assert 'print("this should also be preserved")' in curated
    # Check that normal_func is stripped but docstring is preserved
    assert '"""Normal doc."""' in curated
    assert 'print("this should be stripped")' not in curated
    assert '...' in curated
    # Check that core_method is preserved
    assert 'print("method preserved", x)' in curated

View File

@@ -2,8 +2,8 @@ import pytest
from file_cache import ASTParser
def test_ast_parser_get_curated_view():
    parser = ASTParser("python")
    code = '''
@core_logic
def core_func():
"""Core logic doc."""
@@ -25,20 +25,16 @@ class MyClass:
def core_method(self):
print("method preserved")
'''
    curated = parser.get_curated_view(code)
    # Check that core_func is preserved
    assert 'print("this should be preserved")' in curated
    assert 'return True' in curated
    # Check that hot_func is preserved
    assert '# [HOT]' in curated
    assert 'print("this should also be preserved")' in curated
    # Check that normal_func is stripped but docstring is preserved
    assert '"""Normal doc."""' in curated
    assert 'print("this should be stripped")' not in curated
    assert '...' in curated
    # Check that core_method is preserved
    assert 'print("method preserved")' in curated

View File

@@ -3,45 +3,40 @@ import pytest
from events import AsyncEventQueue
def test_async_event_queue_put_get():
"""Verify that an event can be asynchronously put and retrieved from the queue."""
async def run_test():
queue = AsyncEventQueue()
event_name = "test_event"
payload = {"data": "hello"}
await queue.put(event_name, payload)
ret_name, ret_payload = await queue.get()
assert ret_name == event_name
assert ret_payload == payload
"""Verify that an event can be asynchronously put and retrieved from the queue."""
asyncio.run(run_test())
async def run_test():
queue = AsyncEventQueue()
event_name = "test_event"
payload = {"data": "hello"}
await queue.put(event_name, payload)
ret_name, ret_payload = await queue.get()
assert ret_name == event_name
assert ret_payload == payload
asyncio.run(run_test())
def test_async_event_queue_multiple():
"""Verify that multiple events can be asynchronously put and retrieved in order."""
async def run_test():
queue = AsyncEventQueue()
await queue.put("event1", 1)
await queue.put("event2", 2)
name1, val1 = await queue.get()
name2, val2 = await queue.get()
assert name1 == "event1"
assert val1 == 1
assert name2 == "event2"
assert val2 == 2
"""Verify that multiple events can be asynchronously put and retrieved in order."""
asyncio.run(run_test())
async def run_test():
queue = AsyncEventQueue()
await queue.put("event1", 1)
await queue.put("event2", 2)
name1, val1 = await queue.get()
name2, val2 = await queue.get()
assert name1 == "event1"
assert val1 == 1
assert name2 == "event2"
assert val2 == 2
asyncio.run(run_test())
def test_async_event_queue_none_payload():
"""Verify that an event with None payload works correctly."""
async def run_test():
queue = AsyncEventQueue()
await queue.put("no_payload")
name, payload = await queue.get()
assert name == "no_payload"
assert payload is None
"""Verify that an event with None payload works correctly."""
asyncio.run(run_test())
async def run_test():
queue = AsyncEventQueue()
await queue.put("no_payload")
name, payload = await queue.get()
assert name == "no_payload"
assert payload is None
asyncio.run(run_test())

View File

@@ -5,72 +5,60 @@ from log_registry import LogRegistry
@pytest.fixture
def registry_setup(tmp_path):
    registry_path = tmp_path / "log_registry.toml"
    logs_dir = tmp_path / "logs"
    logs_dir.mkdir()
    registry = LogRegistry(str(registry_path))
    return registry, logs_dir
def test_auto_whitelist_keywords(registry_setup):
    registry, logs_dir = registry_setup
    session_id = "test_kw"
    session_dir = logs_dir / session_id
    session_dir.mkdir()
    # Create comms.log with ERROR
    comms_log = session_dir / "comms.log"
    comms_log.write_text("Some message\nAN ERROR OCCURRED\nMore text")
    registry.register_session(session_id, str(session_dir), datetime.now())
    registry.update_auto_whitelist_status(session_id)
    assert registry.is_session_whitelisted(session_id)
    assert "ERROR" in registry.data[session_id]["metadata"]["reason"]
def test_auto_whitelist_message_count(registry_setup):
    registry, logs_dir = registry_setup
    session_id = "test_msg_count"
    session_dir = logs_dir / session_id
    session_dir.mkdir()
    # Create comms.log with > 10 lines
    comms_log = session_dir / "comms.log"
    comms_log.write_text("\n".join(["msg"] * 15))
    registry.register_session(session_id, str(session_dir), datetime.now())
    registry.update_auto_whitelist_status(session_id)
    assert registry.is_session_whitelisted(session_id)
    assert registry.data[session_id]["metadata"]["message_count"] == 15
def test_auto_whitelist_large_size(registry_setup):
    registry, logs_dir = registry_setup
    session_id = "test_large"
    session_dir = logs_dir / session_id
    session_dir.mkdir()
    # Create large file (> 50KB)
    large_file = session_dir / "large.log"
    large_file.write_text("x" * 60000)
    registry.register_session(session_id, str(session_dir), datetime.now())
    registry.update_auto_whitelist_status(session_id)
    assert registry.is_session_whitelisted(session_id)
    assert "Large session size" in registry.data[session_id]["metadata"]["reason"]
def test_no_auto_whitelist_insignificant(registry_setup):
    registry, logs_dir = registry_setup
    session_id = "test_insignificant"
    session_dir = logs_dir / session_id
    session_dir.mkdir()
    # Small file, few lines, no keywords
    comms_log = session_dir / "comms.log"
    comms_log.write_text("hello\nworld")
    registry.register_session(session_id, str(session_dir), datetime.now())
    registry.update_auto_whitelist_status(session_id)
    assert not registry.is_session_whitelisted(session_id)
    assert registry.data[session_id]["metadata"]["message_count"] == 2

View File

@@ -12,64 +12,55 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from scripts.cli_tool_bridge import main
class TestCliToolBridge(unittest.TestCase):
    def setUp(self):
        os.environ['GEMINI_CLI_HOOK_CONTEXT'] = 'manual_slop'
        self.tool_call = {
            'tool_name': 'read_file',
            'tool_input': {'path': 'test.txt'}
        }
    @patch('sys.stdin', new_callable=io.StringIO)
    @patch('sys.stdout', new_callable=io.StringIO)
    @patch('api_hook_client.ApiHookClient.request_confirmation')
    def test_allow_decision(self, mock_request, mock_stdout, mock_stdin):
        # 1. Mock stdin with a JSON string tool call
        mock_stdin.write(json.dumps(self.tool_call))
        mock_stdin.seek(0)
        # 2. Mock ApiHookClient to return approved
        mock_request.return_value = {'approved': True}
        # Run main
        main()
        # 3. Capture stdout and assert allow
        output = json.loads(mock_stdout.getvalue().strip())
        self.assertEqual(output.get('decision'), 'allow')
    @patch('sys.stdin', new_callable=io.StringIO)
    @patch('sys.stdout', new_callable=io.StringIO)
    @patch('api_hook_client.ApiHookClient.request_confirmation')
    def test_deny_decision(self, mock_request, mock_stdout, mock_stdin):
        # Mock stdin
        mock_stdin.write(json.dumps(self.tool_call))
        mock_stdin.seek(0)
        # 4. Mock ApiHookClient to return denied
        mock_request.return_value = {'approved': False}
        main()
        # Assert deny
        output = json.loads(mock_stdout.getvalue().strip())
        self.assertEqual(output.get('decision'), 'deny')
    @patch('sys.stdin', new_callable=io.StringIO)
    @patch('sys.stdout', new_callable=io.StringIO)
    @patch('api_hook_client.ApiHookClient.request_confirmation')
    def test_unreachable_hook_server(self, mock_request, mock_stdout, mock_stdin):
        # Mock stdin
        mock_stdin.write(json.dumps(self.tool_call))
        mock_stdin.seek(0)
        # 5. Test case where hook server is unreachable (exception)
        mock_request.side_effect = Exception("Connection refused")
        main()
        # Assert deny on error
        output = json.loads(mock_stdout.getvalue().strip())
        self.assertEqual(output.get('decision'), 'deny')
if __name__ == '__main__':
    unittest.main()

View File

@@ -12,42 +12,37 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from scripts.cli_tool_bridge import main
class TestCliToolBridgeMapping(unittest.TestCase):
    def setUp(self):
        os.environ['GEMINI_CLI_HOOK_CONTEXT'] = 'manual_slop'
@patch('sys.stdin', new_callable=io.StringIO)
@patch('sys.stdout', new_callable=io.StringIO)
@patch('api_hook_client.ApiHookClient.request_confirmation')
def test_mapping_from_api_format(self, mock_request, mock_stdout, mock_stdin):
"""
@patch('sys.stdin', new_callable=io.StringIO)
@patch('sys.stdout', new_callable=io.StringIO)
@patch('api_hook_client.ApiHookClient.request_confirmation')
def test_mapping_from_api_format(self, mock_request, mock_stdout, mock_stdin):
"""
Verify that bridge correctly maps 'id', 'name', 'input' (Gemini API format)
into tool_name and tool_input for the hook client.
"""
api_tool_call = {
'id': 'call123',
'name': 'read_file',
'input': {'path': 'test.txt'}
}
# 1. Mock stdin with the API format JSON
mock_stdin.write(json.dumps(api_tool_call))
mock_stdin.seek(0)
# 2. Mock ApiHookClient to return approved
mock_request.return_value = {'approved': True}
# Run main
main()
# 3. Verify that request_confirmation was called with mapped values
# If it's not mapped, it will likely be called with None or fail
mock_request.assert_called_once_with('read_file', {'path': 'test.txt'})
# 4. Capture stdout and assert allow
output_str = mock_stdout.getvalue().strip()
self.assertTrue(output_str, "Stdout should not be empty")
output = json.loads(output_str)
self.assertEqual(output.get('decision'), 'allow')
if __name__ == '__main__':
unittest.main()

View File

@@ -13,61 +13,55 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from api_hook_client import ApiHookClient
def simulate_conductor_phase_completion(client: ApiHookClient):
"""
"""
Simulates the Conductor agent's logic for phase completion using ApiHookClient.
"""
results = {
"verification_successful": False,
"verification_message": ""
}
try:
status = client.get_status()
if status.get('status') == 'ok':
results["verification_successful"] = True
results["verification_message"] = "Automated verification completed successfully."
else:
results["verification_successful"] = False
results["verification_message"] = f"Automated verification failed: {status}"
except Exception as e:
results["verification_successful"] = False
results["verification_message"] = f"Automated verification failed: {e}"
return results
def test_conductor_integrates_api_hook_client_for_verification(live_gui):
"""
"""
Verify that Conductor's simulated phase completion logic properly integrates
and uses the ApiHookClient for verification against the live GUI.
"""
client = ApiHookClient()
results = simulate_conductor_phase_completion(client)
assert results["verification_successful"] is True
assert "successfully" in results["verification_message"]
def test_conductor_handles_api_hook_failure(live_gui):
"""
"""
Verify Conductor handles a simulated API hook verification failure.
We patch the client's get_status to simulate failure even with live GUI.
"""
client = ApiHookClient()
with patch.object(ApiHookClient, 'get_status') as mock_get_status:
mock_get_status.return_value = {'status': 'failed', 'error': 'Something went wrong'}
results = simulate_conductor_phase_completion(client)
assert results["verification_successful"] is False
assert "failed" in results["verification_message"]
def test_conductor_handles_api_hook_connection_error():
"""
"""
Verify Conductor handles a simulated API hook connection error (server down).
"""
client = ApiHookClient(base_url="http://127.0.0.1:9998", max_retries=0)
results = simulate_conductor_phase_completion(client)
assert results["verification_successful"] is False
# Check for expected error substrings from ApiHookClient
msg = results["verification_message"]
assert any(term in msg for term in ["Could not connect", "timed out", "Could not reach"])

View File

@@ -7,284 +7,234 @@ import ai_client
# which will be implemented in the next phase of TDD.
def test_conductor_engine_initialization():
"""
"""
Test that ConductorEngine can be initialized with a Track.
"""
track = Track(id="test_track", description="Test Track")
from multi_agent_conductor import ConductorEngine
engine = ConductorEngine(track=track)
assert engine.track == track
@pytest.mark.asyncio
async def test_conductor_engine_run_linear_executes_tickets_in_order(monkeypatch):
"""
"""
Test that run_linear iterates through executable tickets and calls the worker lifecycle.
"""
ticket1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
ticket2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker2", depends_on=["T1"])
track = Track(id="track1", description="Track 1", tickets=[ticket1, ticket2])
from multi_agent_conductor import ConductorEngine
engine = ConductorEngine(track=track)
# Mock ai_client.send using monkeypatch
mock_send = MagicMock()
monkeypatch.setattr(ai_client, 'send', mock_send)
# We mock run_worker_lifecycle as it is expected to be in the same module
with patch("multi_agent_conductor.run_worker_lifecycle") as mock_lifecycle:
# Mocking lifecycle to mark ticket as complete so dependencies can be resolved
def side_effect(ticket, context, *args, **kwargs):
ticket.mark_complete()
return "Success"
mock_lifecycle.side_effect = side_effect
await engine.run_linear()
# Track.get_executable_tickets() should be called repeatedly until all are done
# T1 should run first, then T2.
assert mock_lifecycle.call_count == 2
assert ticket1.status == "completed"
assert ticket2.status == "completed"
# Verify sequence: T1 before T2
calls = mock_lifecycle.call_args_list
assert calls[0][0][0].id == "T1"
assert calls[1][0][0].id == "T2"
@pytest.mark.asyncio
async def test_run_worker_lifecycle_calls_ai_client_send(monkeypatch):
"""
"""
Test that run_worker_lifecycle triggers the AI client and updates ticket status on success.
"""
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
from multi_agent_conductor import run_worker_lifecycle
# Mock ai_client.send using monkeypatch
mock_send = MagicMock()
monkeypatch.setattr(ai_client, 'send', mock_send)
mock_send.return_value = "Task complete. I have updated the file."
result = run_worker_lifecycle(ticket, context)
assert result == "Task complete. I have updated the file."
assert ticket.status == "completed"
mock_send.assert_called_once()
# Check if description was passed to send()
args, kwargs = mock_send.call_args
# user_message is passed as a keyword argument
assert ticket.description in kwargs["user_message"]
@pytest.mark.asyncio
async def test_run_worker_lifecycle_context_injection(monkeypatch):
"""
"""
Test that run_worker_lifecycle can take a context_files list and injects AST views into the prompt.
"""
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
context_files = ["primary.py", "secondary.py"]
from multi_agent_conductor import run_worker_lifecycle
# Mock ai_client.send using monkeypatch
mock_send = MagicMock()
monkeypatch.setattr(ai_client, 'send', mock_send)
# We mock ASTParser which is expected to be imported in multi_agent_conductor
with patch("multi_agent_conductor.ASTParser") as mock_ast_parser_class, \
patch("builtins.open", new_callable=MagicMock) as mock_open:
# Setup open mock to return different content for different files
file_contents = {
"primary.py": "def primary(): pass",
"secondary.py": "def secondary(): pass"
}
def mock_open_side_effect(file, *args, **kwargs):
content = file_contents.get(file, "")
mock_file = MagicMock()
mock_file.read.return_value = content
mock_file.__enter__.return_value = mock_file
return mock_file
mock_open.side_effect = mock_open_side_effect
# Setup ASTParser mock
mock_ast_parser = mock_ast_parser_class.return_value
mock_ast_parser.get_curated_view.return_value = "CURATED VIEW"
mock_ast_parser.get_skeleton.return_value = "SKELETON VIEW"
mock_send.return_value = "Success"
run_worker_lifecycle(ticket, context, context_files=context_files)
# Verify ASTParser calls:
# First file (primary) should get curated view, others (secondary) get skeleton
mock_ast_parser.get_curated_view.assert_called_once_with("def primary(): pass")
mock_ast_parser.get_skeleton.assert_called_once_with("def secondary(): pass")
# Verify user_message contains the views
_, kwargs = mock_send.call_args
user_message = kwargs["user_message"]
assert "CURATED VIEW" in user_message
assert "SKELETON VIEW" in user_message
assert "primary.py" in user_message
assert "secondary.py" in user_message
@pytest.mark.asyncio
async def test_run_worker_lifecycle_handles_blocked_response(monkeypatch):
"""
"""
Test that run_worker_lifecycle marks the ticket as blocked if the AI indicates it cannot proceed.
"""
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
from multi_agent_conductor import run_worker_lifecycle
# Mock ai_client.send using monkeypatch
mock_send = MagicMock()
monkeypatch.setattr(ai_client, 'send', mock_send)
# Simulate a response indicating a block
mock_send.return_value = "I am BLOCKED because I don't have enough information."
run_worker_lifecycle(ticket, context)
assert ticket.status == "blocked"
assert "BLOCKED" in ticket.blocked_reason
@pytest.mark.asyncio
async def test_run_worker_lifecycle_step_mode_confirmation(monkeypatch):
"""
"""
Test that run_worker_lifecycle passes confirm_execution to ai_client.send when step_mode is True.
Verify that if confirm_execution is called (simulated by mocking ai_client.send to call its callback),
the flow works as expected.
"""
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1", step_mode=True)
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
from multi_agent_conductor import run_worker_lifecycle
# Mock ai_client.send using monkeypatch
mock_send = MagicMock()
monkeypatch.setattr(ai_client, 'send', mock_send)
with patch("multi_agent_conductor.confirm_execution") as mock_confirm:
# We simulate ai_client.send by making it call the pre_tool_callback it received
def mock_send_side_effect(md_content, user_message, **kwargs):
callback = kwargs.get("pre_tool_callback")
if callback:
# Simulate calling it with some payload
callback('{"tool": "read_file", "args": {"path": "test.txt"}}')
return "Success"
mock_send.side_effect = mock_send_side_effect
mock_confirm.return_value = True
mock_event_queue = MagicMock()
run_worker_lifecycle(ticket, context, event_queue=mock_event_queue)
# Verify confirm_execution was called
mock_confirm.assert_called_once()
assert ticket.status == "completed"
@pytest.mark.asyncio
async def test_run_worker_lifecycle_step_mode_rejection(monkeypatch):
"""
"""
Verify that if confirm_execution returns False, the logic (in ai_client, which we simulate here)
would prevent execution. In run_worker_lifecycle, we just check if it's passed.
"""
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1", step_mode=True)
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
from multi_agent_conductor import run_worker_lifecycle
# Mock ai_client.send using monkeypatch
mock_send = MagicMock()
monkeypatch.setattr(ai_client, 'send', mock_send)
with patch("multi_agent_conductor.confirm_execution") as mock_confirm:
mock_confirm.return_value = False
mock_send.return_value = "Task failed because tool execution was rejected."
run_worker_lifecycle(ticket, context)
# Verify it was passed to send
args, kwargs = mock_send.call_args
assert kwargs["pre_tool_callback"] is not None
# ai_client's handling of pre_tool_callback is exercised in its own tests;
# here we only verify that the callback is wired through.
@pytest.mark.asyncio
async def test_conductor_engine_dynamic_parsing_and_execution(monkeypatch):
"""
"""
Test that parse_json_tickets correctly populates the track and run_linear executes them in dependency order.
"""
import json
from multi_agent_conductor import ConductorEngine
track = Track(id="dynamic_track", description="Dynamic Track")
engine = ConductorEngine(track=track)
tickets_json = json.dumps([
{
"id": "T1",
"description": "Initial task",
"status": "todo",
"assigned_to": "worker1",
"depends_on": []
},
{
"id": "T2",
"description": "Dependent task",
"status": "todo",
"assigned_to": "worker2",
"depends_on": ["T1"]
},
{
"id": "T3",
"description": "Another initial task",
"status": "todo",
"assigned_to": "worker3",
"depends_on": []
}
])
engine.parse_json_tickets(tickets_json)
assert len(engine.track.tickets) == 3
assert engine.track.tickets[0].id == "T1"
assert engine.track.tickets[1].id == "T2"
assert engine.track.tickets[2].id == "T3"
# Mock ai_client.send using monkeypatch
mock_send = MagicMock()
monkeypatch.setattr(ai_client, 'send', mock_send)
# Mock run_worker_lifecycle to mark tickets as complete
with patch("multi_agent_conductor.run_worker_lifecycle") as mock_lifecycle:
def side_effect(ticket, context, *args, **kwargs):
ticket.mark_complete()
return "Success"
mock_lifecycle.side_effect = side_effect
await engine.run_linear()
assert mock_lifecycle.call_count == 3
# Verify dependency order: T1 must be called before T2
calls = [call[0][0].id for call in mock_lifecycle.call_args_list]
t1_idx = calls.index("T1")
t2_idx = calls.index("T2")
assert t1_idx < t2_idx
# T3 can be anywhere relative to T1 and T2, but T1 < T2 is mandatory
assert "T3" in calls

View File

@@ -4,112 +4,106 @@ import json
import conductor_tech_lead
class TestConductorTechLead(unittest.TestCase):
@patch('ai_client.send')
@patch('ai_client.set_provider')
@patch('ai_client.reset_session')
def test_generate_tickets_success(self, mock_reset_session, mock_set_provider, mock_send):
# Setup mock response
mock_tickets = [
{
"id": "ticket_1",
"type": "Ticket",
"goal": "Test goal",
"target_file": "test.py",
"depends_on": [],
"context_requirements": []
}
]
mock_send.return_value = "```json\n" + json.dumps(mock_tickets) + "\n```"
track_brief = "Test track brief"
module_skeletons = "Test skeletons"
# Call the function
tickets = conductor_tech_lead.generate_tickets(track_brief, module_skeletons)
# Verify set_provider was called
mock_set_provider.assert_called_with('gemini', 'gemini-2.5-flash-lite')
mock_reset_session.assert_called_once()
# Verify send was called
mock_send.assert_called_once()
args, kwargs = mock_send.call_args
self.assertEqual(kwargs['md_content'], "")
self.assertIn(track_brief, kwargs['user_message'])
self.assertIn(module_skeletons, kwargs['user_message'])
# Verify tickets were parsed correctly
self.assertEqual(tickets, mock_tickets)
@patch('ai_client.send')
@patch('ai_client.set_provider')
@patch('ai_client.reset_session')
def test_generate_tickets_parse_error(self, mock_reset_session, mock_set_provider, mock_send):
# Setup mock invalid response
mock_send.return_value = "Invalid JSON"
# Call the function
tickets = conductor_tech_lead.generate_tickets("brief", "skeletons")
# Verify it returns an empty list on parse error
self.assertEqual(tickets, [])
class TestTopologicalSort(unittest.TestCase):
def test_topological_sort_empty(self):
tickets = []
sorted_tickets = conductor_tech_lead.topological_sort(tickets)
self.assertEqual(sorted_tickets, [])
def test_topological_sort_linear(self):
tickets = [
{"id": "t2", "depends_on": ["t1"]},
{"id": "t1", "depends_on": []},
{"id": "t3", "depends_on": ["t2"]},
]
sorted_tickets = conductor_tech_lead.topological_sort(tickets)
ids = [t["id"] for t in sorted_tickets]
self.assertEqual(ids, ["t1", "t2", "t3"])
def test_topological_sort_complex(self):
# t1
# | \
# t2 t3
# | /
# t4
tickets = [
{"id": "t4", "depends_on": ["t2", "t3"]},
{"id": "t3", "depends_on": ["t1"]},
{"id": "t2", "depends_on": ["t1"]},
{"id": "t1", "depends_on": []},
]
sorted_tickets = conductor_tech_lead.topological_sort(tickets)
ids = [t["id"] for t in sorted_tickets]
# Possible valid orders: [t1, t2, t3, t4] or [t1, t3, t2, t4]
self.assertEqual(ids[0], "t1")
self.assertEqual(ids[-1], "t4")
self.assertSetEqual(set(ids[1:3]), {"t2", "t3"})
def test_topological_sort_cycle(self):
tickets = [
{"id": "t1", "depends_on": ["t2"]},
{"id": "t2", "depends_on": ["t1"]},
]
with self.assertRaises(ValueError) as cm:
conductor_tech_lead.topological_sort(tickets)
self.assertIn("Circular dependency detected", str(cm.exception))
def test_topological_sort_missing_dependency(self):
# A ticket may depend on something that is not in the list. In our context we only
# track dependencies within the same track, so a dependency that is missing from the
# set being sorted can never be satisfied.
tickets = [
{"id": "t1", "depends_on": ["missing"]},
]
# Expect a ValueError for a missing internal dependency rather than a silent hang.
with self.assertRaises(ValueError):
conductor_tech_lead.topological_sort(tickets)
if __name__ == '__main__':
unittest.main()

View File

@@ -3,82 +3,72 @@ from models import Ticket
from dag_engine import TrackDAG
def test_get_ready_tasks_linear():
t1 = Ticket(id="T1", description="Task 1", status="completed", assigned_to="worker")
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
t3 = Ticket(id="T3", description="Task 3", status="todo", assigned_to="worker", depends_on=["T2"])
dag = TrackDAG([t1, t2, t3])
ready = dag.get_ready_tasks()
assert len(ready) == 1
assert ready[0].id == "T2"
def test_get_ready_tasks_branching():
t1 = Ticket(id="T1", description="Task 1", status="completed", assigned_to="worker")
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
t3 = Ticket(id="T3", description="Task 3", status="todo", assigned_to="worker", depends_on=["T1"])
dag = TrackDAG([t1, t2, t3])
ready = dag.get_ready_tasks()
assert len(ready) == 2
ready_ids = {t.id for t in ready}
assert ready_ids == {"T2", "T3"}
def test_has_cycle_no_cycle():
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
dag = TrackDAG([t1, t2])
assert not dag.has_cycle()
def test_has_cycle_direct_cycle():
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker", depends_on=["T2"])
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
dag = TrackDAG([t1, t2])
assert dag.has_cycle()
def test_has_cycle_indirect_cycle():
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker", depends_on=["T2"])
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T3"])
t3 = Ticket(id="T3", description="Task 3", status="todo", assigned_to="worker", depends_on=["T1"])
dag = TrackDAG([t1, t2, t3])
assert dag.has_cycle()
def test_has_cycle_complex_no_cycle():
# T1 -> T2, T1 -> T3, T2 -> T4, T3 -> T4
t1 = Ticket(id="T1", description="T1", status="todo", assigned_to="worker", depends_on=["T2", "T3"])
t2 = Ticket(id="T2", description="T2", status="todo", assigned_to="worker", depends_on=["T4"])
t3 = Ticket(id="T3", description="T3", status="todo", assigned_to="worker", depends_on=["T4"])
t4 = Ticket(id="T4", description="T4", status="todo", assigned_to="worker")
dag = TrackDAG([t1, t2, t3, t4])
assert not dag.has_cycle()
def test_get_ready_tasks_multiple_deps():
t1 = Ticket(id="T1", description="T1", status="completed", assigned_to="worker")
t2 = Ticket(id="T2", description="T2", status="completed", assigned_to="worker")
t3 = Ticket(id="T3", description="T3", status="todo", assigned_to="worker", depends_on=["T1", "T2"])
dag = TrackDAG([t1, t2, t3])
assert [t.id for t in dag.get_ready_tasks()] == ["T3"]
t2.status = "todo"
assert [t.id for t in dag.get_ready_tasks()] == ["T2"]
def test_topological_sort():
t1 = Ticket(id="T1", description="T1", status="todo", assigned_to="worker")
t2 = Ticket(id="T2", description="T2", status="todo", assigned_to="worker", depends_on=["T1"])
t3 = Ticket(id="T3", description="T3", status="todo", assigned_to="worker", depends_on=["T2"])
dag = TrackDAG([t1, t2, t3])
sort = dag.topological_sort()
assert sort == ["T1", "T2", "T3"]
def test_topological_sort_cycle():
t1 = Ticket(id="T1", description="T1", status="todo", assigned_to="worker", depends_on=["T2"])
t2 = Ticket(id="T2", description="T2", status="todo", assigned_to="worker", depends_on=["T1"])
dag = TrackDAG([t1, t2])
with pytest.raises(ValueError, match="Dependency cycle detected"):
dag.topological_sort()

View File

@@ -12,54 +12,51 @@ import ai_client
import project_manager
def test_credentials_error_mentions_deepseek(monkeypatch):
"""
"""
Verify that the error message shown when credentials.toml is missing
includes deepseek instructions.
"""
# Monkeypatch SLOP_CREDENTIALS to a non-existent file
monkeypatch.setenv("SLOP_CREDENTIALS", "non_existent_credentials_file.toml")
with pytest.raises(FileNotFoundError) as excinfo:
ai_client._load_credentials()
err_msg = str(excinfo.value)
assert "[deepseek]" in err_msg
assert "api_key" in err_msg
def test_default_project_includes_reasoning_role():
"""
"""
Verify that 'Reasoning' is included in the default discussion roles
to support DeepSeek-R1 reasoning traces.
"""
proj = project_manager.default_project("test")
roles = proj["discussion"]["roles"]
assert "Reasoning" in roles
def test_gui_providers_list():
"""
"""
Check if 'deepseek' is in the GUI's provider list.
"""
import gui_2
assert "deepseek" in gui_2.PROVIDERS
def test_deepseek_model_listing():
"""
"""
Verify that list_models for deepseek returns expected models.
"""
models = ai_client.list_models("deepseek")
assert "deepseek-chat" in models
assert "deepseek-reasoner" in models
def test_gui_provider_list_via_hooks(live_gui):
"""
"""
Verify 'deepseek' is present in the GUI provider list using API hooks.
"""
from api_hook_client import ApiHookClient
import time
client = ApiHookClient()
assert client.wait_for_server(timeout=10)
# Attempt to set provider to deepseek to verify it's an allowed value
client.set_value('current_provider', 'deepseek')
time.sleep(0.5)
assert client.get_value('current_provider') == 'deepseek'

View File

@@ -3,137 +3,124 @@ from unittest.mock import patch, MagicMock
import ai_client
def test_deepseek_model_selection():
"""
"""
Verifies that ai_client.set_provider('deepseek', 'deepseek-chat') correctly updates the internal state.
"""
ai_client.set_provider("deepseek", "deepseek-chat")
assert ai_client._provider == "deepseek"
assert ai_client._model == "deepseek-chat"
def test_deepseek_completion_logic():
"""
"""
Verifies that ai_client.send() correctly calls the DeepSeek API and returns content.
"""
ai_client.set_provider("deepseek", "deepseek-chat")
with patch("requests.post") as mock_post:
mock_response = MagicMock()
mock_response.status_code = 200
mock_response.json.return_value = {
"choices": [{
"message": {"role": "assistant", "content": "DeepSeek Response"},
"finish_reason": "stop"
}],
"usage": {"prompt_tokens": 10, "completion_tokens": 5}
}
mock_post.return_value = mock_response
result = ai_client.send(md_content="Context", user_message="Hello", base_dir=".")
assert result == "DeepSeek Response"
assert mock_post.called
def test_deepseek_reasoning_logic():
"""
"""
Verifies that reasoning_content is captured and wrapped in <thinking> tags.
"""
ai_client.set_provider("deepseek", "deepseek-reasoner")
with patch("requests.post") as mock_post:
mock_response = MagicMock()
mock_response.status_code = 200
mock_response.json.return_value = {
"choices": [{
"message": {
"role": "assistant",
"content": "Final Answer",
"reasoning_content": "Chain of thought"
},
"finish_reason": "stop"
}],
"usage": {"prompt_tokens": 10, "completion_tokens": 20}
}
mock_post.return_value = mock_response
result = ai_client.send(md_content="Context", user_message="Reasoning test", base_dir=".")
assert "<thinking>\nChain of thought\n</thinking>" in result
assert "Final Answer" in result
def test_deepseek_tool_calling():
"""
"""
Verifies that DeepSeek provider correctly identifies and executes tool calls.
"""
ai_client.set_provider("deepseek", "deepseek-chat")
with patch("requests.post") as mock_post, \
patch("mcp_client.dispatch") as mock_dispatch:
# 1. Mock first response with a tool call
mock_resp1 = MagicMock()
mock_resp1.status_code = 200
mock_resp1.json.return_value = {
"choices": [{
"message": {
"role": "assistant",
"content": "Let me read that file.",
"tool_calls": [{
"id": "call_123",
"type": "function",
"function": {
"name": "read_file",
"arguments": '{"path": "test.txt"}'
}
}]
},
"finish_reason": "tool_calls"
}],
"usage": {"prompt_tokens": 50, "completion_tokens": 10}
}
# 2. Mock second response (final answer)
mock_resp2 = MagicMock()
mock_resp2.status_code = 200
mock_resp2.json.return_value = {
"choices": [{
"message": {
"role": "assistant",
"content": "File content is: Hello World"
},
"finish_reason": "stop"
}],
"usage": {"prompt_tokens": 100, "completion_tokens": 20}
}
mock_post.side_effect = [mock_resp1, mock_resp2]
mock_dispatch.return_value = "Hello World"
result = ai_client.send(md_content="Context", user_message="Read test.txt", base_dir=".")
assert "File content is: Hello World" in result
assert mock_dispatch.called
assert mock_dispatch.call_args[0][0] == "read_file"
assert mock_dispatch.call_args[0][1] == {"path": "test.txt"}
def test_deepseek_streaming():
"""
"""
Verifies that DeepSeek provider correctly aggregates streaming chunks.
"""
ai_client.set_provider("deepseek", "deepseek-chat")
with patch("requests.post") as mock_post:
# Mock a streaming response
mock_response = MagicMock()
mock_response.status_code = 200
# Simulate OpenAI-style server-sent events (SSE) for streaming
# Each line starts with 'data: ' and contains a JSON object
chunks = [
'data: {"choices": [{"delta": {"role": "assistant", "content": "Hello"}, "index": 0, "finish_reason": null}]}',
'data: {"choices": [{"delta": {"content": " World"}, "index": 0, "finish_reason": null}]}',
'data: {"choices": [{"delta": {}, "index": 0, "finish_reason": "stop"}]}',
'data: [DONE]'
]
mock_response.iter_lines.return_value = [c.encode('utf-8') for c in chunks]
mock_post.return_value = mock_response
result = ai_client.send(md_content="Context", user_message="Stream test", base_dir=".", stream=True)
assert result == "Hello World"

View File

@@ -3,121 +3,99 @@ from models import Ticket
from dag_engine import TrackDAG, ExecutionEngine
def test_execution_engine_basic_flow():
# Setup tickets with dependencies
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
t3 = Ticket(id="T3", description="Task 3", status="todo", assigned_to="worker", depends_on=["T1"])
t4 = Ticket(id="T4", description="Task 4", status="todo", assigned_to="worker", depends_on=["T2", "T3"])
dag = TrackDAG([t1, t2, t3, t4])
engine = ExecutionEngine(dag)
# Tick 1: Only T1 should be ready
ready = engine.tick()
assert len(ready) == 1
assert ready[0].id == "T1"
# Complete T1
engine.update_task_status("T1", "completed")
# Tick 2: T2 and T3 should be ready
ready = engine.tick()
assert len(ready) == 2
ids = {t.id for t in ready}
assert ids == {"T2", "T3"}
# Complete T2
engine.update_task_status("T2", "completed")
# Tick 3: Only T3 should be ready (T4 depends on T2 AND T3)
ready = engine.tick()
assert len(ready) == 1
assert ready[0].id == "T3"
# Complete T3
engine.update_task_status("T3", "completed")
# Tick 4: T4 should be ready
ready = engine.tick()
assert len(ready) == 1
assert ready[0].id == "T4"
# Complete T4
engine.update_task_status("T4", "completed")
# Tick 5: Nothing ready
ready = engine.tick()
assert len(ready) == 0
def test_execution_engine_update_nonexistent_task():
dag = TrackDAG([])
engine = ExecutionEngine(dag)
# Should not raise; an unknown task ID is handled gracefully
engine.update_task_status("NONEXISTENT", "completed")
def test_execution_engine_status_persistence():
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
dag = TrackDAG([t1])
engine = ExecutionEngine(dag)
engine.update_task_status("T1", "in_progress")
assert t1.status == "in_progress"
ready = engine.tick()
assert len(ready) == 0 # Only 'todo' tasks should be returned by tick() if they are ready
def test_execution_engine_auto_queue():
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
dag = TrackDAG([t1, t2])
engine = ExecutionEngine(dag, auto_queue=True)
# Tick 1: T1 is ready and should be automatically marked as 'in_progress'
ready = engine.tick()
assert len(ready) == 1
assert ready[0].id == "T1"
assert t1.status == "in_progress"
# Tick 2: T1 is in_progress, so T2 is NOT ready yet (T1 must be 'completed')
ready = engine.tick()
assert len(ready) == 0
assert t2.status == "todo"
# Complete T1
engine.update_task_status("T1", "completed")
# Tick 3: T2 is now ready and should be automatically marked as 'in_progress'
ready = engine.tick()
assert len(ready) == 1
assert ready[0].id == "T2"
assert t2.status == "in_progress"
def test_execution_engine_step_mode():
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker", step_mode=True)
dag = TrackDAG([t1])
engine = ExecutionEngine(dag, auto_queue=True)
# Tick 1: T1 is ready, but step_mode=True, so it should NOT be automatically marked as 'in_progress'
ready = engine.tick()
assert len(ready) == 1
assert ready[0].id == "T1"
assert t1.status == "todo"
# Manual approval
engine.approve_task("T1")
assert t1.status == "in_progress"
# Tick 2: T1 is already in_progress, should not be returned by tick() (it's not 'ready'/todo)
ready = engine.tick()
assert len(ready) == 0
def test_execution_engine_approve_task():
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
dag = TrackDAG([t1])
engine = ExecutionEngine(dag, auto_queue=False)
# Should be able to approve even if auto_queue is False
engine.approve_task("T1")
assert t1.status == "in_progress"

View File

@@ -14,44 +14,40 @@ from simulation.sim_execution import ExecutionSimulation
@pytest.mark.integration
def test_context_sim_live(live_gui):
"""Run the Context & Chat simulation against a live GUI."""
client = ApiHookClient()
assert client.wait_for_server(timeout=10)
sim = ContextSimulation(client)
sim.setup("LiveContextSim")
sim.run()
sim.teardown()
"""Run the Context & Chat simulation against a live GUI."""
client = ApiHookClient()
assert client.wait_for_server(timeout=10)
sim = ContextSimulation(client)
sim.setup("LiveContextSim")
sim.run()
sim.teardown()
@pytest.mark.integration
def test_ai_settings_sim_live(live_gui):
"""Run the AI Settings simulation against a live GUI."""
client = ApiHookClient()
assert client.wait_for_server(timeout=10)
sim = AISettingsSimulation(client)
sim.setup("LiveAISettingsSim")
sim.run()
sim.teardown()
"""Run the AI Settings simulation against a live GUI."""
client = ApiHookClient()
assert client.wait_for_server(timeout=10)
sim = AISettingsSimulation(client)
sim.setup("LiveAISettingsSim")
sim.run()
sim.teardown()
@pytest.mark.integration
def test_tools_sim_live(live_gui):
"""Run the Tools & Search simulation against a live GUI."""
client = ApiHookClient()
assert client.wait_for_server(timeout=10)
sim = ToolsSimulation(client)
sim.setup("LiveToolsSim")
sim.run()
sim.teardown()
"""Run the Tools & Search simulation against a live GUI."""
client = ApiHookClient()
assert client.wait_for_server(timeout=10)
sim = ToolsSimulation(client)
sim.setup("LiveToolsSim")
sim.run()
sim.teardown()
@pytest.mark.integration
def test_execution_sim_live(live_gui):
"""Run the Execution & Modals simulation against a live GUI."""
client = ApiHookClient()
assert client.wait_for_server(timeout=10)
sim = ExecutionSimulation(client)
sim.setup("LiveExecutionSim")
sim.run()
sim.teardown()
"""Run the Execution & Modals simulation against a live GUI."""
client = ApiHookClient()
assert client.wait_for_server(timeout=10)
sim = ExecutionSimulation(client)
sim.setup("LiveExecutionSim")
sim.run()
sim.teardown()

View File

@@ -12,119 +12,105 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from gemini_cli_adapter import GeminiCliAdapter
class TestGeminiCliAdapter(unittest.TestCase):
def setUp(self):
self.adapter = GeminiCliAdapter(binary_path="gemini")
@patch('subprocess.Popen')
def test_send_starts_subprocess_with_correct_args(self, mock_popen):
"""
Verify that send(message) correctly starts the subprocess with
--output-format stream-json and the provided message via stdin using communicate.
"""
# Setup mock process with a minimal valid JSONL termination
process_mock = MagicMock()
stdout_content = json.dumps({"type": "result", "usage": {}}) + "\n"
process_mock.communicate.return_value = (stdout_content, "")
process_mock.poll.return_value = 0
process_mock.wait.return_value = 0
mock_popen.return_value = process_mock
message = "Hello Gemini CLI"
self.adapter.send(message)
# Verify subprocess.Popen call
mock_popen.assert_called_once()
args, kwargs = mock_popen.call_args
cmd = args[0]
# Check mandatory CLI components
self.assertIn("gemini", cmd)
self.assertIn("--output-format", cmd)
self.assertIn("stream-json", cmd)
# Message should NOT appear in the command args; it is sent via stdin instead
self.assertNotIn(message, cmd)
# Verify message was sent via communicate
process_mock.communicate.assert_called_once_with(input=message)
# Check process configuration
self.assertEqual(kwargs.get('stdout'), subprocess.PIPE)
self.assertEqual(kwargs.get('stdin'), subprocess.PIPE)
self.assertEqual(kwargs.get('text'), True)
@patch('subprocess.Popen')
def test_send_parses_jsonl_output(self, mock_popen):
"""
Verify that it correctly parses multiple JSONL 'message' events
and returns the combined text.
"""
jsonl_output = [
json.dumps({"type": "message", "role": "model", "text": "The quick brown "}),
json.dumps({"type": "message", "role": "model", "text": "fox jumps."}),
json.dumps({"type": "result", "usage": {"prompt_tokens": 5, "candidates_tokens": 5}})
]
stdout_content = "\n".join(jsonl_output) + "\n"
process_mock = MagicMock()
process_mock.communicate.return_value = (stdout_content, "")
process_mock.poll.return_value = 0
process_mock.wait.return_value = 0
mock_popen.return_value = process_mock
result = self.adapter.send("test message")
self.assertEqual(result["text"], "The quick brown fox jumps.")
self.assertEqual(result["tool_calls"], [])
@patch('subprocess.Popen')
def test_send_handles_tool_use_events(self, mock_popen):
"""
Verify that it correctly handles 'tool_use' events in the stream
by continuing to read until the final 'result' event.
"""
jsonl_output = [
json.dumps({"type": "message", "role": "assistant", "text": "Calling tool..."}),
json.dumps({"type": "tool_use", "name": "read_file", "args": {"path": "test.txt"}}),
json.dumps({"type": "message", "role": "assistant", "text": "\nFile read successfully."}),
json.dumps({"type": "result", "usage": {}})
]
stdout_content = "\n".join(jsonl_output) + "\n"
process_mock = MagicMock()
process_mock.communicate.return_value = (stdout_content, "")
process_mock.poll.return_value = 0
process_mock.wait.return_value = 0
mock_popen.return_value = process_mock
result = self.adapter.send("read test.txt")
# Result should contain the combined text from all 'message' events
self.assertEqual(result["text"], "Calling tool...\nFile read successfully.")
self.assertEqual(len(result["tool_calls"]), 1)
self.assertEqual(result["tool_calls"][0]["name"], "read_file")
@patch('subprocess.Popen')
def test_send_captures_usage_metadata(self, mock_popen):
"""
Verify that usage data is extracted from the 'result' event.
"""
usage_data = {"total_tokens": 42}
jsonl_output = [
json.dumps({"type": "message", "text": "Finalizing"}),
json.dumps({"type": "result", "usage": usage_data})
]
stdout_content = "\n".join(jsonl_output) + "\n"
process_mock = MagicMock()
process_mock.communicate.return_value = (stdout_content, "")
process_mock.poll.return_value = 0
process_mock.wait.return_value = 0
mock_popen.return_value = process_mock
self.adapter.send("usage test")
# Verify the usage was captured in the adapter instance
self.assertEqual(self.adapter.last_usage, usage_data)
if __name__ == '__main__':
unittest.main()
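The stream handling these four tests pin down can be summarised by the following sketch (a minimal, hypothetical reconstruction from the mocked stdout above, not the actual GeminiCliAdapter code):
import json

def parse_stream_json(stdout_text):
    # Fold the JSONL transcript into the result shape the tests assert on.
    text_parts, tool_calls, usage = [], [], None
    for line in stdout_text.splitlines():
        if not line.strip():
            continue
        event = json.loads(line)
        etype = event.get("type")
        if etype == "message":
            text_parts.append(event.get("text", ""))
        elif etype == "tool_use":
            tool_calls.append({"name": event.get("name"), "args": event.get("args", {})})
        elif etype == "result":
            usage = event.get("usage")
    return {"text": "".join(text_parts), "tool_calls": tool_calls}, usage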

View File

@@ -9,168 +9,143 @@ import os
# Ensure the project root is in sys.path to resolve imports correctly
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
if project_root not in sys.path:
sys.path.append(project_root)
# Import the class to be tested
from gemini_cli_adapter import GeminiCliAdapter
class TestGeminiCliAdapterParity(unittest.TestCase):
def setUp(self):
"""Set up a fresh adapter instance and reset session state for each test."""
# Patch session_logger to prevent file operations during tests
self.session_logger_patcher = patch('gemini_cli_adapter.session_logger')
self.mock_session_logger = self.session_logger_patcher.start()
self.adapter = GeminiCliAdapter(binary_path="gemini")
self.adapter.session_id = None
self.adapter.last_usage = None
self.adapter.last_latency = 0.0
def tearDown(self):
self.session_logger_patcher.stop()
@patch('subprocess.Popen')
def test_count_tokens_uses_estimation(self, mock_popen):
"""
Test that count_tokens uses character-based estimation.
"""
contents_to_count = ["This is the first line.", "This is the second line."]
expected_chars = len("\n".join(contents_to_count))
expected_tokens = expected_chars // 4
token_count = self.adapter.count_tokens(contents=contents_to_count)
self.assertEqual(token_count, expected_tokens)
# Verify that NO subprocess was started for counting
mock_popen.assert_not_called()
@patch('subprocess.Popen')
def test_send_with_safety_settings_no_flags_added(self, mock_popen):
"""
Test that the send method does NOT add --safety flags when safety_settings are provided,
as this functionality is no longer supported via CLI flags.
"""
process_mock = MagicMock()
mock_stdout_content = json.dumps({"type": "result", "usage": {}}) + "\n"
process_mock.communicate.return_value = (mock_stdout_content, "")
process_mock.returncode = 0
mock_popen.return_value = process_mock
message_content = "User's prompt here."
safety_settings = [
{"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_ONLY_HIGH"},
{"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
]
self.adapter.send(message=message_content, safety_settings=safety_settings)
args, kwargs = mock_popen.call_args
command = args[0]
# Verify that no --safety flags were added to the command
self.assertNotIn("--safety", command)
# Verify that the message was passed correctly via stdin
process_mock.communicate.assert_called_once_with(input=message_content)
@patch('subprocess.Popen')
def test_send_without_safety_settings_no_flags(self, mock_popen):
"""
Test that when safety_settings is None or an empty list, no --safety flags are added.
"""
process_mock = MagicMock()
mock_stdout_content = json.dumps({"type": "result", "usage": {}}) + "\n"
process_mock.communicate.return_value = (mock_stdout_content, "")
process_mock.returncode = 0
mock_popen.return_value = process_mock
message_content = "Another prompt."
self.adapter.send(message=message_content, safety_settings=None)
args_none, _ = mock_popen.call_args
self.assertNotIn("--safety", args_none[0])
mock_popen.reset_mock()
self.adapter.send(message=message_content, safety_settings=[])
args_empty, _ = mock_popen.call_args
self.assertNotIn("--safety", args_empty[0])
@patch('subprocess.Popen')
def test_send_with_system_instruction_prepended_to_stdin(self, mock_popen):
"""
Test that the send method prepends the system instruction to the prompt
sent via stdin, and does NOT add a --system flag to the command.
"""
process_mock = MagicMock()
mock_stdout_content = json.dumps({"type": "result", "usage": {}}) + "\n"
process_mock.communicate.return_value = (mock_stdout_content, "")
process_mock.returncode = 0
mock_popen.return_value = process_mock
message_content = "User's prompt here."
system_instruction_text = "Some instruction"
expected_input = f"{system_instruction_text}\n\n{message_content}"
self.adapter.send(message=message_content, system_instruction=system_instruction_text)
args, kwargs = mock_popen.call_args
command = args[0]
# Verify that the system instruction was prepended to the input sent to communicate
process_mock.communicate.assert_called_once_with(input=expected_input)
# Verify that no --system flag was added to the command
self.assertNotIn("--system", command)
@patch('subprocess.Popen')
def test_send_with_model_parameter(self, mock_popen):
"""
Test that the send method correctly adds the -m <model> flag when a model is specified.
"""
process_mock = MagicMock()
mock_stdout_content = json.dumps({"type": "result", "usage": {}}) + "\n"
process_mock.communicate.return_value = (mock_stdout_content, "")
process_mock.returncode = 0
mock_popen.return_value = process_mock
message_content = "User's prompt here."
model_name = "gemini-1.5-flash"
expected_command_part = f'-m "{model_name}"'
self.adapter.send(message=message_content, model=model_name)
args, kwargs = mock_popen.call_args
command = args[0]
# Verify that the -m <model> flag was added to the command
self.assertIn(expected_command_part, command)
# Verify that the message was passed correctly via stdin
process_mock.communicate.assert_called_once_with(input=message_content)
@patch('subprocess.Popen')
def test_send_kills_process_on_communicate_exception(self, mock_popen):
"""
Test that if subprocess.Popen().communicate() raises an exception,
GeminiCliAdapter.send() kills the process and re-raises the exception.
"""
mock_process = MagicMock()
mock_popen.return_value = mock_process
# Define an exception to simulate
simulated_exception = RuntimeError("Simulated communicate error")
mock_process.communicate.side_effect = simulated_exception
message_content = "User message"
# Assert that the exception is raised and process is killed
with self.assertRaises(RuntimeError) as cm:
self.adapter.send(message=message_content)
# Verify that the process's kill method was called
mock_process.kill.assert_called_once()
# Verify that the correct exception was re-raised
self.assertIs(cm.exception, simulated_exception)
if __name__ == '__main__':
unittest.main()
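Two of the behaviours asserted above reduce to one-liners; in sketch form (derived from the expected values in these tests, not the adapter's real implementation):
def estimate_tokens(contents):
    # Character-based heuristic: roughly 4 characters per token, no subprocess call.
    return len("\n".join(contents)) // 4

def build_stdin_payload(message, system_instruction=None):
    # The system instruction is prepended to the stdin prompt rather than passed as a --system flag.
    return f"{system_instruction}\n\n{message}" if system_instruction else message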

View File

@@ -7,66 +7,57 @@ import json
from api_hook_client import ApiHookClient
def test_gemini_cli_context_bleed_prevention(live_gui):
"""
"""
Test that the GeminiCliAdapter correctly filters out echoed 'user' messages
and only shows assistant content in the GUI history.
"""
client = ApiHookClient("http://127.0.0.1:8999")
client.click("btn_reset")
client.set_value("auto_add_history", True)
# Create a specialized mock for context bleed
bleed_mock = os.path.abspath("tests/mock_context_bleed.py")
with open(bleed_mock, "w") as f:
f.write('''import sys, json
print(json.dumps({"type": "init", "session_id": "bleed-test"}), flush=True)
print(json.dumps({"type": "message", "role": "user", "content": "I am echoing you"}), flush=True)
print(json.dumps({"type": "message", "role": "assistant", "content": "Actual AI Response"}), flush=True)
print(json.dumps({"type": "result", "stats": {"total_tokens": 10}}), flush=True)
''')
cli_cmd = f'"{sys.executable}" "{bleed_mock}"'
client.set_value("current_provider", "gemini_cli")
client.set_value("gcli_path", cli_cmd)
client.set_value("ai_input", "Test context bleed")
client.click("btn_gen_send")
# Wait for completion
time.sleep(3)
session = client.get_session()
entries = session.get("session", {}).get("entries", [])
# Verify: We expect exactly one AI entry, and it must NOT contain the echoed user message
ai_entries = [e for e in entries if e.get("role") == "AI"]
assert len(ai_entries) == 1
assert ai_entries[0].get("content") == "Actual AI Response"
assert "echoing you" not in ai_entries[0].get("content")
os.remove(bleed_mock)
def test_gemini_cli_parameter_resilience(live_gui):
"""
"""
Test that mcp_client correctly handles 'file_path' and 'dir_path' aliases
sent by the AI instead of 'path'.
"""
client = ApiHookClient("http://127.0.0.1:8999")
client.click("btn_reset")
client.set_value("auto_add_history", True)
client.select_list_item("proj_files", "manual_slop")
# Create a mock that uses dir_path for list_directory
alias_mock = os.path.abspath("tests/mock_alias_tool.py")
bridge_path = os.path.abspath("scripts/cli_tool_bridge.py")
# Avoid backslashes in f-string expression part
if sys.platform == "win32":
bridge_path_str = bridge_path.replace("\\", "/")
else:
bridge_path_str = bridge_path
with open(alias_mock, "w") as f:
f.write(f'''import sys, json, os, subprocess
prompt = sys.stdin.read()
if '"role": "tool"' in prompt:
print(json.dumps({{"type": "message", "role": "assistant", "content": "Tool worked!"}}), flush=True)
@@ -88,83 +79,71 @@ else:
}}), flush=True)
print(json.dumps({{"type": "result", "stats": {{"total_tokens": 10}}}}), flush=True)
''')
cli_cmd = f'"{sys.executable}" "{alias_mock}"'
client.set_value("current_provider", "gemini_cli")
client.set_value("gcli_path", cli_cmd)
client.set_value("ai_input", "Test parameter aliases")
client.click("btn_gen_send")
# Handle approval
timeout = 15
start_time = time.time()
approved = False
while time.time() - start_time < timeout:
for ev in client.get_events():
if ev.get("type") == "ask_received":
requests.post("http://127.0.0.1:8999/api/ask/respond",
json={"request_id": ev.get("request_id"), "response": {"approved": True}})
approved = True
if approved: break
time.sleep(0.5)
assert approved, "Tool approval event never received"
# Verify tool result in history
time.sleep(2)
session = client.get_session()
entries = session.get("session", {}).get("entries", [])
# Check for "Tool worked!" which implies the tool execution was successful
found = any("Tool worked!" in e.get("content", "") for e in entries)
assert found, "Tool result indicating success not found in history"
os.remove(alias_mock)
def test_gemini_cli_loop_termination(live_gui):
"""
"""
Test that multi-round tool calling correctly terminates and preserves
payload (session context) between rounds.
"""
client = ApiHookClient("http://127.0.0.1:8999")
client.click("btn_reset")
client.set_value("auto_add_history", True)
client.select_list_item("proj_files", "manual_slop")
# This uses the existing mock_gemini_cli.py which is already designed for 2 rounds
mock_script = os.path.abspath("tests/mock_gemini_cli.py")
cli_cmd = f'"{sys.executable}" "{mock_script}"'
client.set_value("current_provider", "gemini_cli")
client.set_value("gcli_path", cli_cmd)
client.set_value("ai_input", "Perform multi-round tool test")
client.click("btn_gen_send")
# Handle approvals (mock does one tool call)
timeout = 20
start_time = time.time()
approved = False
while time.time() - start_time < timeout:
for ev in client.get_events():
if ev.get("type") == "ask_received":
requests.post("http://127.0.0.1:8999/api/ask/respond",
json={"request_id": ev.get("request_id"), "response": {"approved": True}})
approved = True
if approved: break
time.sleep(0.5)
# Wait for the second round and final answer
found_final = False
start_time = time.time()
while time.time() - start_time < 15:
session = client.get_session()
entries = session.get("session", {}).get("entries", [])
for e in entries:
if "processed the tool results" in e.get("content", ""):
found_final = True
break
if found_final: break
time.sleep(1)
assert found_final, "Final message after multi-round tool loop not found"

View File

@@ -6,136 +6,116 @@ import requests
from api_hook_client import ApiHookClient
def test_gemini_cli_full_integration(live_gui):
"""
"""
Integration test for the Gemini CLI provider and tool bridge.
Handles 'ask_received' events from the bridge and any other approval requests.
"""
client = ApiHookClient("http://127.0.0.1:8999")
# 0. Reset session and enable history
client.click("btn_reset")
client.set_value("auto_add_history", True)
# Switch to manual_slop project explicitly
client.select_list_item("proj_files", "manual_slop")
# 1. Setup paths and configure the GUI
# The real gemini CLI could be used here, but for CI/testing we always use the mock
mock_script = os.path.abspath("tests/mock_gemini_cli.py")
cli_cmd = f'"{sys.executable}" "{mock_script}"'
print(f"[TEST] Setting current_provider to gemini_cli")
client.set_value("current_provider", "gemini_cli")
print(f"[TEST] Setting gcli_path to {cli_cmd}")
client.set_value("gcli_path", cli_cmd)
# Verify settings
assert client.get_value("current_provider") == "gemini_cli"
# Clear events
client.get_events()
# 2. Trigger a message in the GUI
print("[TEST] Sending user message...")
client.set_value("ai_input", "Please read test.txt")
client.click("btn_gen_send")
# 3. Monitor for approval events
print("[TEST] Waiting for approval events...")
timeout = 45
start_time = time.time()
approved_count = 0
while time.time() - start_time < timeout:
events = client.get_events()
if events:
for ev in events:
etype = ev.get("type")
eid = ev.get("request_id") or ev.get("action_id")
print(f"[TEST] Received event: {etype} (ID: {eid})")
if etype in ["ask_received", "glob_approval_required", "script_confirmation_required"]:
print(f"[TEST] Approving {etype} {eid}")
if etype == "script_confirmation_required":
resp = requests.post(f"http://127.0.0.1:8999/api/confirm/{eid}", json={"approved": True})
else:
resp = requests.post("http://127.0.0.1:8999/api/ask/respond",
json={"request_id": eid, "response": {"approved": True}})
assert resp.status_code == 200
approved_count += 1
# Check if we got a final response in history
session = client.get_session()
entries = session.get("session", {}).get("entries", [])
found_final = False
for entry in entries:
content = entry.get("content", "")
if "Hello from mock!" in content or "processed the tool results" in content:
print(f"[TEST] Success! Found final message in history.")
found_final = True
break
if found_final:
break
time.sleep(1.0)
assert approved_count > 0, "No approval events were processed"
assert found_final, "Final message from mock CLI was not found in the GUI history"
def test_gemini_cli_rejection_and_history(live_gui):
"""
"""
Integration test for the Gemini CLI provider: Rejection flow and history.
"""
client = ApiHookClient("http://127.0.0.1:8999")
# 0. Reset session
client.click("btn_reset")
client.set_value("auto_add_history", True)
client.select_list_item("proj_files", "manual_slop")
mock_script = os.path.abspath("tests/mock_gemini_cli.py")
cli_cmd = f'"{sys.executable}" "{mock_script}"'
client.set_value("current_provider", "gemini_cli")
client.set_value("gcli_path", cli_cmd)
# 2. Trigger a message
print("[TEST] Sending user message (to be denied)...")
client.set_value("ai_input", "Deny me")
client.click("btn_gen_send")
# 3. Wait for event and reject
timeout = 20
start_time = time.time()
denied = False
while time.time() - start_time < timeout:
for ev in client.get_events():
etype = ev.get("type")
eid = ev.get("request_id")
print(f"[TEST] Received event: {etype}")
if etype == "ask_received":
print(f"[TEST] Denying request {eid}")
requests.post("http://127.0.0.1:8999/api/ask/respond",
json={"request_id": eid, "response": {"approved": False}})
denied = True
break
if denied: break
time.sleep(0.5)
assert denied, "No ask_received event to deny"
# 4. Verify rejection in history
print("[TEST] Waiting for rejection in history...")
rejection_found = False
start_time = time.time()
while time.time() - start_time < 20:
session = client.get_session()
entries = session.get("session", {}).get("entries", [])
for entry in entries:
if "Tool execution was denied" in entry.get("content", ""):
rejection_found = True
break
if rejection_found: break
time.sleep(1.0)
assert rejection_found, "Rejection message not found in history"
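Both integration tests repeat the same approve/deny polling loop; it could be factored into a helper along these lines (a sketch only; the endpoint path and event fields are taken from the requests above, while the helper itself is hypothetical):
import time
import requests

def respond_to_next_ask(client, approved=True, timeout=20, base_url="http://127.0.0.1:8999"):
    # Poll the hook server for an 'ask_received' event and answer it.
    deadline = time.time() + timeout
    while time.time() < deadline:
        for ev in client.get_events():
            if ev.get("type") == "ask_received":
                requests.post(f"{base_url}/api/ask/respond",
                              json={"request_id": ev.get("request_id"),
                                    "response": {"approved": approved}})
                return True
        time.sleep(0.5)
    return False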

View File

@@ -10,43 +10,38 @@ import ai_client
@pytest.fixture(autouse=True)
def setup_ai_client():
ai_client.reset_session()
ai_client.set_provider("gemini_cli", "gemini-2.5-flash")
ai_client.confirm_and_run_callback = lambda script, base_dir: "Mocked execution"
ai_client.comms_log_callback = lambda entry: None
ai_client.tool_log_callback = lambda script, result: None
yield
@patch('ai_client.GeminiCliAdapter')
@patch('ai_client._get_combined_system_prompt')
def test_send_invokes_adapter_send(mock_prompt, mock_adapter_class):
mock_prompt.return_value = "Mocked Prompt"
mock_instance = mock_adapter_class.return_value
mock_instance.send.return_value = {"text": "Done", "tool_calls": []}
mock_instance.last_usage = {"input_tokens": 10}
mock_instance.last_latency = 0.1
mock_instance.session_id = None
ai_client.send("context", "message", discussion_history="hist")
expected_payload = "[DISCUSSION HISTORY]\n\nhist\n\n---\n\nmessage"
assert mock_instance.send.called
args, kwargs = mock_instance.send.call_args
assert args[0] == expected_payload
assert kwargs['system_instruction'] == "Mocked Prompt\n\n<context>\ncontext\n</context>"
@patch('ai_client.GeminiCliAdapter')
def test_get_history_bleed_stats(mock_adapter_class):
mock_instance = mock_adapter_class.return_value
mock_instance.send.return_value = {"text": "txt", "tool_calls": []}
mock_instance.last_usage = {"input_tokens": 1500}
mock_instance.last_latency = 0.5
mock_instance.session_id = "sess"
# Initialize by sending a message
ai_client.send("context", "msg")
stats = ai_client.get_history_bleed_stats()
assert stats["provider"] == "gemini_cli"
assert stats["current"] == 1500

View File

@@ -10,41 +10,34 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from ai_client import get_gemini_cache_stats, reset_session
def test_get_gemini_cache_stats_with_mock_client():
"""
"""
Test that get_gemini_cache_stats correctly processes cache lists
from a mocked client instance.
"""
# Ensure a clean state before the test by resetting the session
reset_session()
# 1. Create a mock for the cache object that the client will return
mock_cache = MagicMock()
mock_cache.name = "cachedContents/test-cache"
mock_cache.display_name = "Test Cache"
mock_cache.model = "models/gemini-1.5-pro-001"
mock_cache.size_bytes = 1024
# 2. Create a mock for the client instance
mock_client_instance = MagicMock()
# Configure its `caches.list` method to return our mock cache
mock_client_instance.caches.list.return_value = [mock_cache]
# 3. Patch the Client constructor to return our mock instance
# This intercepts the `_ensure_gemini_client` call inside the function
with patch('google.genai.Client', return_value=mock_client_instance) as mock_client_constructor:
# 4. Call the function under test
stats = get_gemini_cache_stats()
# 5. Assert that the function behaved as expected
# It should have constructed the client
mock_client_constructor.assert_called_once()
# It should have called the `list` method on the `caches` attribute
mock_client_instance.caches.list.assert_called_once()
# The returned stats dictionary should be correct
assert "cache_count" in stats
assert "total_size_bytes" in stats
assert stats["cache_count"] == 1
assert stats["total_size_bytes"] == 1024

View File

@@ -6,43 +6,40 @@ from events import EventEmitter
@pytest.fixture
def app_instance():
"""
"""
Fixture to create an instance of the gui_2.App class for testing.
It mocks functions that would render a window or block execution.
"""
if not hasattr(ai_client, 'events') or ai_client.events is None:
ai_client.events = EventEmitter()
with (
patch('gui_2.load_config', return_value={'ai': {}, 'projects': {}}),
patch('gui_2.save_config'),
patch('gui_2.project_manager'),
patch('gui_2.session_logger'),
patch('gui_2.immapp.run'),
patch.object(App, '_load_active_project'),
patch.object(App, '_fetch_models'),
patch.object(App, '_load_fonts'),
patch.object(App, '_post_init')
):
yield App
def test_app_subscribes_to_events(app_instance):
"""
"""
This test checks that the App's __init__ method subscribes the necessary
event handlers to the ai_client.events emitter.
This test will fail until the event subscription logic is added to gui_2.App.
"""
with patch.object(ai_client.events, 'on') as mock_on:
app = app_instance()
mock_on.assert_called()
calls = mock_on.call_args_list
event_names = [call.args[0] for call in calls]
assert "request_start" in event_names
assert "response_received" in event_names
assert "tool_execution" in event_names
for call in calls:
handler = call.args[1]
assert hasattr(handler, '__self__')
assert handler.__self__ is app
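For orientation, the emitter contract these assertions rely on is roughly the following (a minimal sketch, not the project's events.EventEmitter):
class MiniEmitter:
    def __init__(self):
        self._handlers = {}

    def on(self, name, handler):
        # Register a handler (here: a bound method of the App instance) for an event name.
        self._handlers.setdefault(name, []).append(handler)

    def emit(self, name, *args, **kwargs):
        for handler in self._handlers.get(name, []):
            handler(*args, **kwargs)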

View File

@@ -4,45 +4,43 @@ from gui_2 import App
@pytest.fixture
def app_instance():
with (
patch('gui_2.load_config', return_value={'gui': {'show_windows': {}}}),
patch('gui_2.save_config'),
patch('gui_2.project_manager'),
patch('gui_2.session_logger'),
patch('gui_2.immapp.run'),
patch.object(App, '_load_active_project'),
patch.object(App, '_fetch_models'),
patch.object(App, '_load_fonts'),
patch.object(App, '_post_init')
):
yield App()
def test_gui2_hubs_exist_in_show_windows(app_instance):
"""
"""
Verifies that the new consolidated Hub windows are defined in the App's show_windows.
This ensures they will be available in the 'Windows' menu.
"""
expected_hubs = [
"Context Hub",
"AI Settings",
"Discussion Hub",
"Operations Hub",
"Files & Media",
"Theme",
]
for hub in expected_hubs:
assert hub in app_instance.show_windows, f"Expected hub window '{hub}' not found in show_windows"
def test_gui2_old_windows_removed_from_show_windows(app_instance):
"""
"""
Verifies that the old fragmented windows are removed from show_windows.
"""
old_windows = [
"Projects", "Files", "Screenshots",
"Provider", "System Prompts",
"Message", "Response", "Tool Calls", "Comms History"
]
for old_win in old_windows:
assert old_win not in app_instance.show_windows, f"Old window '{old_win}' should have been removed from show_windows"

View File

@@ -6,74 +6,65 @@ from events import EventEmitter
@pytest.fixture
def app_instance():
if not hasattr(ai_client, 'events') or ai_client.events is None:
ai_client.events = EventEmitter()
with (
patch('gui_2.load_config', return_value={'ai': {}, 'projects': {}}),
patch('gui_2.save_config'),
patch('gui_2.project_manager'),
patch('gui_2.session_logger'),
patch('gui_2.immapp.run'),
patch.object(App, '_load_active_project'),
patch.object(App, '_fetch_models'),
patch.object(App, '_load_fonts'),
patch.object(App, '_post_init')
):
yield App()
def test_mcp_tool_call_is_dispatched(app_instance):
"""
"""
This test verifies that when the AI returns a tool call for an MCP function,
the ai_client correctly dispatches it to mcp_client.
This will fail until mcp_client is properly integrated.
"""
# 1. Define the mock tool call from the AI
mock_fc = MagicMock()
mock_fc.name = "read_file"
mock_fc.args = {"file_path": "test.txt"}
# 2. Construct the mock AI response (Gemini format)
mock_response_with_tool = MagicMock()
mock_part = MagicMock()
mock_part.text = ""
mock_part.function_call = mock_fc
mock_candidate = MagicMock()
mock_candidate.content.parts = [mock_part]
mock_candidate.finish_reason.name = "TOOL_CALLING"
mock_response_with_tool.candidates = [mock_candidate]
class DummyUsage:
prompt_token_count = 100
candidates_token_count = 10
cached_content_token_count = 0
mock_response_with_tool.usage_metadata = DummyUsage()
# 3. Create a mock for the final AI response after the tool call
mock_response_final = MagicMock()
mock_response_final.text = "Final answer"
mock_response_final.candidates = []
mock_response_final.usage_metadata = DummyUsage()
# 4. Patch the necessary components
with patch("ai_client._ensure_gemini_client"), \
patch("ai_client._gemini_client") as mock_client, \
patch('mcp_client.dispatch', return_value="file content") as mock_dispatch:
mock_chat = mock_client.chats.create.return_value
mock_chat.send_message.side_effect = [mock_response_with_tool, mock_response_final]
ai_client.set_provider("gemini", "mock-model")
# 5. Call the send function
ai_client.send(
md_content="some context",
user_message="read the file",
base_dir=".",
file_items=[],
discussion_history=""
)
# 6. Assert that the MCP dispatch function was called
mock_dispatch.assert_called_once_with("read_file", {"file_path": "test.txt"})

View File

@@ -15,70 +15,62 @@ TEST_CALLBACK_FILE = Path("temp_callback_output.txt")
@pytest.fixture(scope="function", autouse=True)
def cleanup_callback_file():
"""Ensures the test callback file is cleaned up before and after each test."""
if TEST_CALLBACK_FILE.exists():
TEST_CALLBACK_FILE.unlink()
yield
if TEST_CALLBACK_FILE.exists():
TEST_CALLBACK_FILE.unlink()
"""Ensures the test callback file is cleaned up before and after each test."""
if TEST_CALLBACK_FILE.exists():
TEST_CALLBACK_FILE.unlink()
yield
if TEST_CALLBACK_FILE.exists():
TEST_CALLBACK_FILE.unlink()
def test_gui2_set_value_hook_works(live_gui):
"""
"""
Tests that the 'set_value' GUI hook is correctly implemented.
"""
client = ApiHookClient()
assert client.wait_for_server(timeout=10)
test_value = f"New value set by test: {uuid.uuid4()}"
gui_data = {'action': 'set_value', 'item': 'ai_input', 'value': test_value}
response = client.post_gui(gui_data)
assert response == {'status': 'queued'}
# Verify the value was actually set using the new get_value hook
time.sleep(0.5)
current_value = client.get_value('ai_input')
assert current_value == test_value
def test_gui2_click_hook_works(live_gui):
"""
"""
Tests that the 'click' GUI hook for the 'Reset' button is implemented.
"""
client = ApiHookClient()
assert client.wait_for_server(timeout=10)
# First, set some state that 'Reset' would clear.
test_value = "This text should be cleared by the reset button."
client.set_value('ai_input', test_value)
time.sleep(0.5)
assert client.get_value('ai_input') == test_value
# Now, trigger the click
client.click('btn_reset')
time.sleep(0.5)
# Verify it was reset
assert client.get_value('ai_input') == ""
def test_gui2_custom_callback_hook_works(live_gui):
"""
"""
Tests that the 'custom_callback' GUI hook is correctly implemented.
"""
client = ApiHookClient()
assert client.wait_for_server(timeout=10)
test_data = f"Callback executed: {uuid.uuid4()}"
gui_data = {
'action': 'custom_callback',
'callback': '_test_callback_func_write_to_file',
'args': [test_data]
}
response = client.post_gui(gui_data)
assert response == {'status': 'queued'}
time.sleep(1) # Give gui_2.py time to process its task queue
# Assert that the file WAS created and contains the correct data
assert TEST_CALLBACK_FILE.exists(), "Custom callback was NOT executed, or file path is wrong!"
with open(TEST_CALLBACK_FILE, "r") as f:
content = f.read()
assert content == test_data, "Callback executed, but file content is incorrect."
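This test assumes gui_2 exposes a test-only callback that writes its argument to TEST_CALLBACK_FILE; reconstructed from the assertions above, it would look roughly like this (hypothetical sketch, not the actual gui_2 code):
def _test_callback_func_write_to_file(data):
    # Test-only hook target: persist the payload so the test can assert on it.
    with open("temp_callback_output.txt", "w") as f:
        f.write(data)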

View File

@@ -12,78 +12,66 @@ from api_hook_client import ApiHookClient
_shared_metrics = {}
def test_performance_benchmarking(live_gui):
"""
"""
Collects performance metrics for the current GUI script (parameterized as gui_legacy.py and gui_2.py).
"""
process, gui_script = live_gui
client = ApiHookClient()
# Wait for app to stabilize and render some frames
time.sleep(3.0)
# Collect metrics over 5 seconds
fps_values = []
cpu_values = []
frame_time_values = []
start_time = time.time()
while time.time() - start_time < 5:
try:
perf_data = client.get_performance()
metrics = perf_data.get('performance', {})
if metrics:
fps = metrics.get('fps', 0.0)
cpu = metrics.get('cpu_percent', 0.0)
ft = metrics.get('last_frame_time_ms', 0.0)
# In some CI environments without a display, metrics might be 0
# We only record positive ones to avoid skewing averages if hooks are failing
if fps > 0:
fps_values.append(fps)
cpu_values.append(cpu)
frame_time_values.append(ft)
time.sleep(0.1)
except Exception:
break
avg_fps = sum(fps_values) / len(fps_values) if fps_values else 0
avg_cpu = sum(cpu_values) / len(cpu_values) if cpu_values else 0
avg_ft = sum(frame_time_values) / len(frame_time_values) if frame_time_values else 0
_shared_metrics[gui_script] = {
"avg_fps": avg_fps,
"avg_cpu": avg_cpu,
"avg_ft": avg_ft
}
print(f"\n[Test] Results for {gui_script}: FPS={avg_fps:.2f}, CPU={avg_cpu:.2f}%, FT={avg_ft:.2f}ms")
# Absolute minimum requirements
if avg_fps > 0:
assert avg_fps >= 30, f"{gui_script} FPS {avg_fps:.2f} is below 30 FPS threshold"
assert avg_ft <= 33.3, f"{gui_script} Frame time {avg_ft:.2f}ms is above 33.3ms threshold"
def test_performance_parity():
"""
"""
Compare the metrics collected in the parameterized test_performance_benchmarking.
"""
if "gui_legacy.py" not in _shared_metrics or "gui_2.py" not in _shared_metrics:
if len(_shared_metrics) < 2:
pytest.skip("Metrics for both GUIs not yet collected.")
gui_m = _shared_metrics["gui_legacy.py"]
gui2_m = _shared_metrics["gui_2.py"]
# FPS parity check: assert within 15% for now (long-term target is 5%) and log the actual difference
fps_diff_pct = abs(gui_m["avg_fps"] - gui2_m["avg_fps"]) / gui_m["avg_fps"] if gui_m["avg_fps"] > 0 else 0
cpu_diff_pct = abs(gui_m["avg_cpu"] - gui2_m["avg_cpu"]) / gui_m["avg_cpu"] if gui_m["avg_cpu"] > 0 else 0
print(f"\n--- Performance Parity Results ---")
print(f"FPS Diff: {fps_diff_pct*100:.2f}%")
print(f"CPU Diff: {cpu_diff_pct*100:.2f}%")
# The long-term FPS requirement is 5%, but the assertion uses the looser 15% bound for now.
# CPU usage is much noisier, so it gets far more leeway (a 300% bound).
assert fps_diff_pct <= 0.15, f"FPS difference {fps_diff_pct*100:.2f}% exceeds 15% threshold"
assert cpu_diff_pct <= 3.0, f"CPU difference {cpu_diff_pct*100:.2f}% exceeds 300% threshold"
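# Worked example of the parity formula above (illustrative numbers only): if gui_legacy
# averages 60 FPS and gui_2 averages 54 FPS, the relative difference is
# abs(60 - 54) / 60 = 0.10, i.e. 10%, which passes the 15% gate.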
if "gui_legacy.py" not in _shared_metrics or "gui_2.py" not in _shared_metrics:
if len(_shared_metrics) < 2:
pytest.skip("Metrics for both GUIs not yet collected.")
gui_m = _shared_metrics["gui_legacy.py"]
gui2_m = _shared_metrics["gui_2.py"]
# FPS Parity Check (+/- 15% leeway for now, target is 5%)
# Actually I'll use 0.15 for assertion and log the actual.
fps_diff_pct = abs(gui_m["avg_fps"] - gui2_m["avg_fps"]) / gui_m["avg_fps"] if gui_m["avg_fps"] > 0 else 0
cpu_diff_pct = abs(gui_m["avg_cpu"] - gui2_m["avg_cpu"]) / gui_m["avg_cpu"] if gui_m["avg_cpu"] > 0 else 0
print(f"\n--- Performance Parity Results ---")
print(f"FPS Diff: {fps_diff_pct*100:.2f}%")
print(f"CPU Diff: {cpu_diff_pct*100:.2f}%")
# We follow the 5% requirement for FPS
# For CPU we might need more leeway
assert fps_diff_pct <= 0.15, f"FPS difference {fps_diff_pct*100:.2f}% exceeds 15% threshold"
assert cpu_diff_pct <= 3.0, f"CPU difference {cpu_diff_pct*100:.2f}% exceeds 300% threshold"

View File

@@ -6,75 +6,70 @@ from events import UserRequestEvent
@pytest.fixture
def mock_gui():
with (
patch('gui_2.load_config', return_value={
"ai": {"provider": "gemini", "model": "model-1"},
"projects": {"paths": [], "active": ""},
"gui": {"show_windows": {}}
}),
patch('gui_2.project_manager.load_project', return_value={}),
patch('gui_2.project_manager.migrate_from_legacy_config', return_value={}),
patch('gui_2.project_manager.save_project'),
patch('gui_2.session_logger.open_session'),
patch('gui_2.App._init_ai_and_hooks'),
patch('gui_2.App._fetch_models')
):
gui = App()
return gui
def test_handle_generate_send_pushes_event(mock_gui):
# Mock _do_generate to return sample data
mock_gui._do_generate = MagicMock(return_value=(
"full_md", "path", [], "stable_md", "disc_text"
))
mock_gui.ui_ai_input = "test prompt"
mock_gui.ui_files_base_dir = "."
# Mock event_queue.put
mock_gui.event_queue.put = MagicMock()
# We need to mock asyncio.run_coroutine_threadsafe to immediately execute
with patch('asyncio.run_coroutine_threadsafe') as mock_run:
mock_gui._handle_generate_send()
# Verify run_coroutine_threadsafe was called
assert mock_run.called
# Verify the call to event_queue.put was correct
# This is a bit tricky since the first arg to run_coroutine_threadsafe
# is the coroutine returned by event_queue.put().
# Let's verify that the call to put occurred.
mock_gui.event_queue.put.assert_called_once()
args, kwargs = mock_gui.event_queue.put.call_args
assert args[0] == "user_request"
event = args[1]
assert isinstance(event, UserRequestEvent)
assert event.prompt == "test prompt"
assert event.stable_md == "stable_md"
assert event.disc_text == "disc_text"
assert event.base_dir == "."
def test_user_request_event_payload():
payload = UserRequestEvent(
prompt="hello",
stable_md="md",
file_items=[],
disc_text="disc",
base_dir="."
)
d = payload.to_dict()
assert d["prompt"] == "hello"
assert d["stable_md"] == "md"
assert d["file_items"] == []
assert d["disc_text"] == "disc"
assert d["base_dir"] == "."
@pytest.mark.asyncio
async def test_async_event_queue():
from events import AsyncEventQueue
q = AsyncEventQueue()
await q.put("test_event", {"data": 123})
name, payload = await q.get()
assert name == "test_event"
assert payload["data"] == 123

View File

@@ -13,53 +13,48 @@ from gui_legacy import App
@pytest.fixture
def app_instance():
dpg.create_context()
with patch('dearpygui.dearpygui.create_viewport'), \
patch('dearpygui.dearpygui.setup_dearpygui'), \
patch('dearpygui.dearpygui.show_viewport'), \
patch('dearpygui.dearpygui.start_dearpygui'), \
patch('gui_legacy.load_config', return_value={}), \
patch.object(App, '_rebuild_files_list'), \
patch.object(App, '_rebuild_shots_list'), \
patch.object(App, '_rebuild_disc_list'), \
patch.object(App, '_rebuild_disc_roles_list'), \
patch.object(App, '_rebuild_discussion_selector'), \
patch.object(App, '_refresh_project_widgets'):
app = App()
yield app
dpg.destroy_context()
def test_diagnostics_panel_initialization(app_instance):
assert "Diagnostics" in app_instance.window_info
assert app_instance.window_info["Diagnostics"] == "win_diagnostics"
assert "frame_time" in app_instance.perf_history
assert len(app_instance.perf_history["frame_time"]) == 100
assert "Diagnostics" in app_instance.window_info
assert app_instance.window_info["Diagnostics"] == "win_diagnostics"
assert "frame_time" in app_instance.perf_history
assert len(app_instance.perf_history["frame_time"]) == 100
def test_diagnostics_panel_updates(app_instance):
# Mock dependencies
mock_metrics = {
'last_frame_time_ms': 10.0,
'fps': 100.0,
'cpu_percent': 50.0,
'input_lag_ms': 5.0
}
app_instance.perf_monitor.get_metrics = MagicMock(return_value=mock_metrics)
with patch('dearpygui.dearpygui.is_item_shown', return_value=True), \
patch('dearpygui.dearpygui.set_value') as mock_set_value, \
patch('dearpygui.dearpygui.configure_item') as mock_configure_item, \
patch('dearpygui.dearpygui.does_item_exist', return_value=True):
# We also need to mock ai_client stats
with patch('ai_client.get_history_bleed_stats', return_value={}):
app_instance._update_performance_diagnostics()
# Verify UI updates
mock_set_value.assert_any_call("perf_fps_text", "100.0")
mock_set_value.assert_any_call("perf_frame_text", "10.0ms")
mock_set_value.assert_any_call("perf_cpu_text", "50.0%")
mock_set_value.assert_any_call("perf_lag_text", "5.0ms")
# Verify history update
assert app_instance.perf_history["frame_time"][-1] == 10.0

View File

@@ -8,55 +8,47 @@ import ai_client
@pytest.fixture
def app_instance():
"""
"""
Fixture to create an instance of the App class for testing.
It creates a real DPG context but mocks functions that would
render a window or block execution.
"""
dpg.create_context()
with patch('dearpygui.dearpygui.create_viewport'), \
patch('dearpygui.dearpygui.setup_dearpygui'), \
patch('dearpygui.dearpygui.show_viewport'), \
patch('dearpygui.dearpygui.start_dearpygui'), \
patch('gui_legacy.load_config', return_value={}), \
patch('gui_legacy.PerformanceMonitor'), \
patch('gui_legacy.shell_runner'), \
patch('gui_legacy.project_manager'), \
patch.object(App, '_load_active_project'), \
patch.object(App, '_rebuild_files_list'), \
patch.object(App, '_rebuild_shots_list'), \
patch.object(App, '_rebuild_disc_list'), \
patch.object(App, '_rebuild_disc_roles_list'), \
patch.object(App, '_rebuild_discussion_selector'), \
patch.object(App, '_refresh_project_widgets'):
app = App()
yield app
dpg.destroy_context()
def test_gui_updates_on_event(app_instance):
# Patch dependencies for the test
with patch('dearpygui.dearpygui.set_value') as mock_set_value, \
patch('dearpygui.dearpygui.does_item_exist', return_value=True), \
patch('dearpygui.dearpygui.configure_item'), \
patch('ai_client.get_history_bleed_stats') as mock_stats:
mock_stats.return_value = {"percentage": 50.0, "current": 500, "limit": 1000}
# We'll use patch.object to see if _refresh_api_metrics is called
with patch.object(app_instance, '_refresh_api_metrics', wraps=app_instance._refresh_api_metrics) as mock_refresh:
# Simulate event
ai_client.events.emit("response_received", payload={})
# Process tasks manually
app_instance._process_pending_gui_tasks()
# Verify that _refresh_api_metrics was called
mock_refresh.assert_called_once()
# Verify that dpg.set_value was called for the metrics widgets
calls = [call.args[0] for call in mock_set_value.call_args_list]
assert "token_budget_bar" in calls
assert "token_budget_label" in calls

View File

@@ -9,32 +9,27 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from api_hook_client import ApiHookClient
def test_idle_performance_requirements(live_gui):
"""
"""
Requirement: GUI must maintain stable performance on idle.
"""
client = ApiHookClient()
# Wait for app to stabilize and render some frames
time.sleep(2.0)
# Get multiple samples to be sure
samples = []
for _ in range(5):
perf_data = client.get_performance()
samples.append(perf_data)
time.sleep(0.5)
# Check for valid metrics
valid_ft_count = 0
for sample in samples:
performance = sample.get('performance', {})
frame_time = performance.get('last_frame_time_ms', 0.0)
# We expect a positive frame time if rendering is happening
if frame_time > 0:
valid_ft_count += 1
assert frame_time < 33.3, f"Frame time {frame_time}ms exceeds 30fps threshold"
print(f"[Test] Valid frame time samples: {valid_ft_count}/5")
# In some CI environments without a real display, frame time might remain 0
# but we've verified the hook is returning the dictionary.

View File

@@ -9,45 +9,37 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from api_hook_client import ApiHookClient
def test_comms_volume_stress_performance(live_gui):
"""
"""
Stress test: Inject many session entries and verify performance doesn't degrade.
"""
client = ApiHookClient()
# 1. Capture baseline
time.sleep(2.0) # Wait for stability
baseline_resp = client.get_performance()
baseline = baseline_resp.get('performance', {})
baseline_ft = baseline.get('last_frame_time_ms', 0.0)
# 2. Inject 50 "dummy" session entries
# Role must match DISC_ROLES in gui_legacy.py (User, AI, Vendor API, System)
large_session = []
for i in range(50):
large_session.append({
"role": "User",
"content": f"Stress test entry {i} " * 5,
"ts": time.time(),
"collapsed": False
})
client.post_session(large_session)
# Give it a moment to process UI updates
time.sleep(1.0)
# 3. Capture stress performance
stress_resp = client.get_performance()
stress = stress_resp.get('performance', {})
stress_ft = stress.get('last_frame_time_ms', 0.0)
print(f"Baseline FT: {baseline_ft:.2f}ms, Stress FT: {stress_ft:.2f}ms")
# If we got valid timing, assert it's within reason
if stress_ft > 0:
assert stress_ft < 33.3, f"Stress frame time {stress_ft:.2f}ms exceeds 30fps threshold"
# Ensure the session actually updated
session_data = client.get_session()
entries = session_data.get('session', {}).get('entries', [])
assert len(entries) >= 50, f"Expected at least 50 entries, got {len(entries)}"

View File

@@ -17,103 +17,88 @@ from gui_legacy import App
@pytest.fixture
def app_instance():
"""
"""
Fixture to create an instance of the App class for testing.
It creates a real DPG context but mocks functions that would
render a window or block execution.
"""
dpg.create_context()
# Patch only the functions that would show a window or block,
# and the App methods that rebuild UI on init.
with patch('dearpygui.dearpygui.create_viewport'), \
patch('dearpygui.dearpygui.setup_dearpygui'), \
patch('dearpygui.dearpygui.show_viewport'), \
patch('dearpygui.dearpygui.start_dearpygui'), \
patch('gui_legacy.load_config', return_value={}), \
patch.object(App, '_rebuild_files_list'), \
patch.object(App, '_rebuild_shots_list'), \
patch.object(App, '_rebuild_disc_list'), \
patch.object(App, '_rebuild_disc_roles_list'), \
patch.object(App, '_rebuild_discussion_selector'), \
patch.object(App, '_refresh_project_widgets'):
app = App()
yield app
dpg.destroy_context()
def test_telemetry_panel_updates_correctly(app_instance):
"""
"""
Tests that the _update_performance_diagnostics method correctly updates
DPG widgets based on the stats from ai_client.
"""
# 1. Set the provider to anthropic
app_instance.current_provider = "anthropic"
# 2. Define the mock stats
mock_stats = {
"provider": "anthropic",
"limit": 180000,
"current": 135000,
"percentage": 75.0,
}
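# Sanity check on the numbers above: 135,000 / 180,000 = 0.75, which is why the test
# expects the progress bar to be set to 0.75 and the label to read "135,000 / 180,000".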
# 3. Patch the dependencies
app_instance._last_bleed_update_time = 0 # Force update
with patch('ai_client.get_history_bleed_stats', return_value=mock_stats) as mock_get_stats, \
patch('dearpygui.dearpygui.set_value') as mock_set_value, \
patch('dearpygui.dearpygui.configure_item') as mock_configure_item, \
patch('dearpygui.dearpygui.is_item_shown', return_value=False), \
patch('dearpygui.dearpygui.does_item_exist', return_value=True) as mock_does_item_exist:
# 4. Call the method under test
app_instance._refresh_api_metrics()
# 5. Assert the results
mock_get_stats.assert_called_once()
# Assert history bleed widgets were updated
mock_set_value.assert_any_call("token_budget_bar", 0.75)
mock_set_value.assert_any_call("token_budget_label", "135,000 / 180,000")
# Assert Gemini-specific widget was hidden
mock_configure_item.assert_any_call("gemini_cache_label", show=False)
def test_cache_data_display_updates_correctly(app_instance):
"""
"""
Tests that the _update_performance_diagnostics method correctly updates the
GUI with Gemini cache statistics when the provider is set to Gemini.
"""
# 1. Set the provider to Gemini
app_instance.current_provider = "gemini"
# 2. Define mock cache stats
mock_cache_stats = {
'cache_count': 5,
'total_size_bytes': 12345
}
# Expected formatted string
expected_text = "Gemini Caches: 5 (12.1 KB)"
# 3. Patch dependencies
app_instance._last_bleed_update_time = 0 # Force update
with patch('ai_client.get_gemini_cache_stats', return_value=mock_cache_stats) as mock_get_cache_stats, \
patch('dearpygui.dearpygui.set_value') as mock_set_value, \
patch('dearpygui.dearpygui.configure_item') as mock_configure_item, \
patch('dearpygui.dearpygui.is_item_shown', return_value=False), \
patch('dearpygui.dearpygui.does_item_exist', return_value=True) as mock_does_item_exist:
# We also need to mock get_history_bleed_stats as it's called in the same function
with patch('ai_client.get_history_bleed_stats', return_value={}):
# 4. Call the method under test with payload
app_instance._refresh_api_metrics(payload={'cache_stats': mock_cache_stats})
# 5. Assert the results
# mock_get_cache_stats.assert_called_once() # No longer called synchronously
# Check that the UI item was shown and its value was set
mock_configure_item.assert_any_call("gemini_cache_label", show=True)
mock_set_value.assert_any_call("gemini_cache_label", expected_text)

View File

@@ -8,173 +8,163 @@ from pathlib import Path
from fastapi.testclient import TestClient
class TestHeadlessAPI(unittest.TestCase):
def setUp(self):
# We need an App instance to initialize the API, but we want to avoid GUI stuff
with patch('gui_2.session_logger.open_session'), \
patch('gui_2.ai_client.set_provider'), \
patch('gui_2.session_logger.close_session'):
self.app_instance = gui_2.App()
# Set a default API key for tests
self.test_api_key = "test-secret-key"
self.app_instance.config["headless"] = {"api_key": self.test_api_key}
self.headers = {"X-API-KEY": self.test_api_key}
# Clear any leftover state
self.app_instance._pending_actions = {}
self.app_instance._pending_dialog = None
self.api = self.app_instance.create_api()
self.client = TestClient(self.api)
def test_health_endpoint(self):
response = self.client.get("/health")
self.assertEqual(response.status_code, 200)
self.assertEqual(response.json(), {"status": "ok"})
def test_status_endpoint_unauthorized(self):
# Ensure a key is required
with patch.dict(self.app_instance.config, {"headless": {"api_key": "some-required-key"}}):
response = self.client.get("/status")
self.assertEqual(response.status_code, 403)
def test_status_endpoint_authorized(self):
# We'll use a test key
headers = {"X-API-KEY": "test-secret-key"}
with patch.dict(self.app_instance.config, {"headless": {"api_key": "test-secret-key"}}):
response = self.client.get("/status", headers=headers)
self.assertEqual(response.status_code, 200)
def test_generate_endpoint(self):
payload = {
"prompt": "Hello AI"
}
# Mock ai_client.send and get_comms_log
with patch('gui_2.ai_client.send') as mock_send, \
patch('gui_2.ai_client.get_comms_log') as mock_log:
mock_send.return_value = "Hello from Mock AI"
mock_log.return_value = [{
"kind": "response",
"payload": {
"usage": {"input_tokens": 10, "output_tokens": 5}
}
}]
response = self.client.post("/api/v1/generate", json=payload, headers=self.headers)
self.assertEqual(response.status_code, 200)
data = response.json()
self.assertEqual(data["text"], "Hello from Mock AI")
self.assertIn("metadata", data)
self.assertEqual(data["usage"]["input_tokens"], 10)
def test_pending_actions_endpoint(self):
# Manually add a pending action
with patch('gui_2.uuid.uuid4', return_value="test-action-id"):
dialog = gui_2.ConfirmDialog("dir", ".")
self.app_instance._pending_actions[dialog._uid] = dialog
response = self.client.get("/api/v1/pending_actions", headers=self.headers)
self.assertEqual(response.status_code, 200)
data = response.json()
self.assertEqual(len(data), 1)
self.assertEqual(data[0]["action_id"], "test-action-id")
def test_confirm_action_endpoint(self):
# Manually add a pending action
with patch('gui_2.uuid.uuid4', return_value="test-confirm-id"):
dialog = gui_2.ConfirmDialog("dir", ".")
self.app_instance._pending_actions[dialog._uid] = dialog
payload = {"approved": True}
response = self.client.post("/api/v1/confirm/test-confirm-id", json=payload, headers=self.headers)
self.assertEqual(response.status_code, 200)
self.assertTrue(dialog._done)
self.assertTrue(dialog._approved)
response = self.client.post("/api/v1/generate", json=payload, headers=self.headers)
self.assertEqual(response.status_code, 200)
data = response.json()
self.assertEqual(data["text"], "Hello from Mock AI")
self.assertIn("metadata", data)
self.assertEqual(data["usage"]["input_tokens"], 10)
def test_list_sessions_endpoint(self):
# Ensure logs directory exists
Path("logs").mkdir(exist_ok=True)
# Create a dummy log
dummy_log = Path("logs/test_session_api.log")
dummy_log.write_text("dummy content")
try:
response = self.client.get("/api/v1/sessions", headers=self.headers)
self.assertEqual(response.status_code, 200)
data = response.json()
self.assertIn("test_session_api.log", data)
finally:
if dummy_log.exists():
dummy_log.unlink()
def test_get_context_endpoint(self):
response = self.client.get("/api/v1/context", headers=self.headers)
self.assertEqual(response.status_code, 200)
data = response.json()
self.assertIn("files", data)
self.assertIn("screenshots", data)
self.assertIn("files_base_dir", data)
response = self.client.get("/api/v1/pending_actions", headers=self.headers)
self.assertEqual(response.status_code, 200)
data = response.json()
self.assertEqual(len(data), 1)
self.assertEqual(data[0]["action_id"], "test-action-id")
def test_confirm_action_endpoint(self):
# Manually add a pending action
with patch('gui_2.uuid.uuid4', return_value="test-confirm-id"):
dialog = gui_2.ConfirmDialog("dir", ".")
self.app_instance._pending_actions[dialog._uid] = dialog
payload = {"approved": True}
response = self.client.post("/api/v1/confirm/test-confirm-id", json=payload, headers=self.headers)
self.assertEqual(response.status_code, 200)
self.assertTrue(dialog._done)
self.assertTrue(dialog._approved)
def test_list_sessions_endpoint(self):
# Ensure logs directory exists
Path("logs").mkdir(exist_ok=True)
# Create a dummy log
dummy_log = Path("logs/test_session_api.log")
dummy_log.write_text("dummy content")
try:
response = self.client.get("/api/v1/sessions", headers=self.headers)
self.assertEqual(response.status_code, 200)
data = response.json()
self.assertIn("test_session_api.log", data)
finally:
if dummy_log.exists():
dummy_log.unlink()
def test_get_context_endpoint(self):
response = self.client.get("/api/v1/context", headers=self.headers)
self.assertEqual(response.status_code, 200)
data = response.json()
self.assertIn("files", data)
self.assertIn("screenshots", data)
self.assertIn("files_base_dir", data)
def test_endpoint_no_api_key_configured(self):
# Test the security fix specifically
with patch.dict(self.app_instance.config, {"headless": {"api_key": ""}}):
response = self.client.get("/status", headers=self.headers)
self.assertEqual(response.status_code, 403)
self.assertEqual(response.json()["detail"], "API Key not configured on server")
class TestHeadlessStartup(unittest.TestCase):
@patch('gui_2.immapp.run')
@patch('gui_2.api_hooks.HookServer')
@patch('gui_2.save_config')
@patch('gui_2.ai_client.cleanup')
@patch('uvicorn.run') # Mock uvicorn.run to prevent hanging
def test_headless_flag_prevents_gui_run(self, mock_uvicorn_run, mock_cleanup, mock_save_config, mock_hook_server, mock_immapp_run):
# Setup mock argv with --headless
test_args = ["gui_2.py", "--headless"]
with patch.object(sys, 'argv', test_args):
with patch('gui_2.session_logger.close_session'), \
patch('gui_2.session_logger.open_session'):
app = gui_2.App()
# Mock _fetch_models to avoid network calls
app._fetch_models = MagicMock()
app.run()
# Expectation: immapp.run should NOT be called in headless mode
mock_immapp_run.assert_not_called()
# Expectation: uvicorn.run SHOULD be called
mock_uvicorn_run.assert_called_once()
@patch('gui_2.immapp.run')
def test_normal_startup_calls_gui_run(self, mock_immapp_run):
test_args = ["gui_2.py"]
with patch.object(sys, 'argv', test_args):
# In normal mode, it should still call immapp.run
with patch('gui_2.api_hooks.HookServer'), \
patch('gui_2.save_config'), \
patch('gui_2.ai_client.cleanup'), \
patch('gui_2.session_logger.close_session'), \
patch('gui_2.session_logger.open_session'):
app = gui_2.App()
app._fetch_models = MagicMock()
app.run()
mock_immapp_run.assert_called_once()
def test_fastapi_installed():
"""Verify that fastapi is installed."""
try:
importlib.import_module("fastapi")
except ImportError:
pytest.fail("fastapi is not installed")
"""Verify that fastapi is installed."""
try:
importlib.import_module("fastapi")
except ImportError:
pytest.fail("fastapi is not installed")
def test_uvicorn_installed():
"""Verify that uvicorn is installed."""
try:
importlib.import_module("uvicorn")
except ImportError:
pytest.fail("uvicorn is not installed")
"""Verify that uvicorn is installed."""
try:
importlib.import_module("uvicorn")
except ImportError:
pytest.fail("uvicorn is not installed")
if __name__ == "__main__":
unittest.main()

View File

@@ -7,137 +7,113 @@ import json
@pytest.mark.asyncio
async def test_headless_verification_full_run():
"""
"""
1. Initialize a ConductorEngine with a Track containing multiple dependent Tickets.
2. Simulate a full execution run using engine.run_linear().
3. Mock ai_client.send to simulate successful tool calls and final responses.
4. Specifically verify that 'Context Amnesia' is maintained.
"""
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker1", depends_on=["T1"])
track = Track(id="track_verify", description="Verification Track", tickets=[t1, t2])
from events import AsyncEventQueue
queue = AsyncEventQueue()
engine = ConductorEngine(track=track, event_queue=queue)
with patch("ai_client.send") as mock_send, \
patch("ai_client.reset_session") as mock_reset:
# We need mock_send to return something that doesn't contain "BLOCKED"
mock_send.return_value = "Task completed successfully."
await engine.run_linear()
# Verify both tickets are completed
assert t1.status == "completed"
assert t2.status == "completed"
# Verify that ai_client.send was called twice (once for each ticket)
assert mock_send.call_count == 2
# Verify Context Amnesia: reset_session should be called for each ticket
assert mock_reset.call_count == 2
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker1", depends_on=["T1"])
track = Track(id="track_verify", description="Verification Track", tickets=[t1, t2])
from events import AsyncEventQueue
queue = AsyncEventQueue()
engine = ConductorEngine(track=track, event_queue=queue)
with patch("ai_client.send") as mock_send, \
patch("ai_client.reset_session") as mock_reset:
# We need mock_send to return something that doesn't contain "BLOCKED"
mock_send.return_value = "Task completed successfully."
await engine.run_linear()
# Verify both tickets are completed
assert t1.status == "completed"
assert t2.status == "completed"
# Verify that ai_client.send was called twice (once for each ticket)
assert mock_send.call_count == 2
# Verify Context Amnesia: reset_session should be called for each ticket
assert mock_reset.call_count == 2
@pytest.mark.asyncio
async def test_headless_verification_error_and_qa_interceptor():
"""
"""
5. Simulate a shell error and verify that the Tier 4 QA interceptor is triggered
and its summary is injected into the worker's history for the next retry.
"""
t1 = Ticket(id="T1", description="Task with error", status="todo", assigned_to="worker1")
track = Track(id="track_error", description="Error Track", tickets=[t1])
from events import AsyncEventQueue
queue = AsyncEventQueue()
engine = ConductorEngine(track=track, event_queue=queue)
t1 = Ticket(id="T1", description="Task with error", status="todo", assigned_to="worker1")
track = Track(id="track_error", description="Error Track", tickets=[t1])
from events import AsyncEventQueue
queue = AsyncEventQueue()
engine = ConductorEngine(track=track, event_queue=queue)
# We need to simulate the tool loop inside ai_client._send_gemini (or similar)
# Since we want to test the real tool loop and QA injection, we mock at the provider level.
with patch("ai_client._provider", "gemini"), \
patch("ai_client._gemini_client") as mock_genai_client, \
patch("ai_client.confirm_and_run_callback") as mock_run, \
patch("ai_client.run_tier4_analysis") as mock_qa, \
patch("ai_client._ensure_gemini_client") as mock_ensure, \
patch("ai_client._gemini_tool_declaration", return_value=None):
# Ensure _gemini_client is restored by the mock ensure function
import ai_client
def restore_client():
ai_client._gemini_client = mock_genai_client
mock_ensure.side_effect = restore_client
ai_client._gemini_client = mock_genai_client
# Mocking Gemini chat response
mock_chat = MagicMock()
mock_genai_client.chats.create.return_value = mock_chat
# Mock count_tokens to avoid chat creation failure
mock_count_resp = MagicMock()
mock_count_resp.total_tokens = 100
mock_genai_client.models.count_tokens.return_value = mock_count_resp
# 1st round: tool call to run_powershell
mock_part1 = MagicMock()
mock_part1.text = "I will run a command."
mock_part1.function_call = MagicMock()
mock_part1.function_call.name = "run_powershell"
mock_part1.function_call.args = {"script": "dir"}
mock_resp1 = MagicMock()
mock_resp1.candidates = [MagicMock(content=MagicMock(parts=[mock_part1]), finish_reason=MagicMock(name="STOP"))]
mock_resp1.usage_metadata.prompt_token_count = 10
mock_resp1.usage_metadata.candidates_token_count = 5
# 2nd round: Final text after tool result
mock_part2 = MagicMock()
mock_part2.text = "The command failed but I understand why. Task done."
mock_part2.function_call = None
mock_resp2 = MagicMock()
mock_resp2.candidates = [MagicMock(content=MagicMock(parts=[mock_part2]), finish_reason=MagicMock(name="STOP"))]
mock_resp2.usage_metadata.prompt_token_count = 20
mock_resp2.usage_metadata.candidates_token_count = 10
mock_chat.send_message.side_effect = [mock_resp1, mock_resp2]
# Mock run_powershell behavior: it should call the qa_callback on error
def run_side_effect(script, base_dir, qa_callback):
if qa_callback:
analysis = qa_callback("Error: file not found")
return f"""STDERR: Error: file not found
QA ANALYSIS:
{analysis}"""
return "Error: file not found"
mock_run.side_effect = run_side_effect
mock_qa.return_value = "FIX: Check if path exists."
await engine.run_linear()
# Verify QA analysis was triggered
mock_qa.assert_called_once_with("Error: file not found")
# Verify the 2nd send_message call includes the QA ANALYSIS in its payload (f_resps)
# The first call is the user message, the second is the tool response.
assert mock_chat.send_message.call_count == 2
args, kwargs = mock_chat.send_message.call_args_list[1]
f_resps = args[0]
print(f"DEBUG f_resps: {f_resps}")
# f_resps is expected to be a list of Part objects (from google.genai.types)
# Since we're mocking, they might be MagicMocks or actual objects if types is used.
# In our case, ai_client.Part.from_function_response is used.
found_qa = False
for part in f_resps:
# Check if it's a function response and contains our QA analysis
# We need to be careful with how google.genai.types.Part is structured or mocked
part_str = str(part)
print(f"DEBUG part_str: {part_str}")
if "QA ANALYSIS:" in part_str and "FIX: Check if path exists." in part_str:
found_qa = True
assert found_qa, "QA Analysis was not injected into the next round"
return "Error: file not found"
mock_run.side_effect = run_side_effect
mock_qa.return_value = "FIX: Check if path exists."
await engine.run_linear()
# Verify QA analysis was triggered
mock_qa.assert_called_once_with("Error: file not found")
# Verify the 2nd send_message call includes the QA ANALYSIS in its payload (f_resps)
# The first call is the user message, the second is the tool response.
assert mock_chat.send_message.call_count == 2
args, kwargs = mock_chat.send_message.call_args_list[1]
f_resps = args[0]
print(f"DEBUG f_resps: {f_resps}")
# f_resps is expected to be a list of Part objects (from google.genai.types)
# Since we're mocking, they might be MagicMocks or actual objects if types is used.
# In our case, ai_client.Part.from_function_response is used.
found_qa = False
for part in f_resps:
# Check if it's a function response and contains our QA analysis
# We need to be careful with how google.genai.types.Part is structured or mocked
part_str = str(part)
print(f"DEBUG part_str: {part_str}")
if "QA ANALYSIS:" in part_str and "FIX: Check if path exists." in part_str:
found_qa = True
assert found_qa, "QA Analysis was not injected into the next round"

View File

@@ -18,199 +18,164 @@ import ai_client
# --- Tests for Aggregate Module ---
def test_aggregate_includes_segregated_history(tmp_path):
"""
"""
Tests if the aggregate function correctly includes history
when it's segregated into a separate file.
"""
proj_path = tmp_path / "manual_slop.toml"
hist_path = tmp_path / "manual_slop_history.toml"
# Setup segregated project configuration
proj_data = project_manager.default_project("test-aggregate")
proj_data["discussion"]["discussions"]["main"]["history"] = ["@2026-02-24T14:00:00\nUser:\nShow me history"]
# Save the project, which should segregate the history
project_manager.save_project(proj_data, proj_path)
# Load the project and aggregate its content
loaded_proj = project_manager.load_project(proj_path)
config = project_manager.flat_config(loaded_proj)
markdown, output_file, file_items = aggregate.run(config)
# Assert that the history is present in the aggregated markdown
assert "## Discussion History" in markdown
assert "Show me history" in markdown
# --- Tests for MCP Client and Blacklisting ---
def test_mcp_blacklist(tmp_path):
"""
"""
Tests that the MCP client correctly blacklists specified files
and prevents listing them.
"""
# Setup a file that should be blacklisted
hist_file = tmp_path / "my_project_history.toml"
hist_file.write_text("secret history", encoding="utf-8")
# Configure MCP client to allow access to the temporary directory
# but ensure the history file is implicitly or explicitly blacklisted.
mcp_client.configure([{"path": str(hist_file)}], extra_base_dirs=[str(tmp_path)])
# Attempt to read the blacklisted file - should result in an access denied message
result = mcp_client.read_file(str(hist_file))
assert "ACCESS DENIED" in result or "BLACKLISTED" in result
# Attempt to list the directory containing the blacklisted file
result = mcp_client.list_directory(str(tmp_path))
# The blacklisted file should not appear in the directory listing
assert "my_project_history.toml" not in result
def test_aggregate_blacklist(tmp_path):
"""
"""
Tests that aggregate's path resolution respects blacklisting,
ensuring history files are not included by default.
"""
# Setup a history file in the temporary directory
hist_file = tmp_path / "my_project_history.toml"
hist_file.write_text("secret history", encoding="utf-8")
# Attempt to resolve paths including the history file using a wildcard
paths = aggregate.resolve_paths(tmp_path, "*_history.toml")
assert hist_file not in paths, "History file should be blacklisted and not resolved"
# Resolve all paths and ensure the history file is still excluded
paths = aggregate.resolve_paths(tmp_path, "*")
assert hist_file not in paths, "History file should be excluded even with a general glob"
# --- Tests for History Migration and Separation ---
def test_migration_on_load(tmp_path):
"""
"""
Tests that project loading migrates discussion history from manual_slop.toml
to manual_slop_history.toml if it exists in the main config.
"""
# Define paths for the main project config and the history file
proj_path = tmp_path / "manual_slop.toml"
hist_path = tmp_path / "manual_slop_history.toml"
# Create a legacy project data structure with discussion history
legacy_data = project_manager.default_project("test-project")
legacy_data["discussion"]["discussions"]["main"]["history"] = ["Hello", "World"]
# Save this legacy data into manual_slop.toml
with open(proj_path, "wb") as f:
tomli_w.dump(legacy_data, f)
# Load the project - this action should trigger the migration
loaded_data = project_manager.load_project(proj_path)
# Assertions:
# 1. The loaded project should still expose the migrated history in memory.
assert "discussion" in loaded_data
assert loaded_data["discussion"]["discussions"]["main"]["history"] == ["Hello", "World"]
# 2. The history should no longer be present in the main manual_slop.toml on disk.
with open(proj_path, "rb") as f:
on_disk_main = tomllib.load(f)
assert "discussion" not in on_disk_main, "Discussion history should be removed from main config after migration"
# 3. The history file (manual_slop_history.toml) should now exist and contain the data.
assert hist_path.exists()
with open(hist_path, "rb") as f:
on_disk_hist = tomllib.load(f)
assert on_disk_hist["discussions"]["main"]["history"] == ["Hello", "World"]
def test_save_separation(tmp_path):
"""
"""
Tests that saving project data correctly separates discussion history
into manual_slop_history.toml.
"""
# Define paths for the main project config and the history file
proj_path = tmp_path / "manual_slop.toml"
hist_path = tmp_path / "manual_slop_history.toml"
# Create fresh project data, including discussion history
proj_data = project_manager.default_project("test-project")
proj_data["discussion"]["discussions"]["main"]["history"] = ["Saved", "Separately"]
# Save the project data
project_manager.save_project(proj_data, proj_path)
# Assertions:
# 1. Both the main project file and the history file should exist on disk.
assert proj_path.exists()
assert hist_path.exists()
# 2. The main project file should NOT contain the discussion history.
with open(proj_path, "rb") as f:
p_disk = tomllib.load(f)
assert "discussion" not in p_disk, "Discussion history should not be in main config file after save"
# 3. The history file should contain the discussion history.
with open(hist_path, "rb") as f:
h_disk = tomllib.load(f)
assert h_disk["discussions"]["main"]["history"] == ["Saved", "Separately"]
# --- Tests for History Persistence Across Turns ---
def test_history_persistence_across_turns(tmp_path):
"""
"""
Tests that discussion history is correctly persisted across multiple save/load cycles.
"""
proj_path = tmp_path / "manual_slop.toml"
hist_path = tmp_path / "manual_slop_history.toml"
# Step 1: Initialize a new project and save it.
proj = project_manager.default_project("test-persistence")
project_manager.save_project(proj, proj_path)
# Step 2: Add a first turn of discussion history.
proj = project_manager.load_project(proj_path)
entry1 = {"role": "User", "content": "Hello", "ts": "2026-02-24T13:00:00"}
proj["discussion"]["discussions"]["main"]["history"].append(project_manager.entry_to_str(entry1))
project_manager.save_project(proj, proj_path)
# Verify separation after the first save
with open(proj_path, "rb") as f:
p_disk = tomllib.load(f)
assert "discussion" not in p_disk
with open(hist_path, "rb") as f:
h_disk = tomllib.load(f)
assert h_disk["discussions"]["main"]["history"] == ["@2026-02-24T13:00:00\nUser:\nHello"]
# Step 3: Add a second turn of discussion history.
proj = project_manager.load_project(proj_path)
entry2 = {"role": "AI", "content": "Hi there!", "ts": "2026-02-24T13:01:00"}
proj["discussion"]["discussions"]["main"]["history"].append(project_manager.entry_to_str(entry2))
project_manager.save_project(proj, proj_path)
# Verify persistence
with open(hist_path, "rb") as f:
h_disk = tomllib.load(f)
assert len(h_disk["discussions"]["main"]["history"]) == 2
assert h_disk["discussions"]["main"]["history"][1] == "@2026-02-24T13:01:00\nAI:\nHi there!"
# Step 4: Reload the project from disk and check history
proj_final = project_manager.load_project(proj_path)
assert len(proj_final["discussion"]["discussions"]["main"]["history"]) == 2
# --- Tests for AI Client History Management ---
def test_get_history_bleed_stats_basic():
"""
"""
Tests basic retrieval of history bleed statistics from the AI client.
"""
# Reset the AI client's session state
ai_client.reset_session()
# Set a custom history truncation limit for testing purposes.
ai_client.set_history_trunc_limit(500)
# For this test, we're primarily checking the structure of the returned stats
# and the configured limit.
stats = ai_client.get_history_bleed_stats()
assert 'current' in stats, "Stats dictionary should contain 'current' token usage"
assert 'limit' in stats, "Stats dictionary should contain 'limit'"
assert stats['limit'] == 500, f"Expected limit of 500, but got {stats['limit']}"
assert isinstance(stats['current'], int) and stats['current'] >= 0

View File

@@ -12,40 +12,35 @@ from api_hook_client import ApiHookClient
import gui_legacy
def test_hooks_enabled_via_cli():
with patch.object(sys, 'argv', ['gui_legacy.py', '--enable-test-hooks']):
app = gui_legacy.App()
assert app.test_hooks_enabled is True
def test_hooks_disabled_by_default():
with patch.object(sys, 'argv', ['gui_legacy.py']):
if 'SLOP_TEST_HOOKS' in os.environ:
del os.environ['SLOP_TEST_HOOKS']
app = gui_legacy.App()
assert getattr(app, 'test_hooks_enabled', False) is False
def test_live_hook_server_responses(live_gui):
"""
"""
Verifies the live hook server (started via fixture) responds correctly to all major endpoints.
"""
client = ApiHookClient()
# Test /status
status = client.get_status()
assert status == {'status': 'ok'}
# Test /api/project
project = client.get_project()
assert 'project' in project
# Test /api/session
session = client.get_session()
assert 'session' in session
# Test /api/performance
perf = client.get_performance()
assert 'performance' in perf
# Test POST /api/gui
gui_data = {"action": "test_action", "value": 42}
resp = client.post_gui(gui_data)
assert resp == {'status': 'queued'}

View File

@@ -14,89 +14,80 @@ spec.loader.exec_module(gui_legacy)
from gui_legacy import App
def test_new_hubs_defined_in_window_info():
"""
"""
Verifies that the new consolidated Hub windows are defined in the App's window_info.
This ensures they will be available in the 'Windows' menu.
"""
# We don't need a full App instance with DPG context for this,
# as window_info is initialized in __init__ before DPG starts.
# But we mock load_config to avoid file access.
from unittest.mock import patch
with patch('gui_legacy.load_config', return_value={}):
app = App()
expected_hubs = {
"Context Hub": "win_context_hub",
"AI Settings Hub": "win_ai_settings_hub",
"Discussion Hub": "win_discussion_hub",
"Operations Hub": "win_operations_hub",
}
for label, tag in expected_hubs.items():
assert tag in app.window_info.values(), f"Expected window tag {tag} not found in window_info"
# Check if the label matches (or is present)
found = False
for l, t in app.window_info.items():
if t == tag:
found = True
assert l == label or label in l, f"Label mismatch for {tag}: expected {label}, found {l}"
assert found, f"Expected window label {label} not found in window_info"
def test_old_windows_removed_from_window_info(app_instance_simple):
"""
"""
Verifies that the old fragmented windows are removed from window_info.
"""
old_tags = [
"win_projects", "win_files", "win_screenshots",
"win_provider", "win_system_prompts",
"win_discussion", "win_message", "win_response",
"win_comms", "win_tool_log"
]
for tag in old_tags:
assert tag not in app_instance_simple.window_info.values(), f"Old window tag {tag} should have been removed from window_info"
@pytest.fixture
def app_instance_simple():
from unittest.mock import patch
from gui_legacy import App
with patch('gui_legacy.load_config', return_value={}):
app = App()
return app
def test_hub_windows_have_correct_flags(app_instance_simple):
"""
"""
Verifies that the new Hub windows have appropriate flags for a professional workspace.
(e.g., no_collapse should be True for main hubs).
"""
import dearpygui.dearpygui as dpg
dpg.create_context()
# We need to actually call the build methods to check the configuration
app_instance_simple._build_context_hub()
app_instance_simple._build_ai_settings_hub()
app_instance_simple._build_discussion_hub()
app_instance_simple._build_operations_hub()
hubs = ["win_context_hub", "win_ai_settings_hub", "win_discussion_hub", "win_operations_hub"]
for hub in hubs:
assert dpg.does_item_exist(hub)
# We can't easily check 'no_collapse' after creation without internal DPG calls
# but we can check if it's been configured if we mock dpg.window or check it manually
dpg.destroy_context()
def test_indicators_exist(app_instance_simple):
"""
"""
Verifies that the new thinking and live indicators exist in the UI.
"""
import dearpygui.dearpygui as dpg
dpg.create_context()
app_instance_simple._build_discussion_hub()
app_instance_simple._build_operations_hub()
assert dpg.does_item_exist("thinking_indicator")
assert dpg.does_item_exist("operations_live_indicator")
dpg.destroy_context()

View File

@@ -8,120 +8,110 @@ import ai_client
@pytest.fixture
def mock_app():
with (
patch('gui_2.load_config', return_value={
"ai": {"provider": "gemini", "model": "model-1", "temperature": 0.0, "max_tokens": 100, "history_trunc_limit": 1000},
"projects": {"paths": [], "active": ""},
"gui": {"show_windows": {}}
}),
patch('gui_2.project_manager.load_project', return_value={
"project": {"name": "test_proj"},
"discussion": {"active": "main", "discussions": {"main": {"history": []}}},
"files": {"paths": [], "base_dir": "."},
"screenshots": {"paths": [], "base_dir": "."},
"agent": {"tools": {}}
}),
patch('gui_2.project_manager.migrate_from_legacy_config', return_value={}),
patch('gui_2.project_manager.save_project'),
patch('gui_2.session_logger.open_session'),
patch('gui_2.App._init_ai_and_hooks'),
patch('gui_2.App._fetch_models')
):
app = App()
yield app
# We don't have a clean way to stop the loop thread in gui_2.py App
# so we just let it daemon-exit.
@pytest.mark.timeout(10)
def test_user_request_integration_flow(mock_app):
"""
"""
Verifies that pushing a UserRequestEvent to the event_queue:
1. Triggers ai_client.send
2. Results in a 'response' event back to the queue
3. Eventually updates the UI state (ai_response, ai_status) after processing GUI tasks.
"""
app = mock_app
# Mock all ai_client methods called during _handle_request_event
mock_response = "This is a test AI response"
with (
patch('ai_client.send', return_value=mock_response) as mock_send,
patch('ai_client.set_custom_system_prompt'),
patch('ai_client.set_model_params'),
patch('ai_client.set_agent_tools')
):
# 1. Create and push a UserRequestEvent
event = UserRequestEvent(
prompt="Hello AI",
stable_md="Context",
file_items=[],
disc_text="History",
base_dir="."
)
# 2. Push event to the app's internal loop
asyncio.run_coroutine_threadsafe(
app.event_queue.put("user_request", event),
app._loop
)
# 3. Wait for ai_client.send to be called (polling background thread)
start_time = time.time()
while not mock_send.called and time.time() - start_time < 5:
time.sleep(0.1)
assert mock_send.called, "ai_client.send was not called within timeout"
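# The positional arguments mirror the UserRequestEvent fields constructed above:
# stable_md, prompt, base_dir, file_items, disc_text.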
mock_send.assert_called_once_with("Context", "Hello AI", ".", [], "History")
# 4. Wait for the response to propagate to _pending_gui_tasks and update UI
# We call _process_pending_gui_tasks manually to simulate a GUI frame update.
start_time = time.time()
success = False
while time.time() - start_time < 3:
app._process_pending_gui_tasks()
if app.ai_response == mock_response and app.ai_status == "done":
success = True
break
time.sleep(0.1)
assert success, f"UI state was not updated. ai_response: '{app.ai_response}', status: '{app.ai_status}'"
assert app.ai_response == mock_response
assert app.ai_status == "done"
@pytest.mark.timeout(10)
def test_user_request_error_handling(mock_app):
"""
"""
Verifies that if ai_client.send raises an exception, the UI is updated with the error state.
"""
app = mock_app
with (
patch('ai_client.send', side_effect=Exception("API Failure")) as mock_send,
patch('ai_client.set_custom_system_prompt'),
patch('ai_client.set_model_params'),
patch('ai_client.set_agent_tools')
):
event = UserRequestEvent(
prompt="Trigger Error",
stable_md="",
file_items=[],
disc_text="",
base_dir="."
)
asyncio.run_coroutine_threadsafe(
app.event_queue.put("user_request", event),
app._loop
)
# Poll for error state by processing GUI tasks
start_time = time.time()
success = False
while time.time() - start_time < 5:
app._process_pending_gui_tasks()
if app.ai_status == "error" and "ERROR: API Failure" in app.ai_response:
success = True
break
time.sleep(0.1)
assert success, f"Error state was not reflected in UI. status: {app.ai_status}, response: {app.ai_response}"

View File

@@ -10,80 +10,67 @@ from api_hook_client import ApiHookClient
@pytest.mark.integration
def test_full_live_workflow(live_gui):
"""
"""
Integration test that drives the GUI through a full workflow.
"""
client = ApiHookClient()
assert client.wait_for_server(timeout=10)
client.post_session(session_entries=[])
time.sleep(2)
# 1. Reset
client.click("btn_reset")
time.sleep(1)
# 2. Project Setup
temp_project_path = os.path.abspath("tests/temp_project.toml")
if os.path.exists(temp_project_path):
os.remove(temp_project_path)
client.click("btn_project_new_automated", user_data=temp_project_path)
time.sleep(1) # Wait for project creation and switch
# Verify metadata update
proj = client.get_project()
test_git = os.path.abspath(".")
client.set_value("project_git_dir", test_git)
client.click("btn_project_save")
time.sleep(1)
proj = client.get_project()
# flat_config returns {"project": {...}, "output": ...}
# so proj is {"project": {"project": {"git_dir": ...}}}
assert proj['project']['project']['git_dir'] == test_git
# Enable auto-add so the response ends up in history
client.set_value("auto_add_history", True)
client.set_value("current_model", "gemini-2.5-flash-lite")
time.sleep(0.5)
# 3. Discussion Turn
client.set_value("ai_input", "Hello! This is an automated test. Just say 'Acknowledged'.")
client.click("btn_gen_send")
# Verify thinking indicator appears (might be brief)
thinking_seen = False
print("\nPolling for thinking indicator...")
for i in range(40):
state = client.get_indicator_state("thinking_indicator")
if state.get('shown'):
thinking_seen = True
print(f"Thinking indicator seen at poll {i}")
break
time.sleep(0.5)
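# thinking_seen is deliberately not asserted: the indicator can appear and vanish
# faster than the 0.5s polling interval, so missing it is not treated as a failure.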
# 4. Wait for response in session
success = False
print("Waiting for AI response in session...")
for i in range(120):
session = client.get_session()
entries = session.get('session', {}).get('entries', [])
if any(e.get('role') == 'AI' for e in entries):
success = True
print(f"AI response found at second {i}")
break
time.sleep(1)
assert success, "AI failed to respond within 120 seconds"
# 5. Switch Discussion
client.set_value("disc_new_name_input", "AutoDisc")
client.click("btn_disc_create")
time.sleep(1.0) # Wait for GUI to process creation
client.select_list_item("disc_listbox", "AutoDisc")
time.sleep(1.0) # Wait for GUI to switch
# Verify session is empty in new discussion
session = client.get_session()
assert len(session.get('session', {}).get('entries', [])) == 0

View File

@@ -9,20 +9,20 @@ from gui_2 import App
@pytest.fixture
def mock_config(tmp_path):
config_path = tmp_path / "config.toml"
config_path.write_text("""[projects]
paths = []
active = ""
[ai]
provider = "gemini"
model = "model"
""", encoding="utf-8")
return config_path
@pytest.fixture
def mock_project(tmp_path):
project_path = tmp_path / "project.toml"
project_path.write_text("""[project]
name = "test"
[discussion]
roles = ["User", "AI"]
@@ -30,74 +30,65 @@ active = "main"
[discussion.discussions.main]
history = []
""", encoding="utf-8")
return project_path
@pytest.fixture
def app_instance(mock_config, mock_project, monkeypatch):
monkeypatch.setattr("gui_2.CONFIG_PATH", mock_config)
with patch("project_manager.load_project") as mock_load, \
patch("session_logger.open_session"):
mock_load.return_value = {
"project": {"name": "test"},
"discussion": {"roles": ["User", "AI"], "active": "main", "discussions": {"main": {"history": []}}},
"files": {"paths": []},
"screenshots": {"paths": []}
}
# Mock the __init__ to do nothing, then set the fields we need manually
with patch.object(App, '__init__', lambda self: None):
app = App()
app.show_windows = {"Log Management": False}
app.ui_state = MagicMock()
app.ui_files_base_dir = "."
app.files = []
# __init__ was bypassed, but methods defined on the class are still bound to the
# instance, so we can call them directly.
return app
def test_log_management_init(app_instance):
app = app_instance
assert "Log Management" in app.show_windows
assert app.show_windows["Log Management"] is False
assert hasattr(app, "_render_log_management")
assert callable(app._render_log_management)
def test_render_log_management_logic(app_instance):
app = app_instance
app.show_windows["Log Management"] = True
# Mock LogRegistry
with patch("gui_2.LogRegistry") as MockRegistry, \
patch("gui_2.imgui.begin") as mock_begin, \
patch("gui_2.imgui.begin_table") as mock_begin_table, \
patch("gui_2.imgui.text") as mock_text, \
patch("gui_2.imgui.end_table") as mock_end_table, \
patch("gui_2.imgui.end") as mock_end, \
patch("gui_2.imgui.push_style_color"), \
patch("gui_2.imgui.pop_style_color"), \
patch("gui_2.imgui.table_setup_column"), \
patch("gui_2.imgui.table_headers_row"), \
patch("gui_2.imgui.table_next_row"), \
patch("gui_2.imgui.table_next_column"), \
patch("gui_2.imgui.button"):
mock_reg = MockRegistry.return_value
mock_reg.data = {
"session_1": {
"start_time": "2023-01-01",
"whitelisted": False,
"metadata": {"reason": "test", "size_kb": 10, "message_count": 5}
}
}
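# gui_2 apparently expects imgui.begin to return a two-flag tuple (window shown,
# still open), so both are mocked True here to force the window body to render.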
mock_begin.return_value = (True, True)
mock_begin_table.return_value = True
app._render_log_management()
mock_begin.assert_called_with("Log Management", app.show_windows["Log Management"])
mock_begin_table.assert_called()
mock_text.assert_any_call("session_1")

View File

@@ -8,48 +8,42 @@ from log_pruner import LogPruner
@pytest.fixture
def pruner_setup(tmp_path):
logs_dir = tmp_path / "logs"
logs_dir.mkdir()
registry_path = logs_dir / "log_registry.toml"
registry = LogRegistry(str(registry_path))
pruner = LogPruner(registry, str(logs_dir))
return pruner, registry, logs_dir
def test_prune_old_insignificant_logs(pruner_setup):
pruner, registry, logs_dir = pruner_setup
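# Pruning policy exercised below (thresholds assumed from the fixture data):
# only sessions that are old, smaller than ~2KB, and not whitelisted are deleted.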
# 1. Old and small (insignificant) -> should be pruned
session_id_old_small = "old_small"
dir_old_small = logs_dir / session_id_old_small
dir_old_small.mkdir()
(dir_old_small / "comms.log").write_text("small") # < 2KB
registry.register_session(session_id_old_small, str(dir_old_small), datetime.now() - timedelta(days=2))
# 2. Old and large (significant) -> should NOT be pruned
session_id_old_large = "old_large"
dir_old_large = logs_dir / session_id_old_large
dir_old_large.mkdir()
(dir_old_large / "comms.log").write_text("x" * 3000) # > 2KB
registry.register_session(session_id_old_large, str(dir_old_large), datetime.now() - timedelta(days=2))
# 3. Recent and small -> should NOT be pruned
session_id_recent_small = "recent_small"
dir_recent_small = logs_dir / session_id_recent_small
dir_recent_small.mkdir()
(dir_recent_small / "comms.log").write_text("small")
registry.register_session(session_id_recent_small, str(dir_recent_small), datetime.now() - timedelta(hours=2))
# 4. Old and whitelisted -> should NOT be pruned
session_id_old_whitelisted = "old_whitelisted"
dir_old_whitelisted = logs_dir / session_id_old_whitelisted
dir_old_whitelisted.mkdir()
(dir_old_whitelisted / "comms.log").write_text("small")
registry.register_session(session_id_old_whitelisted, str(dir_old_whitelisted), datetime.now() - timedelta(days=2))
registry.update_session_metadata(session_id_old_whitelisted, 0, 0, 0, True, "Manual")
pruner.prune()
assert not dir_old_small.exists()
assert dir_old_large.exists()
assert dir_recent_small.exists()
assert dir_old_whitelisted.exists()

View File

@@ -8,173 +8,149 @@ from log_registry import LogRegistry
class TestLogRegistry(unittest.TestCase):
def setUp(self):
"""Set up a temporary directory and registry file for each test."""
self.temp_dir = tempfile.TemporaryDirectory()
self.registry_path = os.path.join(self.temp_dir.name, "registry.toml")
# Ensure the file is created and empty initially for a clean state.
# LogRegistry is assumed to load from this file on instantiation.
with open(self.registry_path, 'w') as f:
f.write("# Initial empty registry\n")
# Instantiate LogRegistry. This will load from the empty file.
self.registry = LogRegistry(self.registry_path)
def tearDown(self):
"""Clean up the temporary directory and its contents after each test."""
self.temp_dir.cleanup()
def test_instantiation(self):
"""Test LogRegistry instantiation with a file path."""
self.assertIsInstance(self.registry, LogRegistry)
self.assertEqual(self.registry.registry_path, self.registry_path)
# Check if the file exists. LogRegistry is assumed to create it if not.
self.assertTrue(os.path.exists(self.registry_path))
# We will verify content in other tests that explicitly save and reload.
def test_register_session(self):
"""Test registering a new session."""
session_id = "session-123"
path = "/path/to/session/123"
start_time = datetime.utcnow()
self.registry.register_session(session_id, path, start_time)
# Verify session was added to internal data (assuming LogRegistry has a public 'data' attribute for testing)
self.assertIn(session_id, self.registry.data)
session_data = self.registry.data[session_id]
self.assertEqual(session_data['path'], path)
# Convert stored ISO string back to datetime for comparison
stored_start_time = datetime.fromisoformat(session_data['start_time'])
self.assertAlmostEqual(stored_start_time, start_time, delta=timedelta(seconds=1)) # Allow for minor time differences
self.assertFalse(session_data.get('whitelisted', False)) # Default to not whitelisted
self.assertIsNone(session_data.get('metadata'))
# Verify data was written to the TOML file by reloading
reloaded_registry = LogRegistry(self.registry_path)
self.assertIn(session_id, reloaded_registry.data)
reloaded_session_data = reloaded_registry.data[session_id]
reloaded_start_time = datetime.fromisoformat(reloaded_session_data['start_time'])
self.assertAlmostEqual(reloaded_start_time, start_time, delta=timedelta(seconds=1))
def test_update_session_metadata(self):
"""Test updating session metadata."""
session_id = "session-456"
path = "/path/to/session/456"
start_time = datetime.utcnow()
self.registry.register_session(session_id, path, start_time)
message_count = 100
errors = 5
size_kb = 1024
whitelisted = True
reason = "Automated process"
self.registry.update_session_metadata(session_id, message_count, errors, size_kb, whitelisted, reason)
# Verify metadata was updated in internal data
self.assertIn(session_id, self.registry.data)
session_data = self.registry.data[session_id]
self.assertIsNotNone(session_data.get('metadata'))
metadata = session_data['metadata']
self.assertEqual(metadata['message_count'], message_count)
self.assertEqual(metadata['errors'], errors)
self.assertEqual(metadata['size_kb'], size_kb)
self.assertEqual(metadata['whitelisted'], whitelisted)
self.assertEqual(metadata['reason'], reason)
# Also check if the whitelisted flag in the main session data is updated
self.assertTrue(session_data.get('whitelisted', False))
# Verify data was written to the TOML file by reloading
reloaded_registry = LogRegistry(self.registry_path)
self.assertIn(session_id, reloaded_registry.data)
reloaded_session_data = reloaded_registry.data[session_id]
self.assertTrue(reloaded_session_data.get('metadata', {}).get('whitelisted', False))
self.assertTrue(reloaded_session_data.get('whitelisted', False)) # Check main flag too
def test_is_session_whitelisted(self):
"""Test checking if a session is whitelisted."""
session_id_whitelisted = "session-789-whitelisted"
path_w = "/path/to/session/789"
start_time_w = datetime.utcnow()
self.registry.register_session(session_id_whitelisted, path_w, start_time_w)
self.registry.update_session_metadata(session_id_whitelisted, 10, 0, 100, True, "Manual whitelist")
session_id_not_whitelisted = "session-abc-not-whitelisted"
path_nw = "/path/to/session/abc"
start_time_nw = datetime.utcnow()
self.registry.register_session(session_id_not_whitelisted, path_nw, start_time_nw)
# Test explicitly whitelisted session
self.assertTrue(self.registry.is_session_whitelisted(session_id_whitelisted))
# Test session registered but not updated, should default to not whitelisted
self.assertFalse(self.registry.is_session_whitelisted(session_id_not_whitelisted))
# Test for a non-existent session, should be treated as not whitelisted
self.assertFalse(self.registry.is_session_whitelisted("non-existent-session"))
def test_get_old_non_whitelisted_sessions(self):
"""Test retrieving old, non-whitelisted sessions."""
now = datetime.utcnow()
# Define a cutoff time that is 7 days ago
cutoff_time = now - timedelta(days=7)
# Session 1: Old and not whitelisted
session_id_old_nw = "session-old-nw"
path_old_nw = "/path/to/session/old_nw"
start_time_old_nw = now - timedelta(days=10) # Older than cutoff
self.registry.register_session(session_id_old_nw, path_old_nw, start_time_old_nw)
# Session 2: Recent and not whitelisted
session_id_recent_nw = "session-recent-nw"
path_recent_nw = "/path/to/session/recent_nw"
start_time_recent_nw = now - timedelta(days=3) # Newer than cutoff
self.registry.register_session(session_id_recent_nw, path_recent_nw, start_time_recent_nw)
# Session 3: Old and whitelisted
session_id_old_w = "session-old-w"
path_old_w = "/path/to/session/old_w"
start_time_old_w = now - timedelta(days=15) # Older than cutoff
self.registry.register_session(session_id_old_w, path_old_w, start_time_old_w)
self.registry.update_session_metadata(session_id_old_w, 50, 0, 500, True, "Whitelisted")
# Session 4: Old, not whitelisted explicitly, but with metadata that doesn't set 'whitelisted' to True.
# The 'is_session_whitelisted' logic should correctly interpret this as not whitelisted.
session_id_old_nw_incomplete = "session-old-nw-incomplete"
path_old_nw_incomplete = "/path/to/session/old_nw_incomplete"
start_time_old_nw_incomplete = now - timedelta(days=20) # Older than cutoff
self.registry.register_session(session_id_old_nw_incomplete, path_old_nw_incomplete, start_time_old_nw_incomplete)
# Update with some metadata, but set 'whitelisted' to False explicitly
self.registry.update_session_metadata(session_id_old_nw_incomplete, 10, 0, 100, False, "Manual review needed")
# Get sessions older than cutoff_time and not whitelisted
old_sessions = self.registry.get_old_non_whitelisted_sessions(cutoff_time)
# Collect session IDs from the result
found_session_ids = {s['session_id'] for s in old_sessions}
# Expected: session_id_old_nw (old, not whitelisted) and session_id_old_nw_incomplete (old, explicitly not whitelisted)
self.assertIn(session_id_old_nw, found_session_ids)
self.assertIn(session_id_old_nw_incomplete, found_session_ids)
# Not expected: session_id_recent_nw (too recent), session_id_old_w (whitelisted)
self.assertNotIn(session_id_recent_nw, found_session_ids)
self.assertNotIn(session_id_old_w, found_session_ids)
# Ensure only the expected sessions are in the result
self.assertEqual(len(found_session_ids), 2)
# Test with a cutoff that includes all sessions, and ensure only non-whitelisted are returned
future_cutoff = now + timedelta(days=1) # All sessions are older than this
all_old_sessions = self.registry.get_old_non_whitelisted_sessions(future_cutoff)
all_found_session_ids = {s['session_id'] for s in all_old_sessions}
# Expected: session_id_old_nw, session_id_old_nw_incomplete, AND session_id_recent_nw
# Not expected: session_id_old_w (whitelisted)
self.assertEqual(len(all_found_session_ids), 3)
self.assertIn(session_id_old_nw, all_found_session_ids)
self.assertIn(session_id_old_nw_incomplete, all_found_session_ids)
self.assertIn(session_id_recent_nw, all_found_session_ids)
self.assertNotIn(session_id_old_w, all_found_session_ids)

View File

@@ -11,69 +11,56 @@ from log_pruner import LogPruner
@pytest.fixture
def e2e_setup(tmp_path, monkeypatch):
# Ensure closed before starting
session_logger.close_session()
monkeypatch.setattr(session_logger, "_comms_fh", None)
# Mock _LOG_DIR and _SCRIPTS_DIR in session_logger
original_log_dir = session_logger._LOG_DIR
session_logger._LOG_DIR = tmp_path / "logs"
monkeypatch.setattr(session_logger, "_LOG_DIR", tmp_path / "logs")
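# The module attribute is set both directly and via monkeypatch; either alone would
# likely suffice, and the cleanup below also restores the originals explicitly.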
session_logger._LOG_DIR.mkdir(parents=True, exist_ok=True)
original_scripts_dir = session_logger._SCRIPTS_DIR
session_logger._SCRIPTS_DIR = tmp_path / "scripts" / "generated"
monkeypatch.setattr(session_logger, "_SCRIPTS_DIR", tmp_path / "scripts" / "generated")
session_logger._SCRIPTS_DIR.mkdir(parents=True, exist_ok=True)
yield tmp_path
# Cleanup
session_logger.close_session()
session_logger._LOG_DIR = original_log_dir
session_logger._SCRIPTS_DIR = original_scripts_dir
def test_logging_e2e(e2e_setup):
tmp_path = e2e_setup
logs_dir = tmp_path / "logs"
# Step 1: Initialize (open_session)
session_logger.open_session(label="E2E_Test")
session_id = session_logger._session_id
session_dir = session_logger._session_dir
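# _session_id and _session_dir are private module attributes, read here only to
# locate the freshly created session on disk.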
# Step 2: Simulate logs (write 'ERROR' to 'comms.log')
# Use log_comms which writes to comms.log
session_logger.log_comms({"level": "ERROR", "message": "Something went wrong"})
# Step 3: Shutdown (close_session)
session_logger.close_session()
# Step 4: Verify 'log_registry.toml' has the session whitelisted due to 'ERROR'
registry_path = logs_dir / "log_registry.toml"
registry = LogRegistry(str(registry_path))
assert registry.is_session_whitelisted(session_id), "Current session should be whitelisted due to ERROR keyword"
# Step 5: Simulate an OLD insignificant session in the registry and directory
old_session_id = "20200101_120000_OLD"
old_session_dir = logs_dir / old_session_id
old_session_dir.mkdir()
(old_session_dir / "comms.log").write_text("nothing special") # < 2KB
old_start_time = datetime.now() - timedelta(days=2)
registry.register_session(old_session_id, str(old_session_dir), old_start_time)
# Step 6: Trigger 'LogPruner.prune()'
pruner = LogPruner(registry, str(logs_dir))
pruner.prune()
# Step 7: Verify the OLD session is deleted but the NEW (whitelisted) session is kept.
assert not old_session_dir.exists(), "Old insignificant session should have been pruned"
assert session_dir.exists(), "New whitelisted session should have been kept"
# Extra check: Whitelisted sessions should be kept even if old
# Manually backdate the current session
registry.data[session_id]['start_time'] = (datetime.now() - timedelta(days=2)).isoformat()
registry.save_registry()
pruner.prune()
assert session_dir.exists(), "Whitelisted session should be kept even if it is old and small"

View File

@@ -9,11 +9,10 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
import mcp_client
def test_mcp_perf_tool_retrieval():
# Test that the MCP tool can call performance_monitor metrics
mock_metrics = {"fps": 60, "last_frame_time_ms": 16.6}
# Simulate tool call by patching the callback
with patch('mcp_client.perf_monitor_callback', return_value=mock_metrics):
result = mcp_client.get_ui_performance()
assert "60" in result
assert "16.6" in result

View File

@@ -4,63 +4,55 @@ from gui_2 import App
@pytest.fixture
def app_instance():
# We patch the dependencies of App.__init__ to avoid side effects
with (
patch('gui_2.load_config', return_value={'ai': {}, 'projects': {}}),
patch('gui_2.save_config'),
patch('gui_2.project_manager') as mock_pm,
patch('gui_2.session_logger'),
patch('gui_2.immapp.run'),
patch.object(App, '_load_active_project'),
patch.object(App, '_fetch_models'),
patch.object(App, '_load_fonts'),
patch.object(App, '_post_init')
):
app = App()
# Ensure project and ui_files_base_dir are set for _refresh_from_project
app.project = {}
app.ui_files_base_dir = "."
# Return the app and the mock_pm for use in tests
yield app, mock_pm
def test_mma_dashboard_refresh(app_instance):
app, mock_pm = app_instance
# 1. Define mock tracks
mock_tracks = [
MagicMock(id="track_1", description="Track 1"),
MagicMock(id="track_2", description="Track 2")
]
# 2. Patch get_all_tracks to return our mock list
mock_pm.get_all_tracks.return_value = mock_tracks
# 3. Call _refresh_from_project
app._refresh_from_project()
# 4. Verify that app.tracks contains the mock tracks
assert hasattr(app, 'tracks'), "App instance should have a 'tracks' attribute"
assert app.tracks == mock_tracks
assert len(app.tracks) == 2
assert app.tracks[0].id == "track_1"
assert app.tracks[1].id == "track_2"
# Verify get_all_tracks was called with the correct base_dir
mock_pm.get_all_tracks.assert_called_with(app.ui_files_base_dir)
def test_mma_dashboard_initialization_refresh(app_instance):
"""
"""
The fixture mocks out _load_active_project, which would normally trigger
_refresh_from_project during initialization. This test calls it manually and
verifies it behaves as expected for that initialization scenario.
"""
app, mock_pm = app_instance
mock_tracks = [MagicMock(id="init_track", description="Initial Track")]
mock_pm.get_all_tracks.return_value = mock_tracks
# Simulate the refresh that would happen during a project load
app._refresh_from_project()
assert app.tracks == mock_tracks
assert app.tracks[0].id == "init_track"

View File

@@ -2,175 +2,159 @@ import pytest
from models import Ticket, Track, WorkerContext
def test_ticket_instantiation():
"""
"""
Verifies that a Ticket can be instantiated with its required fields:
id, description, status, assigned_to.
"""
ticket_id = "T1"
description = "Implement surgical code changes"
status = "todo"
assigned_to = "tier3-worker"
ticket = Ticket(
id=ticket_id,
description=description,
status=status,
assigned_to=assigned_to
)
assert ticket.id == ticket_id
assert ticket.description == description
assert ticket.status == status
assert ticket.assigned_to == assigned_to
assert ticket.depends_on == []
ticket_id = "T1"
description = "Implement surgical code changes"
status = "todo"
assigned_to = "tier3-worker"
ticket = Ticket(
id=ticket_id,
description=description,
status=status,
assigned_to=assigned_to
)
assert ticket.id == ticket_id
assert ticket.description == description
assert ticket.status == status
assert ticket.assigned_to == assigned_to
assert ticket.depends_on == []
def test_ticket_with_dependencies():
"""
"""
Verifies that a Ticket can store dependencies.
"""
ticket = Ticket(
id="T2",
description="Write code",
status="todo",
assigned_to="worker-1",
depends_on=["T1"]
)
assert ticket.depends_on == ["T1"]
def test_track_instantiation():
"""
"""
Verifies that a Track can be instantiated with its required fields:
id, description, and a list of Tickets.
"""
ticket1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="a")
ticket2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="b")
track_id = "TRACK-1"
track_desc = "Implement MMA Models"
tickets = [ticket1, ticket2]
track = Track(
id=track_id,
description=track_desc,
tickets=tickets
)
assert track.id == track_id
assert track.description == track_desc
assert len(track.tickets) == 2
assert track.tickets[0].id == "T1"
assert track.tickets[1].id == "T2"
def test_track_can_handle_empty_tickets():
"""
"""
Verifies that a Track can be instantiated with an empty list of tickets.
"""
track = Track(id="TRACK-2", description="Empty Track", tickets=[])
assert track.tickets == []
track = Track(id="TRACK-2", description="Empty Track", tickets=[])
assert track.tickets == []
def test_worker_context_instantiation():
"""
"""
Verifies that a WorkerContext can be instantiated with ticket_id,
model_name, and messages.
"""
ticket_id = "T1"
model_name = "gemini-2.0-flash-lite"
messages = [
{"role": "user", "content": "Hello"},
{"role": "assistant", "content": "Hi there!"}
]
context = WorkerContext(
ticket_id=ticket_id,
model_name=model_name,
messages=messages
)
assert context.ticket_id == ticket_id
assert context.model_name == model_name
assert context.messages == messages
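# Hedged sketch of the shapes these instantiation tests imply for models.Ticket,
# models.Track and models.WorkerContext. The real classes may be pydantic models
# or richer dataclasses; only the fields and defaults asserted above are shown.
from dataclasses import dataclass, field
from typing import Any


@dataclass
class _SketchTicket:
    id: str
    description: str
    status: str
    assigned_to: str
    depends_on: list[str] = field(default_factory=list)  # defaults to [] per test_ticket_instantiation


@dataclass
class _SketchTrack:
    id: str
    description: str
    tickets: list[_SketchTicket] = field(default_factory=list)


@dataclass
class _SketchWorkerContext:
    ticket_id: str
    model_name: str
    messages: list[dict[str, Any]] = field(default_factory=list)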
ticket_id = "T1"
model_name = "gemini-2.0-flash-lite"
messages = [
{"role": "user", "content": "Hello"},
{"role": "assistant", "content": "Hi there!"}
]
context = WorkerContext(
ticket_id=ticket_id,
model_name=model_name,
messages=messages
)
assert context.ticket_id == ticket_id
assert context.model_name == model_name
assert context.messages == messages
def test_ticket_mark_blocked():
"""
"""
Verifies that ticket.mark_blocked(reason) sets the status to 'blocked'.
Note: The reason field might need to be added to the Ticket class.
"""
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="a")
ticket.mark_blocked("Waiting for API key")
assert ticket.status == "blocked"
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="a")
ticket.mark_blocked("Waiting for API key")
assert ticket.status == "blocked"
def test_ticket_mark_complete():
"""
"""
Verifies that ticket.mark_complete() sets the status to 'completed'.
"""
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="a")
ticket.mark_complete()
assert ticket.status == "completed"
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="a")
ticket.mark_complete()
assert ticket.status == "completed"
def test_track_get_executable_tickets():
"""
"""
Verifies that track.get_executable_tickets() returns only 'todo' tickets
whose dependencies are all 'completed'.
"""
# T1: todo, no deps -> executable
t1 = Ticket(id="T1", description="T1", status="todo", assigned_to="a")
# T2: todo, deps [T1] -> not executable (T1 is todo)
t2 = Ticket(id="T2", description="T2", status="todo", assigned_to="a", depends_on=["T1"])
# T3: todo, deps [T4] -> not executable (T4 is blocked)
t3 = Ticket(id="T3", description="T3", status="todo", assigned_to="a", depends_on=["T4"])
# T4: blocked, no deps -> not executable (not 'todo')
t4 = Ticket(id="T4", description="T4", status="blocked", assigned_to="a")
# T5: completed, no deps -> not executable (not 'todo')
t5 = Ticket(id="T5", description="T5", status="completed", assigned_to="a")
# T6: todo, deps [T5] -> executable (T5 is completed)
t6 = Ticket(id="T6", description="T6", status="todo", assigned_to="a", depends_on=["T5"])
track = Track(id="TR1", description="Track 1", tickets=[t1, t2, t3, t4, t5, t6])
executable = track.get_executable_tickets()
executable_ids = [t.id for t in executable]
assert "T1" in executable_ids
assert "T6" in executable_ids
assert len(executable_ids) == 2
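# Hedged sketch (not the models.Track implementation) of the selection rule this
# test and the complex-chain test below both assert: a ticket is executable when
# it is still 'todo' and every id it depends_on belongs to a completed ticket.
def _sketch_get_executable_tickets(track):
    completed = {t.id for t in track.tickets if t.status == "completed"}
    return [
        t for t in track.tickets
        if t.status == "todo" and all(dep in completed for dep in t.depends_on)
    ]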
def test_track_get_executable_tickets_complex():
"""
"""
Verifies executable tickets with complex dependency chains.
Chain: T1 (comp) -> T2 (todo) -> T3 (todo)
T4 (comp) -> T3
T5 (todo) -> T3
"""
t1 = Ticket(id="T1", description="T1", status="completed", assigned_to="a")
t2 = Ticket(id="T2", description="T2", status="todo", assigned_to="a", depends_on=["T1"])
t3 = Ticket(id="T3", description="T3", status="todo", assigned_to="a", depends_on=["T2", "T4", "T5"])
t4 = Ticket(id="T4", description="T4", status="completed", assigned_to="a")
t5 = Ticket(id="T5", description="T5", status="todo", assigned_to="a")
track = Track(id="TR1", description="Track 1", tickets=[t1, t2, t3, t4, t5])
# At this point:
# T1 is completed
# T4 is completed
# T2 is todo, depends on T1 (completed) -> Executable
# T5 is todo, no deps -> Executable
# T3 is todo, depends on T2 (todo), T4 (completed), T5 (todo) -> Not executable
executable = track.get_executable_tickets()
executable_ids = sorted([t.id for t in executable])
assert executable_ids == ["T2", "T5"]
# Mark T2 complete
t2.mark_complete()
# T3 still depends on T5
executable = track.get_executable_tickets()
executable_ids = sorted([t.id for t in executable])
assert executable_ids == ["T5"]
# Mark T5 complete
t5.mark_complete()
# Now T3 should be executable
executable = track.get_executable_tickets()
executable_ids = sorted([t.id for t in executable])
assert executable_ids == ["T3"]
t1 = Ticket(id="T1", description="T1", status="completed", assigned_to="a")
t2 = Ticket(id="T2", description="T2", status="todo", assigned_to="a", depends_on=["T1"])
t3 = Ticket(id="T3", description="T3", status="todo", assigned_to="a", depends_on=["T2", "T4", "T5"])
t4 = Ticket(id="T4", description="T4", status="completed", assigned_to="a")
t5 = Ticket(id="T5", description="T5", status="todo", assigned_to="a")
track = Track(id="TR1", description="Track 1", tickets=[t1, t2, t3, t4, t5])
# At this point:
# T1 is completed
# T4 is completed
# T2 is todo, depends on T1 (completed) -> Executable
# T5 is todo, no deps -> Executable
# T3 is todo, depends on T2 (todo), T4 (completed), T5 (todo) -> Not executable
executable = track.get_executable_tickets()
executable_ids = sorted([t.id for t in executable])
assert executable_ids == ["T2", "T5"]
# Mark T2 complete
t2.mark_complete()
# T3 still depends on T5
executable = track.get_executable_tickets()
executable_ids = sorted([t.id for t in executable])
assert executable_ids == ["T5"]
# Mark T5 complete
t5.mark_complete()
# Now T3 should be executable
executable = track.get_executable_tickets()
executable_ids = sorted([t.id for t in executable])
assert executable_ids == ["T3"]

View File

@@ -7,141 +7,126 @@ from gui_2 import App
@pytest.fixture
def app_instance():
with (
patch('gui_2.load_config', return_value={'ai': {}, 'projects': {}}),
patch('gui_2.save_config'),
patch('gui_2.project_manager'),
patch('gui_2.session_logger'),
patch('gui_2.immapp.run'),
patch.object(App, '_load_active_project'),
patch.object(App, '_fetch_models'),
patch.object(App, '_load_fonts'),
patch.object(App, '_post_init')
):
app = App()
        # Initialize the new MMA state variables if the App implementation has not added them yet
if not hasattr(app, 'ui_epic_input'): app.ui_epic_input = ""
if not hasattr(app, 'proposed_tracks'): app.proposed_tracks = []
if not hasattr(app, '_show_track_proposal_modal'): app._show_track_proposal_modal = False
yield app
def test_mma_ui_state_initialization(app_instance):
"""Verifies that the new MMA UI state variables are initialized correctly."""
assert hasattr(app_instance, 'ui_epic_input')
assert hasattr(app_instance, 'proposed_tracks')
assert hasattr(app_instance, '_show_track_proposal_modal')
assert hasattr(app_instance, 'mma_streams')
assert app_instance.ui_epic_input == ""
assert app_instance.proposed_tracks == []
assert app_instance._show_track_proposal_modal is False
assert app_instance.mma_streams == {}
"""Verifies that the new MMA UI state variables are initialized correctly."""
assert hasattr(app_instance, 'ui_epic_input')
assert hasattr(app_instance, 'proposed_tracks')
assert hasattr(app_instance, '_show_track_proposal_modal')
assert hasattr(app_instance, 'mma_streams')
assert app_instance.ui_epic_input == ""
assert app_instance.proposed_tracks == []
assert app_instance._show_track_proposal_modal is False
assert app_instance.mma_streams == {}
def test_process_pending_gui_tasks_show_track_proposal(app_instance):
"""Verifies that the 'show_track_proposal' action correctly updates the UI state."""
mock_tracks = [{"id": "track_1", "title": "Test Track"}]
task = {
"action": "show_track_proposal",
"payload": mock_tracks
}
app_instance._pending_gui_tasks.append(task)
app_instance._process_pending_gui_tasks()
assert app_instance.proposed_tracks == mock_tracks
assert app_instance._show_track_proposal_modal is True
"""Verifies that the 'show_track_proposal' action correctly updates the UI state."""
mock_tracks = [{"id": "track_1", "title": "Test Track"}]
task = {
"action": "show_track_proposal",
"payload": mock_tracks
}
app_instance._pending_gui_tasks.append(task)
app_instance._process_pending_gui_tasks()
assert app_instance.proposed_tracks == mock_tracks
assert app_instance._show_track_proposal_modal is True
def test_cb_plan_epic_launches_thread(app_instance):
"""Verifies that _cb_plan_epic launches a thread and eventually queues a task."""
app_instance.ui_epic_input = "Develop a new feature"
app_instance.active_project_path = "test_project.toml"
mock_tracks = [{"id": "track_1", "title": "Test Track"}]
with (
patch('orchestrator_pm.get_track_history_summary', return_value="History summary") as mock_get_history,
patch('orchestrator_pm.generate_tracks', return_value=mock_tracks) as mock_gen_tracks,
patch('aggregate.build_file_items', return_value=[]) as mock_build_files
):
# We need to mock project_manager.flat_config and project_manager.load_project
with (
patch('project_manager.load_project', return_value={}),
patch('project_manager.flat_config', return_value={})
):
app_instance._cb_plan_epic()
# Wait for the background thread to finish (it should be quick with mocks)
max_wait = 5
start_time = time.time()
while len(app_instance._pending_gui_tasks) < 2 and time.time() - start_time < max_wait:
time.sleep(0.1)
assert len(app_instance._pending_gui_tasks) == 2
task1 = app_instance._pending_gui_tasks[0]
assert task1['action'] == 'handle_ai_response'
assert task1['payload']['stream_id'] == 'Tier 1'
assert task1['payload']['text'] == json.dumps(mock_tracks, indent=2)
task2 = app_instance._pending_gui_tasks[1]
assert task2['action'] == 'show_track_proposal'
assert task2['payload'] == mock_tracks
mock_get_history.assert_called_once()
mock_gen_tracks.assert_called_once()
"""Verifies that _cb_plan_epic launches a thread and eventually queues a task."""
app_instance.ui_epic_input = "Develop a new feature"
app_instance.active_project_path = "test_project.toml"
mock_tracks = [{"id": "track_1", "title": "Test Track"}]
with (
patch('orchestrator_pm.get_track_history_summary', return_value="History summary") as mock_get_history,
patch('orchestrator_pm.generate_tracks', return_value=mock_tracks) as mock_gen_tracks,
patch('aggregate.build_file_items', return_value=[]) as mock_build_files
):
# We need to mock project_manager.flat_config and project_manager.load_project
with (
patch('project_manager.load_project', return_value={}),
patch('project_manager.flat_config', return_value={})
):
app_instance._cb_plan_epic()
# Wait for the background thread to finish (it should be quick with mocks)
max_wait = 5
start_time = time.time()
while len(app_instance._pending_gui_tasks) < 2 and time.time() - start_time < max_wait:
time.sleep(0.1)
assert len(app_instance._pending_gui_tasks) == 2
task1 = app_instance._pending_gui_tasks[0]
assert task1['action'] == 'handle_ai_response'
assert task1['payload']['stream_id'] == 'Tier 1'
assert task1['payload']['text'] == json.dumps(mock_tracks, indent=2)
task2 = app_instance._pending_gui_tasks[1]
assert task2['action'] == 'show_track_proposal'
assert task2['payload'] == mock_tracks
mock_get_history.assert_called_once()
mock_gen_tracks.assert_called_once()
def test_process_pending_gui_tasks_mma_spawn_approval(app_instance):
"""Verifies that the 'mma_spawn_approval' action correctly updates the UI state."""
task = {
"action": "mma_spawn_approval",
"ticket_id": "T1",
"role": "Tier 3 Worker",
"prompt": "Test Prompt",
"context_md": "Test Context",
"dialog_container": [None]
}
app_instance._pending_gui_tasks.append(task)
app_instance._process_pending_gui_tasks()
assert app_instance._pending_mma_spawn == task
assert app_instance._mma_spawn_prompt == "Test Prompt"
assert app_instance._mma_spawn_context == "Test Context"
assert app_instance._mma_spawn_open is True
assert app_instance._mma_spawn_edit_mode is False
assert task["dialog_container"][0] is not None
assert task["dialog_container"][0]._ticket_id == "T1"
"""Verifies that the 'mma_spawn_approval' action correctly updates the UI state."""
task = {
"action": "mma_spawn_approval",
"ticket_id": "T1",
"role": "Tier 3 Worker",
"prompt": "Test Prompt",
"context_md": "Test Context",
"dialog_container": [None]
}
app_instance._pending_gui_tasks.append(task)
app_instance._process_pending_gui_tasks()
assert app_instance._pending_mma_spawn == task
assert app_instance._mma_spawn_prompt == "Test Prompt"
assert app_instance._mma_spawn_context == "Test Context"
assert app_instance._mma_spawn_open is True
assert app_instance._mma_spawn_edit_mode is False
assert task["dialog_container"][0] is not None
assert task["dialog_container"][0]._ticket_id == "T1"
def test_handle_ai_response_with_stream_id(app_instance):
"""Verifies routing to mma_streams."""
task = {
"action": "handle_ai_response",
"payload": {
"text": "Tier 1 Strategy Content",
"stream_id": "Tier 1",
"status": "Thinking..."
}
}
app_instance._pending_gui_tasks.append(task)
app_instance._process_pending_gui_tasks()
assert app_instance.mma_streams.get("Tier 1") == "Tier 1 Strategy Content"
assert app_instance.ai_status == "Thinking..."
assert app_instance.ai_response == ""
"""Verifies routing to mma_streams."""
task = {
"action": "handle_ai_response",
"payload": {
"text": "Tier 1 Strategy Content",
"stream_id": "Tier 1",
"status": "Thinking..."
}
}
app_instance._pending_gui_tasks.append(task)
app_instance._process_pending_gui_tasks()
assert app_instance.mma_streams.get("Tier 1") == "Tier 1 Strategy Content"
assert app_instance.ai_status == "Thinking..."
assert app_instance.ai_response == ""
def test_handle_ai_response_fallback(app_instance):
"""Verifies fallback to ai_response when stream_id is missing."""
task = {
"action": "handle_ai_response",
"payload": {
"text": "Regular AI Response",
"status": "done"
}
}
app_instance._pending_gui_tasks.append(task)
app_instance._process_pending_gui_tasks()
assert app_instance.ai_response == "Regular AI Response"
assert app_instance.ai_status == "done"
assert len(app_instance.mma_streams) == 0
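# Hedged sketch of the dispatch behaviour the queue tests above rely on. The real
# App._process_pending_gui_tasks handles more actions; the attribute names here are
# taken from the assertions, not from gui_2 itself.
def _sketch_process_pending_gui_tasks(app):
    while app._pending_gui_tasks:
        task = app._pending_gui_tasks.pop(0)
        if task["action"] == "show_track_proposal":
            app.proposed_tracks = task["payload"]
            app._show_track_proposal_modal = True
        elif task["action"] == "handle_ai_response":
            payload = task["payload"]
            app.ai_status = payload.get("status", app.ai_status)
            stream_id = payload.get("stream_id")
            if stream_id:
                # Tiered MMA output goes to its own stream buffer...
                app.mma_streams[stream_id] = payload["text"]
            else:
                # ...while untagged responses fall back to the single ai_response field.
                app.ai_response = payload["text"]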
"""Verifies fallback to ai_response when stream_id is missing."""
task = {
"action": "handle_ai_response",
"payload": {
"text": "Regular AI Response",
"status": "done"
}
}
app_instance._pending_gui_tasks.append(task)
app_instance._process_pending_gui_tasks()
assert app_instance.ai_response == "Regular AI Response"
assert app_instance.ai_status == "done"
assert len(app_instance.mma_streams) == 0

View File

@@ -2,51 +2,51 @@ import pytest
from mma_prompts import PROMPTS
def test_tier1_epic_init_constraints():
prompt = PROMPTS["tier1_epic_init"]
assert "Godot ECS Flat List format" in prompt
assert "JSON array" in prompt
assert "Tracks" in prompt
assert "severity" in prompt
assert "IGNORE all source code" in prompt
prompt = PROMPTS["tier1_epic_init"]
assert "Godot ECS Flat List format" in prompt
assert "JSON array" in prompt
assert "Tracks" in prompt
assert "severity" in prompt
assert "IGNORE all source code" in prompt
def test_tier1_track_delegation_constraints():
prompt = PROMPTS["tier1_track_delegation"]
assert "Track Brief" in prompt
assert "AST Skeleton View" in prompt
assert "IGNORE unrelated module docs" in prompt
prompt = PROMPTS["tier1_track_delegation"]
assert "Track Brief" in prompt
assert "AST Skeleton View" in prompt
assert "IGNORE unrelated module docs" in prompt
def test_tier1_macro_merge_constraints():
prompt = PROMPTS["tier1_macro_merge"]
assert "Macro-Merge" in prompt
assert "Macro-Diff" in prompt
assert "IGNORE Tier 3 trial-and-error" in prompt
prompt = PROMPTS["tier1_macro_merge"]
assert "Macro-Merge" in prompt
assert "Macro-Diff" in prompt
assert "IGNORE Tier 3 trial-and-error" in prompt
def test_tier2_sprint_planning_constraints():
prompt = PROMPTS["tier2_sprint_planning"]
assert "Tickets" in prompt
assert "Godot ECS Flat List format" in prompt
assert "depends_on" in prompt
assert "DAG" in prompt
assert "Skeleton View" in prompt
assert "Curated Implementation View" in prompt
prompt = PROMPTS["tier2_sprint_planning"]
assert "Tickets" in prompt
assert "Godot ECS Flat List format" in prompt
assert "depends_on" in prompt
assert "DAG" in prompt
assert "Skeleton View" in prompt
assert "Curated Implementation View" in prompt
def test_tier2_code_review_constraints():
prompt = PROMPTS["tier2_code_review"]
assert "Code Review" in prompt
assert "IGNORE the Contributor's internal trial-and-error" in prompt
assert "Tier 4 (QA) logs" in prompt
prompt = PROMPTS["tier2_code_review"]
assert "Code Review" in prompt
assert "IGNORE the Contributor's internal trial-and-error" in prompt
assert "Tier 4 (QA) logs" in prompt
def test_tier2_track_finalization_constraints():
prompt = PROMPTS["tier2_track_finalization"]
assert "Track Finalization" in prompt
assert "Executive Summary" in prompt
assert "Macro-Diff" in prompt
assert "Dependency Delta" in prompt
prompt = PROMPTS["tier2_track_finalization"]
assert "Track Finalization" in prompt
assert "Executive Summary" in prompt
assert "Macro-Diff" in prompt
assert "Dependency Delta" in prompt
def test_tier2_contract_first_constraints():
prompt = PROMPTS["tier2_contract_first"]
assert "Stub Ticket" in prompt
assert "Consumer Ticket" in prompt
assert "Implementation Ticket" in prompt
assert "Interface-Driven Development" in prompt
assert "Godot ECS Flat List format" in prompt
prompt = PROMPTS["tier2_contract_first"]
assert "Stub Ticket" in prompt
assert "Consumer Ticket" in prompt
assert "Implementation Ticket" in prompt
assert "Interface-Driven Development" in prompt
assert "Godot ECS Flat List format" in prompt

View File

@@ -5,49 +5,43 @@ from gui_2 import App
@pytest.fixture
def app_instance():
with (
patch('gui_2.load_config', return_value={'ai': {}, 'projects': {}}),
patch('gui_2.save_config'),
patch('gui_2.project_manager'),
patch('gui_2.session_logger'),
patch('gui_2.immapp.run'),
patch.object(App, '_load_active_project'),
patch.object(App, '_fetch_models'),
patch.object(App, '_load_fonts'),
patch.object(App, '_post_init')
):
app = App()
app.active_tickets = []
app._loop = MagicMock()
yield app
def test_cb_ticket_retry(app_instance):
ticket_id = "test_ticket_1"
app_instance.active_tickets = [{"id": ticket_id, "status": "failed"}]
with patch('asyncio.run_coroutine_threadsafe') as mock_run_safe:
app_instance._cb_ticket_retry(ticket_id)
# Verify status update
assert app_instance.active_tickets[0]['status'] == 'todo'
# Verify event pushed
mock_run_safe.assert_called_once()
# First arg is the coroutine (event_queue.put), second is self._loop
args, _ = mock_run_safe.call_args
assert args[1] == app_instance._loop
ticket_id = "test_ticket_1"
app_instance.active_tickets = [{"id": ticket_id, "status": "failed"}]
with patch('asyncio.run_coroutine_threadsafe') as mock_run_safe:
app_instance._cb_ticket_retry(ticket_id)
# Verify status update
assert app_instance.active_tickets[0]['status'] == 'todo'
# Verify event pushed
mock_run_safe.assert_called_once()
# First arg is the coroutine (event_queue.put), second is self._loop
args, _ = mock_run_safe.call_args
assert args[1] == app_instance._loop
def test_cb_ticket_skip(app_instance):
ticket_id = "test_ticket_1"
app_instance.active_tickets = [{"id": ticket_id, "status": "todo"}]
with patch('asyncio.run_coroutine_threadsafe') as mock_run_safe:
app_instance._cb_ticket_skip(ticket_id)
# Verify status update
assert app_instance.active_tickets[0]['status'] == 'skipped'
# Verify event pushed
mock_run_safe.assert_called_once()
args, _ = mock_run_safe.call_args
assert args[1] == app_instance._loop
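# Hedged sketch of the pattern both callbacks above assert: update the ticket's
# status on the GUI side, then push an event onto the conductor's asyncio loop via
# run_coroutine_threadsafe. The event_queue name and event payload are assumptions;
# the tests only verify that app._loop is the loop argument.
import asyncio


def _sketch_cb_ticket_retry(app, ticket_id):
    for ticket in app.active_tickets:
        if ticket["id"] == ticket_id:
            ticket["status"] = "todo"
    event = {"type": "ticket_retry", "ticket_id": ticket_id}  # assumed payload shape
    asyncio.run_coroutine_threadsafe(app.event_queue.put(event), app._loop)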
ticket_id = "test_ticket_1"
app_instance.active_tickets = [{"id": ticket_id, "status": "todo"}]
with patch('asyncio.run_coroutine_threadsafe') as mock_run_safe:
app_instance._cb_ticket_skip(ticket_id)
# Verify status update
assert app_instance.active_tickets[0]['status'] == 'skipped'
# Verify event pushed
mock_run_safe.assert_called_once()
args, _ = mock_run_safe.call_args
assert args[1] == app_instance._loop

View File

@@ -8,126 +8,104 @@ from models import Track, Ticket
@pytest.fixture
def mock_ai_client():
with patch("ai_client.send") as mock_send:
yield mock_send
with patch("ai_client.send") as mock_send:
yield mock_send
def test_generate_tracks(mock_ai_client):
# Tier 1 (PM) response mock
mock_ai_client.return_value = json.dumps([
{"id": "track_1", "title": "Infrastructure Setup", "description": "Setup basic project structure"},
{"id": "track_2", "title": "Feature implementation", "description": "Implement core feature"}
])
user_request = "Build a new app"
project_config = {}
file_items = []
tracks = orchestrator_pm.generate_tracks(user_request, project_config, file_items)
assert len(tracks) == 2
assert tracks[0]["id"] == "track_1"
assert tracks[1]["id"] == "track_2"
mock_ai_client.assert_called_once()
def test_generate_tickets(mock_ai_client):
# Tier 2 (Tech Lead) response mock
mock_ai_client.return_value = json.dumps([
{"id": "T-001", "description": "Define interfaces", "depends_on": []},
{"id": "T-002", "description": "Implement interfaces", "depends_on": ["T-001"]}
])
track_brief = "Implement a new feature."
module_skeletons = "class Feature: pass"
tickets = conductor_tech_lead.generate_tickets(track_brief, module_skeletons)
assert len(tickets) == 2
assert tickets[0]["id"] == "T-001"
assert tickets[1]["id"] == "T-002"
assert tickets[1]["depends_on"] == ["T-001"]
def test_topological_sort():
tickets = [
{"id": "T-002", "description": "Dep on 001", "depends_on": ["T-001"]},
{"id": "T-001", "description": "Base", "depends_on": []},
{"id": "T-003", "description": "Dep on 002", "depends_on": ["T-002"]}
]
sorted_tickets = conductor_tech_lead.topological_sort(tickets)
assert sorted_tickets[0]["id"] == "T-001"
assert sorted_tickets[1]["id"] == "T-002"
assert sorted_tickets[2]["id"] == "T-003"
def test_topological_sort_circular():
tickets = [
{"id": "T-001", "depends_on": ["T-002"]},
{"id": "T-002", "depends_on": ["T-001"]}
]
with pytest.raises(ValueError, match="Circular dependency detected"):
conductor_tech_lead.topological_sort(tickets)
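# Hedged sketch of a Kahn-style sort that satisfies the two tests above:
# dependency-free tickets come out first, and an unresolvable cycle raises
# ValueError("Circular dependency detected"). conductor_tech_lead may differ in detail.
def _sketch_topological_sort(tickets):
    by_id = {t["id"]: t for t in tickets}
    indegree = {tid: len(t.get("depends_on", [])) for tid, t in by_id.items()}
    ready = sorted(tid for tid, deg in indegree.items() if deg == 0)
    ordered = []
    while ready:
        tid = ready.pop(0)
        ordered.append(by_id[tid])
        for other_id, other in by_id.items():
            if tid in other.get("depends_on", []):
                indegree[other_id] -= 1
                if indegree[other_id] == 0:
                    ready.append(other_id)
    if len(ordered) != len(tickets):
        raise ValueError("Circular dependency detected")
    return ordered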
def test_track_executable_tickets():
t1 = Ticket(id="T1", description="desc", status="todo", assigned_to="user")
t2 = Ticket(id="T2", description="desc", status="todo", assigned_to="user", depends_on=["T1"])
track = Track(id="track_1", description="desc", tickets=[t1, t2])
executable = track.get_executable_tickets()
assert len(executable) == 1
assert executable[0].id == "T1"
# Complete T1
t1.status = "completed"
executable = track.get_executable_tickets()
assert len(executable) == 1
assert executable[0].id == "T2"
t1 = Ticket(id="T1", description="desc", status="todo", assigned_to="user")
t2 = Ticket(id="T2", description="desc", status="todo", assigned_to="user", depends_on=["T1"])
track = Track(id="track_1", description="desc", tickets=[t1, t2])
executable = track.get_executable_tickets()
assert len(executable) == 1
assert executable[0].id == "T1"
# Complete T1
t1.status = "completed"
executable = track.get_executable_tickets()
assert len(executable) == 1
assert executable[0].id == "T2"
@pytest.mark.asyncio
async def test_conductor_engine_run_linear():
t1 = Ticket(id="T1", description="desc", status="todo", assigned_to="user")
t2 = Ticket(id="T2", description="desc", status="todo", assigned_to="user", depends_on=["T1"])
t1 = Ticket(id="T1", description="desc", status="todo", assigned_to="user")
t2 = Ticket(id="T2", description="desc", status="todo", assigned_to="user", depends_on=["T1"])
track = Track(id="track_1", description="desc", tickets=[t1, t2])
engine = multi_agent_conductor.ConductorEngine(track)
with patch("multi_agent_conductor.run_worker_lifecycle") as mock_worker:
# Mock worker to complete tickets
track = Track(id="track_1", description="desc", tickets=[t1, t2])
engine = multi_agent_conductor.ConductorEngine(track)
with patch("multi_agent_conductor.run_worker_lifecycle") as mock_worker:
# Mock worker to complete tickets
def complete_ticket(ticket, context, **kwargs):
ticket.status = "completed"
mock_worker.side_effect = complete_ticket
await engine.run_linear()
assert t1.status == "completed"
assert t2.status == "completed"
assert mock_worker.call_count == 2
def test_conductor_engine_parse_json_tickets():
track = Track(id="track_1", description="desc")
engine = multi_agent_conductor.ConductorEngine(track)
json_data = json.dumps([
{"id": "T1", "description": "desc 1", "depends_on": []},
{"id": "T2", "description": "desc 2", "depends_on": ["T1"]}
])
engine.parse_json_tickets(json_data)
assert len(track.tickets) == 2
assert track.tickets[0].id == "T1"
assert track.tickets[1].id == "T2"
assert track.tickets[1].depends_on == ["T1"]
track = Track(id="track_1", description="desc")
engine = multi_agent_conductor.ConductorEngine(track)
json_data = json.dumps([
{"id": "T1", "description": "desc 1", "depends_on": []},
{"id": "T2", "description": "desc 2", "depends_on": ["T1"]}
])
engine.parse_json_tickets(json_data)
assert len(track.tickets) == 2
assert track.tickets[0].id == "T1"
assert track.tickets[1].id == "T2"
assert track.tickets[1].depends_on == ["T1"]
def test_run_worker_lifecycle_blocked(mock_ai_client):
ticket = Ticket(id="T1", description="desc", status="todo", assigned_to="user")
context = multi_agent_conductor.WorkerContext(ticket_id="T1", model_name="model", messages=[])
mock_ai_client.return_value = "BLOCKED because of missing info"
multi_agent_conductor.run_worker_lifecycle(ticket, context)
assert ticket.status == "blocked"
assert ticket.blocked_reason == "BLOCKED because of missing info"
ticket = Ticket(id="T1", description="desc", status="todo", assigned_to="user")
context = multi_agent_conductor.WorkerContext(ticket_id="T1", model_name="model", messages=[])
mock_ai_client.return_value = "BLOCKED because of missing info"
multi_agent_conductor.run_worker_lifecycle(ticket, context)
assert ticket.status == "blocked"
assert ticket.blocked_reason == "BLOCKED because of missing info"

View File

@@ -6,75 +6,67 @@ import mma_prompts
class TestOrchestratorPM(unittest.TestCase):
@patch('summarize.build_summary_markdown')
@patch('ai_client.send')
def test_generate_tracks_success(self, mock_send, mock_summarize):
# Setup mocks
mock_summarize.return_value = "REPO_MAP_CONTENT"
mock_response_data = [
{
"id": "track_1",
"type": "Track",
"module": "test_module",
"persona": "Tech Lead",
"severity": "Medium",
"goal": "Test goal",
"acceptance_criteria": ["criteria 1"]
}
]
mock_send.return_value = json.dumps(mock_response_data)
user_request = "Implement unit tests"
project_config = {"files": {"paths": ["src"]}}
file_items = [{"path": "src/main.py", "content": "print('hello')"}]
# Execute
result = orchestrator_pm.generate_tracks(user_request, project_config, file_items)
# Verify summarize call
mock_summarize.assert_called_once_with(file_items)
# Verify ai_client.send call
expected_system_prompt = mma_prompts.PROMPTS['tier1_epic_init']
mock_send.assert_called_once()
args, kwargs = mock_send.call_args
self.assertEqual(kwargs['md_content'], "")
# Cannot check system_prompt via mock_send kwargs anymore as it's set globally
# But we can verify user_message was passed
self.assertIn(user_request, kwargs['user_message'])
self.assertIn("REPO_MAP_CONTENT", kwargs['user_message'])
# Verify result
self.assertEqual(result[0]['id'], mock_response_data[0]['id'])
@patch('summarize.build_summary_markdown')
@patch('ai_client.send')
def test_generate_tracks_markdown_wrapped(self, mock_send, mock_summarize):
mock_summarize.return_value = "REPO_MAP"
mock_response_data = [{"id": "track_1"}]
expected_result = [{"id": "track_1", "title": "Untitled Track"}]
# Wrapped in ```json ... ```
mock_send.return_value = f"Here is the plan:\n```json\n{json.dumps(mock_response_data)}\n```\nHope this helps."
result = orchestrator_pm.generate_tracks("req", {}, [])
self.assertEqual(result, expected_result)
# Wrapped in ``` ... ```
mock_send.return_value = f"```\n{json.dumps(mock_response_data)}\n```"
result = orchestrator_pm.generate_tracks("req", {}, [])
self.assertEqual(result, expected_result)
mock_response_data = [{"id": "track_1"}]
expected_result = [{"id": "track_1", "title": "Untitled Track"}]
# Wrapped in ```json ... ```
mock_send.return_value = f"Here is the plan:\n```json\n{json.dumps(mock_response_data)}\n```\nHope this helps."
result = orchestrator_pm.generate_tracks("req", {}, [])
self.assertEqual(result, expected_result)
# Wrapped in ``` ... ```
mock_send.return_value = f"```\n{json.dumps(mock_response_data)}\n```"
result = orchestrator_pm.generate_tracks("req", {}, [])
self.assertEqual(result, expected_result)
@patch('summarize.build_summary_markdown')
@patch('ai_client.send')
def test_generate_tracks_malformed_json(self, mock_send, mock_summarize):
mock_summarize.return_value = "REPO_MAP"
mock_send.return_value = "NOT A JSON"
        # Should return an empty list and print an error (print is mocked so the message can be asserted)
with patch('builtins.print') as mock_print:
result = orchestrator_pm.generate_tracks("req", {}, [])
self.assertEqual(result, [])
mock_print.assert_any_call("Error parsing Tier 1 response: Expecting value: line 1 column 1 (char 0)")
if __name__ == '__main__':
unittest.main()

View File

@@ -7,70 +7,63 @@ from pathlib import Path
import orchestrator_pm
class TestOrchestratorPMHistory(unittest.TestCase):
def setUp(self):
self.test_dir = Path("test_conductor")
self.test_dir.mkdir(exist_ok=True)
self.archive_dir = self.test_dir / "archive"
self.tracks_dir = self.test_dir / "tracks"
self.archive_dir.mkdir(exist_ok=True)
self.tracks_dir.mkdir(exist_ok=True)
def tearDown(self):
if self.test_dir.exists():
shutil.rmtree(self.test_dir)
def create_track(self, parent_dir, track_id, title, status, overview):
track_path = parent_dir / track_id
track_path.mkdir(exist_ok=True)
metadata = {"title": title, "status": status}
with open(track_path / "metadata.json", "w") as f:
json.dump(metadata, f)
spec_content = f"# Specification\n\n## Overview\n{overview}"
with open(track_path / "spec.md", "w") as f:
f.write(spec_content)
@patch('orchestrator_pm.CONDUCTOR_PATH', Path("test_conductor"))
def test_get_track_history_summary(self):
# Setup mock tracks
self.create_track(self.archive_dir, "track_001", "Initial Setup", "completed", "Setting up the project structure.")
self.create_track(self.tracks_dir, "track_002", "Feature A", "in_progress", "Implementing Feature A.")
summary = orchestrator_pm.get_track_history_summary()
self.assertIn("Initial Setup", summary)
self.assertIn("completed", summary)
self.assertIn("Setting up the project structure.", summary)
self.assertIn("Feature A", summary)
self.assertIn("in_progress", summary)
self.assertIn("Implementing Feature A.", summary)
@patch('orchestrator_pm.CONDUCTOR_PATH', Path("test_conductor"))
def test_get_track_history_summary_missing_files(self):
# Track with missing spec.md
track_path = self.tracks_dir / "track_003"
track_path.mkdir(exist_ok=True)
with open(track_path / "metadata.json", "w") as f:
json.dump({"title": "Missing Spec", "status": "pending"}, f)
summary = orchestrator_pm.get_track_history_summary()
self.assertIn("Missing Spec", summary)
self.assertIn("pending", summary)
self.assertIn("No overview available", summary)
@patch('orchestrator_pm.summarize.build_summary_markdown')
@patch('ai_client.send')
def test_generate_tracks_with_history(self, mock_send, mock_summarize):
mock_summarize.return_value = "REPO_MAP"
mock_send.return_value = "[]"
history_summary = "PAST_HISTORY_SUMMARY"
orchestrator_pm.generate_tracks("req", {}, [], history_summary=history_summary)
args, kwargs = mock_send.call_args
self.assertIn(history_summary, kwargs['user_message'])
self.assertIn("### TRACK HISTORY:", kwargs['user_message'])
if __name__ == '__main__':
unittest.main()

View File

@@ -9,21 +9,19 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from performance_monitor import PerformanceMonitor
def test_perf_monitor_basic_timing():
pm = PerformanceMonitor()
pm.start_frame()
time.sleep(0.02) # 20ms
pm.end_frame()
metrics = pm.get_metrics()
assert metrics['last_frame_time_ms'] >= 20.0
pm.stop()
def test_perf_monitor_component_timing():
pm = PerformanceMonitor()
pm.start_component("test_comp")
time.sleep(0.01)
pm.end_component("test_comp")
metrics = pm.get_metrics()
assert metrics['time_test_comp_ms'] >= 10.0
pm.stop()
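# Hedged sketch of the minimal surface these timing tests exercise; the real
# PerformanceMonitor presumably keeps rolling averages and a background reporter,
# but the assertions only need wall-clock frame and component timings in ms.
class _SketchPerformanceMonitor:
    def __init__(self):
        self._frame_start = None
        self._component_starts = {}
        self._metrics = {}

    def start_frame(self):
        self._frame_start = time.perf_counter()

    def end_frame(self):
        self._metrics["last_frame_time_ms"] = (time.perf_counter() - self._frame_start) * 1000.0

    def start_component(self, name):
        self._component_starts[name] = time.perf_counter()

    def end_component(self, name):
        elapsed = time.perf_counter() - self._component_starts.pop(name)
        self._metrics[f"time_{name}_ms"] = elapsed * 1000.0

    def get_metrics(self):
        return dict(self._metrics)

    def stop(self):
        pass  # the real monitor presumably shuts down a background thread here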

View File

@@ -5,58 +5,49 @@ from gui_2 import App
@pytest.fixture
def app_instance():
with (
patch('gui_2.load_config', return_value={'ai': {'provider': 'gemini', 'model': 'gemini-2.5-flash-lite'}, 'projects': {}}),
patch('gui_2.save_config'),
patch('gui_2.project_manager'),
patch('gui_2.session_logger'),
patch('gui_2.immapp.run'),
patch.object(App, '_load_active_project'),
patch.object(App, '_fetch_models'),
patch.object(App, '_load_fonts'),
patch.object(App, '_post_init'),
patch('ai_client.set_provider'),
patch('ai_client.reset_session')
):
app = App()
yield app
def test_redundant_calls_in_process_pending_gui_tasks(app_instance):
# Setup
app_instance._pending_gui_tasks = [
{'action': 'set_value', 'item': 'current_provider', 'value': 'anthropic'}
]
with patch('ai_client.set_provider') as mock_set_provider, \
patch('ai_client.reset_session') as mock_reset_session:
        # The App was constructed with ai_client already mocked, so re-patch it here
        # to capture the calls the current_provider property setter makes.
app_instance._process_pending_gui_tasks()
# current_provider setter calls:
# ai_client.reset_session()
# ai_client.set_provider(value, self.current_model)
# _process_pending_gui_tasks NO LONGER calls it redundantly:
# Total should be 1 call for each.
assert mock_set_provider.call_count == 1
assert mock_reset_session.call_count == 1
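# Hedged sketch of the property-setter contract this test locks in: switching the
# provider resets the ai_client session and applies the new provider exactly once,
# with _process_pending_gui_tasks no longer repeating those calls itself.
def _sketch_set_current_provider(app, value):
    app._current_provider = value  # assumed backing attribute
    ai_client.reset_session()
    ai_client.set_provider(value, app.current_model)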
def test_gcli_path_updates_adapter(app_instance):
# Setup
app_instance.current_provider = 'gemini_cli'
app_instance._pending_gui_tasks = [
{'action': 'set_value', 'item': 'gcli_path', 'value': '/new/path/to/gemini'}
]
    # Clear any existing adapter so processing the task has to (re)create it
ai_client._gemini_cli_adapter = None
app_instance._process_pending_gui_tasks()
assert ai_client._gemini_cli_adapter is not None
assert ai_client._gemini_cli_adapter.binary_path == '/new/path/to/gemini'

View File

@@ -6,89 +6,79 @@ from models import TrackState, Metadata, Ticket
from datetime import datetime
def test_get_all_tracks_empty(tmp_path):
# conductor/tracks directory doesn't exist
assert get_all_tracks(tmp_path) == []
def test_get_all_tracks_with_state(tmp_path):
tracks_dir = tmp_path / "conductor" / "tracks"
tracks_dir.mkdir(parents=True)
track_id = "test_track_1"
track_dir = tracks_dir / track_id
track_dir.mkdir()
# Create TrackState
metadata = Metadata(id=track_id, name="Test Track 1", status="in_progress",
created_at=datetime.now(), updated_at=datetime.now())
tasks = [
Ticket(id="task1", description="desc1", status="completed", assigned_to="user"),
Ticket(id="task2", description="desc2", status="todo", assigned_to="user")
]
state = TrackState(metadata=metadata, discussion=[], tasks=tasks)
save_track_state(track_id, state, tmp_path)
tracks = get_all_tracks(tmp_path)
assert len(tracks) == 1
track = tracks[0]
assert track["id"] == track_id
assert track["title"] == "Test Track 1"
assert track["status"] == "in_progress"
assert track["complete"] == 1
assert track["total"] == 2
assert track["progress"] == 0.5
tracks_dir = tmp_path / "conductor" / "tracks"
tracks_dir.mkdir(parents=True)
track_id = "test_track_1"
track_dir = tracks_dir / track_id
track_dir.mkdir()
# Create TrackState
metadata = Metadata(id=track_id, name="Test Track 1", status="in_progress",
created_at=datetime.now(), updated_at=datetime.now())
tasks = [
Ticket(id="task1", description="desc1", status="completed", assigned_to="user"),
Ticket(id="task2", description="desc2", status="todo", assigned_to="user")
]
state = TrackState(metadata=metadata, discussion=[], tasks=tasks)
save_track_state(track_id, state, tmp_path)
tracks = get_all_tracks(tmp_path)
assert len(tracks) == 1
track = tracks[0]
assert track["id"] == track_id
assert track["title"] == "Test Track 1"
assert track["status"] == "in_progress"
assert track["complete"] == 1
assert track["total"] == 2
assert track["progress"] == 0.5
def test_get_all_tracks_with_metadata_json(tmp_path):
tracks_dir = tmp_path / "conductor" / "tracks"
tracks_dir.mkdir(parents=True)
track_id = "test_track_2"
track_dir = tracks_dir / track_id
track_dir.mkdir()
metadata = {
"id": track_id,
"title": "Test Track 2",
"status": "planned"
}
with open(track_dir / "metadata.json", "w") as f:
json.dump(metadata, f)
# Create plan.md to test parsing
plan_content = """
tracks_dir = tmp_path / "conductor" / "tracks"
tracks_dir.mkdir(parents=True)
track_id = "test_track_2"
track_dir = tracks_dir / track_id
track_dir.mkdir()
metadata = {
"id": track_id,
"title": "Test Track 2",
"status": "planned"
}
with open(track_dir / "metadata.json", "w") as f:
json.dump(metadata, f)
# Create plan.md to test parsing
plan_content = """
# Plan
- [x] Task: Task 1
- [ ] Task: Task 2
- [~] Task: Task 3
"""
with open(track_dir / "plan.md", "w") as f:
f.write(plan_content)
tracks = get_all_tracks(tmp_path)
assert len(tracks) == 1
track = tracks[0]
assert track["id"] == track_id
assert track["title"] == "Test Track 2"
assert track["status"] == "planned"
assert track["complete"] == 1
assert track["total"] == 3
assert pytest.approx(track["progress"]) == 0.333333
with open(track_dir / "plan.md", "w") as f:
f.write(plan_content)
tracks = get_all_tracks(tmp_path)
assert len(tracks) == 1
track = tracks[0]
assert track["id"] == track_id
assert track["title"] == "Test Track 2"
assert track["status"] == "planned"
assert track["complete"] == 1
assert track["total"] == 3
assert pytest.approx(track["progress"]) == 0.333333
def test_get_all_tracks_malformed(tmp_path):
tracks_dir = tmp_path / "conductor" / "tracks"
tracks_dir.mkdir(parents=True)
track_id = "malformed_track"
track_dir = tracks_dir / track_id
track_dir.mkdir()
# Malformed metadata.json
with open(track_dir / "metadata.json", "w") as f:
f.write("{ invalid json }")
tracks = get_all_tracks(tmp_path)
assert len(tracks) == 1
track = tracks[0]
assert track["id"] == track_id
assert track["status"] == "unknown"
assert track["complete"] == 0
assert track["total"] == 0
tracks_dir = tmp_path / "conductor" / "tracks"
tracks_dir.mkdir(parents=True)
track_id = "malformed_track"
track_dir = tracks_dir / track_id
track_dir.mkdir()
# Malformed metadata.json
with open(track_dir / "metadata.json", "w") as f:
f.write("{ invalid json }")
tracks = get_all_tracks(tmp_path)
assert len(tracks) == 1
track = tracks[0]
assert track["id"] == track_id
assert track["status"] == "unknown"
assert track["complete"] == 0
assert track["total"] == 0

View File

@@ -9,56 +9,46 @@ import tomllib
@pytest.fixture
def temp_logs(tmp_path, monkeypatch):
# Ensure closed before starting
session_logger.close_session()
monkeypatch.setattr(session_logger, "_comms_fh", None)
# Mock _LOG_DIR in session_logger
original_log_dir = session_logger._LOG_DIR
session_logger._LOG_DIR = tmp_path / "logs"
monkeypatch.setattr(session_logger, "_LOG_DIR", tmp_path / "logs")
session_logger._LOG_DIR.mkdir(parents=True, exist_ok=True)
# Mock _SCRIPTS_DIR
original_scripts_dir = session_logger._SCRIPTS_DIR
session_logger._SCRIPTS_DIR = tmp_path / "scripts" / "generated"
monkeypatch.setattr(session_logger, "_SCRIPTS_DIR", tmp_path / "scripts" / "generated")
session_logger._SCRIPTS_DIR.mkdir(parents=True, exist_ok=True)
yield tmp_path / "logs"
# Cleanup: Close handles if open
session_logger.close_session()
session_logger._LOG_DIR = original_log_dir
session_logger._SCRIPTS_DIR = original_scripts_dir
def test_open_session_creates_subdir_and_registry(temp_logs):
label = "test-label"
# We can't easily mock datetime.datetime.now() because it's a built-in
# but we can check the resulting directory name pattern
session_logger.open_session(label=label)
# Check that a subdirectory was created
subdirs = list(temp_logs.iterdir())
# One is the log_registry.toml, one is the session dir
session_dirs = [d for d in subdirs if d.is_dir()]
assert len(session_dirs) == 1
session_dir = session_dirs[0]
assert session_dir.name.endswith(f"_{label}")
# Check for log files
assert (session_dir / "comms.log").exists()
assert (session_dir / "toolcalls.log").exists()
assert (session_dir / "apihooks.log").exists()
assert (session_dir / "clicalls.log").exists()
# Check registry
registry_path = temp_logs / "log_registry.toml"
assert registry_path.exists()
with open(registry_path, "rb") as f:
data = tomllib.load(f)
assert session_dir.name in data
assert data[session_dir.name]["path"] == str(session_dir)
label = "test-label"
# We can't easily mock datetime.datetime.now() because it's a built-in
# but we can check the resulting directory name pattern
session_logger.open_session(label=label)
# Check that a subdirectory was created
subdirs = list(temp_logs.iterdir())
# One is the log_registry.toml, one is the session dir
session_dirs = [d for d in subdirs if d.is_dir()]
assert len(session_dirs) == 1
session_dir = session_dirs[0]
assert session_dir.name.endswith(f"_{label}")
# Check for log files
assert (session_dir / "comms.log").exists()
assert (session_dir / "toolcalls.log").exists()
assert (session_dir / "apihooks.log").exists()
assert (session_dir / "clicalls.log").exists()
# Check registry
registry_path = temp_logs / "log_registry.toml"
assert registry_path.exists()
with open(registry_path, "rb") as f:
data = tomllib.load(f)
assert session_dir.name in data
assert data[session_dir.name]["path"] == str(session_dir)

View File

@@ -9,33 +9,28 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from simulation.sim_ai_settings import AISettingsSimulation
def test_ai_settings_simulation_run():
mock_client = MagicMock()
mock_client.wait_for_server.return_value = True
mock_client.get_value.side_effect = lambda key: {
"current_provider": "gemini",
"current_model": "gemini-2.5-flash-lite"
}.get(key)
with patch('simulation.sim_base.WorkflowSimulator') as mock_sim_class:
mock_sim = MagicMock()
mock_sim_class.return_value = mock_sim
sim = AISettingsSimulation(mock_client)
        # Back get_value/set_value with a shared dict so set_value updates are
        # visible to later get_value calls during the run.
vals = {"current_provider": "gemini", "current_model": "gemini-2.5-flash-lite"}
def side_effect(key):
return vals.get(key)
def set_side_effect(key, val):
vals[key] = val
mock_client.get_value.side_effect = side_effect
mock_client.set_value.side_effect = set_side_effect
sim.run()
# Verify calls
mock_client.set_value.assert_any_call("current_model", "gemini-1.5-flash")
mock_client.set_value.assert_any_call("current_model", "gemini-2.5-flash-lite")
mock_client = MagicMock()
mock_client.wait_for_server.return_value = True
mock_client.get_value.side_effect = lambda key: {
"current_provider": "gemini",
"current_model": "gemini-2.5-flash-lite"
}.get(key)
with patch('simulation.sim_base.WorkflowSimulator') as mock_sim_class:
mock_sim = MagicMock()
mock_sim_class.return_value = mock_sim
sim = AISettingsSimulation(mock_client)
# Replace the static side_effect with a dict-backed one so that set_value calls
# are reflected in later get_value reads.
vals = {"current_provider": "gemini", "current_model": "gemini-2.5-flash-lite"}
def side_effect(key):
return vals.get(key)
def set_side_effect(key, val):
vals[key] = val
mock_client.get_value.side_effect = side_effect
mock_client.set_value.side_effect = set_side_effect
sim.run()
# Verify calls
mock_client.set_value.assert_any_call("current_model", "gemini-1.5-flash")
mock_client.set_value.assert_any_call("current_model", "gemini-2.5-flash-lite")
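The dict-backed side-effect pattern used above is reusable; a generic sketch (the helper name is illustrative, not part of the project):

from unittest.mock import MagicMock

def make_stateful_client(initial: dict) -> MagicMock:
    """MagicMock whose get_value/set_value share one mutable dict of state."""
    state = dict(initial)
    client = MagicMock()
    client.get_value.side_effect = state.get
    client.set_value.side_effect = state.__setitem__
    return client

client = make_stateful_client({"current_model": "gemini-2.5-flash-lite"})
client.set_value("current_model", "gemini-1.5-flash")
assert client.get_value("current_model") == "gemini-1.5-flash"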

View File

@@ -9,26 +9,22 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from simulation.sim_base import BaseSimulation
def test_base_simulation_init():
with patch('simulation.sim_base.ApiHookClient') as mock_client_class:
mock_client = MagicMock()
mock_client_class.return_value = mock_client
sim = BaseSimulation()
assert sim.client == mock_client
assert sim.sim is not None
with patch('simulation.sim_base.ApiHookClient') as mock_client_class:
mock_client = MagicMock()
mock_client_class.return_value = mock_client
sim = BaseSimulation()
assert sim.client == mock_client
assert sim.sim is not None
def test_base_simulation_setup():
mock_client = MagicMock()
mock_client.wait_for_server.return_value = True
with patch('simulation.sim_base.WorkflowSimulator') as mock_sim_class:
mock_sim = MagicMock()
mock_sim_class.return_value = mock_sim
sim = BaseSimulation(mock_client)
sim.setup("TestSim")
mock_client.wait_for_server.assert_called()
mock_client.click.assert_any_call("btn_reset")
mock_sim.setup_new_project.assert_called()
assert sim.project_path.endswith("temp_testsim.toml")
mock_client = MagicMock()
mock_client.wait_for_server.return_value = True
with patch('simulation.sim_base.WorkflowSimulator') as mock_sim_class:
mock_sim = MagicMock()
mock_sim_class.return_value = mock_sim
sim = BaseSimulation(mock_client)
sim.setup("TestSim")
mock_client.wait_for_server.assert_called()
mock_client.click.assert_any_call("btn_reset")
mock_sim.setup_new_project.assert_called()
assert sim.project_path.endswith("temp_testsim.toml")

View File

@@ -9,42 +9,37 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from simulation.sim_context import ContextSimulation
def test_context_simulation_run():
mock_client = MagicMock()
mock_client.wait_for_server.return_value = True
# Mock project config
mock_project = {
'project': {
'files': {'paths': []}
}
}
mock_client.get_project.return_value = mock_project
mock_client.get_value.side_effect = lambda key: {
"ai_status": "md written: test.md",
"token_budget_pct": 0.05
}.get(key)
# Mock session entries
mock_session = {
'session': {
'entries': [
{'role': 'User', 'content': 'Hello'},
{'role': 'AI', 'content': 'Hi'}
]
}
}
mock_client.get_session.return_value = mock_session
with patch('simulation.sim_base.WorkflowSimulator') as mock_sim_class:
mock_sim = MagicMock()
mock_sim_class.return_value = mock_sim
sim = ContextSimulation(mock_client)
sim.run()
# Verify calls
mock_sim.create_discussion.assert_called()
mock_client.post_project.assert_called()
mock_client.click.assert_called_with("btn_md_only")
mock_sim.run_discussion_turn.assert_called()
mock_sim.truncate_history.assert_called_with(1)
mock_client = MagicMock()
mock_client.wait_for_server.return_value = True
# Mock project config
mock_project = {
'project': {
'files': {'paths': []}
}
}
mock_client.get_project.return_value = mock_project
mock_client.get_value.side_effect = lambda key: {
"ai_status": "md written: test.md",
"token_budget_pct": 0.05
}.get(key)
# Mock session entries
mock_session = {
'session': {
'entries': [
{'role': 'User', 'content': 'Hello'},
{'role': 'AI', 'content': 'Hi'}
]
}
}
mock_client.get_session.return_value = mock_session
with patch('simulation.sim_base.WorkflowSimulator') as mock_sim_class:
mock_sim = MagicMock()
mock_sim_class.return_value = mock_sim
sim = ContextSimulation(mock_client)
sim.run()
# Verify calls
mock_sim.create_discussion.assert_called()
mock_client.post_project.assert_called()
mock_client.click.assert_called_with("btn_md_only")
mock_sim.run_discussion_turn.assert_called()
mock_sim.truncate_history.assert_called_with(1)

View File

@@ -9,42 +9,37 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from simulation.sim_execution import ExecutionSimulation
def test_execution_simulation_run():
mock_client = MagicMock()
mock_client.wait_for_server.return_value = True
# Mock show_confirm_modal state
vals = {"show_confirm_modal": False}
def side_effect(key):
return vals.get(key)
def set_side_effect(key, val):
vals[key] = val
mock_client.get_value.side_effect = side_effect
mock_client.set_value.side_effect = set_side_effect
# Mock session entries with tool output
mock_session = {
'session': {
'entries': [
{'role': 'Tool', 'content': 'Simulation Test', 'tool_call_id': 'call_1'}
]
}
}
mock_client.get_session.return_value = mock_session
mock_client = MagicMock()
mock_client.wait_for_server.return_value = True
# Mock show_confirm_modal state
vals = {"show_confirm_modal": False}
# Mock script confirmation event
mock_client.wait_for_event.side_effect = [
{"type": "script_confirmation_required", "script": "dir"},
None # Second call returns None to end the loop
]
def side_effect(key):
return vals.get(key)
with patch('simulation.sim_base.WorkflowSimulator') as mock_sim_class:
mock_sim = MagicMock()
mock_sim_class.return_value = mock_sim
sim = ExecutionSimulation(mock_client)
sim.run()
# Verify calls
mock_sim.run_discussion_turn_async.assert_called()
mock_client.click.assert_called_with("btn_approve_script")
def set_side_effect(key, val):
vals[key] = val
mock_client.get_value.side_effect = side_effect
mock_client.set_value.side_effect = set_side_effect
# Mock session entries with tool output
mock_session = {
'session': {
'entries': [
{'role': 'Tool', 'content': 'Simulation Test', 'tool_call_id': 'call_1'}
]
}
}
mock_client.get_session.return_value = mock_session
# Mock script confirmation event
mock_client.wait_for_event.side_effect = [
{"type": "script_confirmation_required", "script": "dir"},
None # Second call returns None to end the loop
]
with patch('simulation.sim_base.WorkflowSimulator') as mock_sim_class:
mock_sim = MagicMock()
mock_sim_class.return_value = mock_sim
sim = ExecutionSimulation(mock_client)
sim.run()
# Verify calls
mock_sim.run_discussion_turn_async.assert_called()
mock_client.click.assert_called_with("btn_approve_script")
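The mocked wait_for_event sequence (one confirmation event, then None) implies an approval loop roughly like the sketch below. This is an assumption about ExecutionSimulation, not its actual code, and the wait_for_event signature is guessed.

def approve_pending_scripts(client) -> None:
    """Drain script-confirmation events and approve each pending script."""
    while True:
        event = client.wait_for_event()  # real signature/filtering is assumed
        if not event:
            break
        if event.get("type") == "script_confirmation_required":
            client.click("btn_approve_script")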

View File

@@ -9,28 +9,24 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from simulation.sim_tools import ToolsSimulation
def test_tools_simulation_run():
mock_client = MagicMock()
mock_client.wait_for_server.return_value = True
# Mock session entries with tool output
mock_session = {
'session': {
'entries': [
{'role': 'User', 'content': 'List files'},
{'role': 'Tool', 'content': 'aggregate.py, ai_client.py', 'tool_call_id': 'call_1'},
{'role': 'AI', 'content': 'The files are: aggregate.py, ai_client.py'}
]
}
}
mock_client.get_session.return_value = mock_session
with patch('simulation.sim_base.WorkflowSimulator') as mock_sim_class:
mock_sim = MagicMock()
mock_sim_class.return_value = mock_sim
sim = ToolsSimulation(mock_client)
sim.run()
# Verify calls
mock_sim.run_discussion_turn.assert_any_call("List the files in the current directory.")
mock_sim.run_discussion_turn.assert_any_call("Read the first 10 lines of aggregate.py.")
mock_client = MagicMock()
mock_client.wait_for_server.return_value = True
# Mock session entries with tool output
mock_session = {
'session': {
'entries': [
{'role': 'User', 'content': 'List files'},
{'role': 'Tool', 'content': 'aggregate.py, ai_client.py', 'tool_call_id': 'call_1'},
{'role': 'AI', 'content': 'The files are: aggregate.py, ai_client.py'}
]
}
}
mock_client.get_session.return_value = mock_session
with patch('simulation.sim_base.WorkflowSimulator') as mock_sim_class:
mock_sim = MagicMock()
mock_sim_class.return_value = mock_sim
sim = ToolsSimulation(mock_client)
sim.run()
# Verify calls
mock_sim.run_discussion_turn.assert_any_call("List the files in the current directory.")
mock_sim.run_discussion_turn.assert_any_call("Read the first 10 lines of aggregate.py.")

View File

@@ -7,84 +7,75 @@ import asyncio
import concurrent.futures
class MockDialog:
def __init__(self, approved, final_payload=None):
self.approved = approved
self.final_payload = final_payload
def wait(self):
# Match the new return format: a dictionary
res = {'approved': self.approved, 'abort': False}
if self.final_payload:
res.update(self.final_payload)
return res
def __init__(self, approved, final_payload=None):
self.approved = approved
self.final_payload = final_payload
def wait(self):
# Match the new return format: a dictionary
res = {'approved': self.approved, 'abort': False}
if self.final_payload:
res.update(self.final_payload)
return res
@pytest.fixture
def mock_ai_client():
with patch("ai_client.send") as mock_send:
mock_send.return_value = "Task completed"
yield mock_send
with patch("ai_client.send") as mock_send:
mock_send.return_value = "Task completed"
yield mock_send
@pytest.mark.asyncio
async def test_confirm_spawn_pushed_to_queue():
event_queue = events.AsyncEventQueue()
ticket_id = "T1"
role = "Tier 3 Worker"
prompt = "Original Prompt"
context_md = "Original Context"
event_queue = events.AsyncEventQueue()
ticket_id = "T1"
role = "Tier 3 Worker"
prompt = "Original Prompt"
context_md = "Original Context"
# Start confirm_spawn in a thread since it blocks with time.sleep
# Start confirm_spawn in a thread since it blocks with time.sleep
def run_confirm():
return multi_agent_conductor.confirm_spawn(role, prompt, context_md, event_queue, ticket_id)
loop = asyncio.get_running_loop()
with concurrent.futures.ThreadPoolExecutor() as executor:
future = loop.run_in_executor(executor, run_confirm)
# Wait for the event to appear in the queue
event_name, payload = await event_queue.get()
assert event_name == "mma_spawn_approval"
assert payload["ticket_id"] == ticket_id
assert payload["role"] == role
assert payload["prompt"] == prompt
assert payload["context_md"] == context_md
assert "dialog_container" in payload
# Simulate GUI injecting a dialog
payload["dialog_container"][0] = MockDialog(True, {"prompt": "Modified Prompt", "context_md": "Modified Context"})
approved, final_prompt, final_context = await future
assert approved is True
assert final_prompt == "Modified Prompt"
assert final_context == "Modified Context"
def run_confirm():
return multi_agent_conductor.confirm_spawn(role, prompt, context_md, event_queue, ticket_id)
loop = asyncio.get_running_loop()
with concurrent.futures.ThreadPoolExecutor() as executor:
future = loop.run_in_executor(executor, run_confirm)
# Wait for the event to appear in the queue
event_name, payload = await event_queue.get()
assert event_name == "mma_spawn_approval"
assert payload["ticket_id"] == ticket_id
assert payload["role"] == role
assert payload["prompt"] == prompt
assert payload["context_md"] == context_md
assert "dialog_container" in payload
# Simulate GUI injecting a dialog
payload["dialog_container"][0] = MockDialog(True, {"prompt": "Modified Prompt", "context_md": "Modified Context"})
approved, final_prompt, final_context = await future
assert approved is True
assert final_prompt == "Modified Prompt"
assert final_context == "Modified Context"
@patch("multi_agent_conductor.confirm_spawn")
def test_run_worker_lifecycle_approved(mock_confirm, mock_ai_client):
ticket = Ticket(id="T1", description="desc", status="todo", assigned_to="user")
context = WorkerContext(ticket_id="T1", model_name="model", messages=[])
event_queue = events.AsyncEventQueue()
mock_confirm.return_value = (True, "Modified Prompt", "Modified Context")
multi_agent_conductor.run_worker_lifecycle(ticket, context, event_queue=event_queue)
mock_confirm.assert_called_once()
# Check that ai_client.send was called with modified values
args, kwargs = mock_ai_client.call_args
assert kwargs["user_message"] == "Modified Prompt"
assert kwargs["md_content"] == "Modified Context"
assert ticket.status == "completed"
ticket = Ticket(id="T1", description="desc", status="todo", assigned_to="user")
context = WorkerContext(ticket_id="T1", model_name="model", messages=[])
event_queue = events.AsyncEventQueue()
mock_confirm.return_value = (True, "Modified Prompt", "Modified Context")
multi_agent_conductor.run_worker_lifecycle(ticket, context, event_queue=event_queue)
mock_confirm.assert_called_once()
# Check that ai_client.send was called with modified values
args, kwargs = mock_ai_client.call_args
assert kwargs["user_message"] == "Modified Prompt"
assert kwargs["md_content"] == "Modified Context"
assert ticket.status == "completed"
@patch("multi_agent_conductor.confirm_spawn")
def test_run_worker_lifecycle_rejected(mock_confirm, mock_ai_client):
ticket = Ticket(id="T1", description="desc", status="todo", assigned_to="user")
context = WorkerContext(ticket_id="T1", model_name="model", messages=[])
event_queue = events.AsyncEventQueue()
mock_confirm.return_value = (False, "Original Prompt", "Original Context")
result = multi_agent_conductor.run_worker_lifecycle(ticket, context, event_queue=event_queue)
mock_confirm.assert_called_once()
mock_ai_client.assert_not_called()
assert ticket.status == "blocked"
assert "Spawn rejected by user" in ticket.blocked_reason
assert "BLOCKED" in result
ticket = Ticket(id="T1", description="desc", status="todo", assigned_to="user")
context = WorkerContext(ticket_id="T1", model_name="model", messages=[])
event_queue = events.AsyncEventQueue()
mock_confirm.return_value = (False, "Original Prompt", "Original Context")
result = multi_agent_conductor.run_worker_lifecycle(ticket, context, event_queue=event_queue)
mock_confirm.assert_called_once()
mock_ai_client.assert_not_called()
assert ticket.status == "blocked"
assert "Spawn rejected by user" in ticket.blocked_reason
assert "BLOCKED" in result

View File

@@ -5,57 +5,50 @@ import pytest
from api_hook_client import ApiHookClient
def test_api_ask_client_method(live_gui):
"""
"""
Tests the request_confirmation method in ApiHookClient.
"""
client = ApiHookClient("http://127.0.0.1:8999")
# Drain existing events
client.get_events()
results = {"response": None, "error": None}
def make_blocking_request():
try:
# This call should block until we respond
results["response"] = client.request_confirmation(
tool_name="powershell",
args={"command": "echo hello"}
)
except Exception as e:
results["error"] = str(e)
client = ApiHookClient("http://127.0.0.1:8999")
# Drain existing events
client.get_events()
results = {"response": None, "error": None}
# Start the request in a background thread
t = threading.Thread(target=make_blocking_request)
t.start()
# Poll for the 'ask_received' event
request_id = None
start_time = time.time()
while time.time() - start_time < 5:
events = client.get_events()
for ev in events:
if ev.get("type") == "ask_received":
request_id = ev.get("request_id")
break
if request_id:
break
time.sleep(0.1)
assert request_id is not None, "Timed out waiting for 'ask_received' event"
# Respond
expected_response = {"approved": True}
resp = requests.post(
"http://127.0.0.1:8999/api/ask/respond",
json={
"request_id": request_id,
"response": expected_response
}
)
assert resp.status_code == 200
t.join(timeout=5)
assert not t.is_alive()
assert results["error"] is None
assert results["response"] == expected_response
def make_blocking_request():
try:
# This call should block until we respond
results["response"] = client.request_confirmation(
tool_name="powershell",
args={"command": "echo hello"}
)
except Exception as e:
results["error"] = str(e)
# Start the request in a background thread
t = threading.Thread(target=make_blocking_request)
t.start()
# Poll for the 'ask_received' event
request_id = None
start_time = time.time()
while time.time() - start_time < 5:
events = client.get_events()
for ev in events:
if ev.get("type") == "ask_received":
request_id = ev.get("request_id")
break
if request_id:
break
time.sleep(0.1)
assert request_id is not None, "Timed out waiting for 'ask_received' event"
# Respond
expected_response = {"approved": True}
resp = requests.post(
"http://127.0.0.1:8999/api/ask/respond",
json={
"request_id": request_id,
"response": expected_response
}
)
assert resp.status_code == 200
t.join(timeout=5)
assert not t.is_alive()
assert results["error"] is None
assert results["response"] == expected_response

View File

@@ -4,221 +4,176 @@ import subprocess
from shell_runner import run_powershell
def test_run_powershell_qa_callback_on_failure():
"""
"""
Test that qa_callback is called when a powershell command fails (non-zero exit code).
The result of the callback should be appended to the output.
"""
script = "Write-Error 'something went wrong'; exit 1"
base_dir = "."
# Mocking subprocess.run to simulate failure
mock_result = MagicMock()
mock_result.stdout = ""
mock_result.stderr = "something went wrong"
mock_result.returncode = 1
qa_callback = MagicMock(return_value="QA ANALYSIS: This looks like a syntax error.")
with patch("subprocess.run", return_value=mock_result), \
patch("shutil.which", return_value="powershell.exe"):
# We expect run_powershell to accept qa_callback
output = run_powershell(script, base_dir, qa_callback=qa_callback)
# Verify callback was called with stderr
qa_callback.assert_called_once_with("something went wrong")
# Verify output contains the callback result
assert "QA ANALYSIS: This looks like a syntax error." in output
assert "STDERR:\nsomething went wrong" in output
assert "EXIT CODE: 1" in output
script = "Write-Error 'something went wrong'; exit 1"
base_dir = "."
# Mocking subprocess.run to simulate failure
mock_result = MagicMock()
mock_result.stdout = ""
mock_result.stderr = "something went wrong"
mock_result.returncode = 1
qa_callback = MagicMock(return_value="QA ANALYSIS: This looks like a syntax error.")
with patch("subprocess.run", return_value=mock_result), \
patch("shutil.which", return_value="powershell.exe"):
# We expect run_powershell to accept qa_callback
output = run_powershell(script, base_dir, qa_callback=qa_callback)
# Verify callback was called with stderr
qa_callback.assert_called_once_with("something went wrong")
# Verify output contains the callback result
assert "QA ANALYSIS: This looks like a syntax error." in output
assert "STDERR:\nsomething went wrong" in output
assert "EXIT CODE: 1" in output
def test_run_powershell_qa_callback_on_stderr_only():
"""
"""
Test that qa_callback is called when a command has stderr even if exit code is 0.
"""
script = "Write-Error 'non-fatal error'"
base_dir = "."
mock_result = MagicMock()
mock_result.stdout = "Success"
mock_result.stderr = "non-fatal error"
mock_result.returncode = 0
qa_callback = MagicMock(return_value="QA ANALYSIS: Ignorable warning.")
with patch("subprocess.run", return_value=mock_result), \
patch("shutil.which", return_value="powershell.exe"):
output = run_powershell(script, base_dir, qa_callback=qa_callback)
qa_callback.assert_called_once_with("non-fatal error")
assert "QA ANALYSIS: Ignorable warning." in output
assert "STDOUT:\nSuccess" in output
script = "Write-Error 'non-fatal error'"
base_dir = "."
mock_result = MagicMock()
mock_result.stdout = "Success"
mock_result.stderr = "non-fatal error"
mock_result.returncode = 0
qa_callback = MagicMock(return_value="QA ANALYSIS: Ignorable warning.")
with patch("subprocess.run", return_value=mock_result), \
patch("shutil.which", return_value="powershell.exe"):
output = run_powershell(script, base_dir, qa_callback=qa_callback)
qa_callback.assert_called_once_with("non-fatal error")
assert "QA ANALYSIS: Ignorable warning." in output
assert "STDOUT:\nSuccess" in output
def test_run_powershell_no_qa_callback_on_success():
"""
"""
Test that qa_callback is NOT called when the command succeeds without stderr.
"""
script = "Write-Output 'All good'"
base_dir = "."
mock_result = MagicMock()
mock_result.stdout = "All good"
mock_result.stderr = ""
mock_result.returncode = 0
qa_callback = MagicMock()
with patch("subprocess.run", return_value=mock_result), \
patch("shutil.which", return_value="powershell.exe"):
output = run_powershell(script, base_dir, qa_callback=qa_callback)
qa_callback.assert_not_called()
assert "STDOUT:\nAll good" in output
assert "EXIT CODE: 0" in output
assert "QA ANALYSIS" not in output
script = "Write-Output 'All good'"
base_dir = "."
mock_result = MagicMock()
mock_result.stdout = "All good"
mock_result.stderr = ""
mock_result.returncode = 0
qa_callback = MagicMock()
with patch("subprocess.run", return_value=mock_result), \
patch("shutil.which", return_value="powershell.exe"):
output = run_powershell(script, base_dir, qa_callback=qa_callback)
qa_callback.assert_not_called()
assert "STDOUT:\nAll good" in output
assert "EXIT CODE: 0" in output
assert "QA ANALYSIS" not in output
def test_run_powershell_optional_qa_callback():
"""
"""
Test that run_powershell still works without providing a qa_callback.
"""
script = "Write-Error 'error'"
base_dir = "."
mock_result = MagicMock()
mock_result.stdout = ""
mock_result.stderr = "error"
mock_result.returncode = 1
with patch("subprocess.run", return_value=mock_result), \
patch("shutil.which", return_value="powershell.exe"):
# Should not raise TypeError even if qa_callback is not provided
output = run_powershell(script, base_dir)
assert "STDERR:\nerror" in output
assert "EXIT CODE: 1" in output
script = "Write-Error 'error'"
base_dir = "."
mock_result = MagicMock()
mock_result.stdout = ""
mock_result.stderr = "error"
mock_result.returncode = 1
with patch("subprocess.run", return_value=mock_result), \
patch("shutil.which", return_value="powershell.exe"):
# Should not raise TypeError even if qa_callback is not provided
output = run_powershell(script, base_dir)
assert "STDERR:\nerror" in output
assert "EXIT CODE: 1" in output
def test_end_to_end_tier4_integration():
"""
"""
Verifies that shell_runner.run_powershell correctly uses ai_client.run_tier4_analysis.
"""
import ai_client
script = "Invoke-Item non_existent_file"
base_dir = "."
stderr_content = "Invoke-Item : Cannot find path 'C:\\non_existent_file' because it does not exist."
mock_result = MagicMock()
mock_result.stdout = ""
mock_result.stderr = stderr_content
mock_result.returncode = 1
expected_analysis = "Path does not exist. Verify the file path and ensure the file is present before invoking."
with patch("subprocess.run", return_value=mock_result), \
patch("shutil.which", return_value="powershell.exe"), \
patch("ai_client.run_tier4_analysis", return_value=expected_analysis) as mock_analysis:
output = run_powershell(script, base_dir, qa_callback=ai_client.run_tier4_analysis)
mock_analysis.assert_called_once_with(stderr_content)
assert f"QA ANALYSIS:\n{expected_analysis}" in output
import ai_client
script = "Invoke-Item non_existent_file"
base_dir = "."
stderr_content = "Invoke-Item : Cannot find path 'C:\\non_existent_file' because it does not exist."
mock_result = MagicMock()
mock_result.stdout = ""
mock_result.stderr = stderr_content
mock_result.returncode = 1
expected_analysis = "Path does not exist. Verify the file path and ensure the file is present before invoking."
with patch("subprocess.run", return_value=mock_result), \
patch("shutil.which", return_value="powershell.exe"), \
patch("ai_client.run_tier4_analysis", return_value=expected_analysis) as mock_analysis:
output = run_powershell(script, base_dir, qa_callback=ai_client.run_tier4_analysis)
mock_analysis.assert_called_once_with(stderr_content)
assert f"QA ANALYSIS:\n{expected_analysis}" in output
def test_ai_client_passes_qa_callback():
"""
"""
Verifies that ai_client.send passes the qa_callback down to the provider function.
"""
import ai_client
# Mocking a provider function to avoid actual API calls
mock_send_gemini = MagicMock(return_value="AI Response")
qa_callback = MagicMock(return_value="QA Analysis")
# Force provider to gemini and mock its send function
with patch("ai_client._provider", "gemini"), \
patch("ai_client._send_gemini", mock_send_gemini):
ai_client.send(
md_content="Context",
user_message="Hello",
qa_callback=qa_callback
)
# Verify provider received the qa_callback
mock_send_gemini.assert_called_once()
args, kwargs = mock_send_gemini.call_args
# qa_callback is the 7th positional argument in _send_gemini
assert args[6] == qa_callback
import ai_client
# Mocking a provider function to avoid actual API calls
mock_send_gemini = MagicMock(return_value="AI Response")
qa_callback = MagicMock(return_value="QA Analysis")
# Force provider to gemini and mock its send function
with patch("ai_client._provider", "gemini"), \
patch("ai_client._send_gemini", mock_send_gemini):
ai_client.send(
md_content="Context",
user_message="Hello",
qa_callback=qa_callback
)
# Verify provider received the qa_callback
mock_send_gemini.assert_called_once()
args, kwargs = mock_send_gemini.call_args
# qa_callback is the 7th positional argument in _send_gemini
assert args[6] == qa_callback
def test_gemini_provider_passes_qa_callback_to_run_script():
"""
"""
Verifies that _send_gemini passes the qa_callback to _run_script.
"""
import ai_client
# Mock Gemini chat and client
mock_client = MagicMock()
mock_chat = MagicMock()
# Simulate a tool call response
mock_part = MagicMock()
mock_part.text = ""
mock_part.function_call = MagicMock()
mock_part.function_call.name = "run_powershell"
mock_part.function_call.args = {"script": "dir"}
mock_candidate = MagicMock()
mock_candidate.content.parts = [mock_part]
mock_candidate.finish_reason.name = "STOP"
mock_response = MagicMock()
mock_response.candidates = [mock_candidate]
mock_response.usage_metadata.prompt_token_count = 10
mock_response.usage_metadata.candidates_token_count = 5
# Second call returns a stop response to break the loop
mock_stop_part = MagicMock()
mock_stop_part.text = "Done"
mock_stop_part.function_call = None
mock_stop_candidate = MagicMock()
mock_stop_candidate.content.parts = [mock_stop_part]
mock_stop_candidate.finish_reason.name = "STOP"
mock_stop_response = MagicMock()
mock_stop_response.candidates = [mock_stop_candidate]
mock_stop_response.usage_metadata.prompt_token_count = 5
mock_stop_response.usage_metadata.candidates_token_count = 2
mock_chat.send_message.side_effect = [mock_response, mock_stop_response]
# Mock count_tokens to avoid chat creation failure
mock_count_resp = MagicMock()
mock_count_resp.total_tokens = 100
mock_client.models.count_tokens.return_value = mock_count_resp
qa_callback = MagicMock()
# Set global state for the test
with patch("ai_client._gemini_client", mock_client), \
patch("ai_client._gemini_chat", None), \
patch("ai_client._ensure_gemini_client"), \
patch("ai_client._run_script", return_value="output") as mock_run_script, \
patch("ai_client._get_gemini_history_list", return_value=[]):
# Ensure chats.create returns our mock_chat
mock_client.chats.create.return_value = mock_chat
ai_client._send_gemini(
md_content="Context",
user_message="Run dir",
base_dir=".",
qa_callback=qa_callback
)
# Verify _run_script received the qa_callback
mock_run_script.assert_called_once_with("dir", ".", qa_callback)
import ai_client
# Mock Gemini chat and client
mock_client = MagicMock()
mock_chat = MagicMock()
# Simulate a tool call response
mock_part = MagicMock()
mock_part.text = ""
mock_part.function_call = MagicMock()
mock_part.function_call.name = "run_powershell"
mock_part.function_call.args = {"script": "dir"}
mock_candidate = MagicMock()
mock_candidate.content.parts = [mock_part]
mock_candidate.finish_reason.name = "STOP"
mock_response = MagicMock()
mock_response.candidates = [mock_candidate]
mock_response.usage_metadata.prompt_token_count = 10
mock_response.usage_metadata.candidates_token_count = 5
# Second call returns a stop response to break the loop
mock_stop_part = MagicMock()
mock_stop_part.text = "Done"
mock_stop_part.function_call = None
mock_stop_candidate = MagicMock()
mock_stop_candidate.content.parts = [mock_stop_part]
mock_stop_candidate.finish_reason.name = "STOP"
mock_stop_response = MagicMock()
mock_stop_response.candidates = [mock_stop_candidate]
mock_stop_response.usage_metadata.prompt_token_count = 5
mock_stop_response.usage_metadata.candidates_token_count = 2
mock_chat.send_message.side_effect = [mock_response, mock_stop_response]
# Mock count_tokens to avoid chat creation failure
mock_count_resp = MagicMock()
mock_count_resp.total_tokens = 100
mock_client.models.count_tokens.return_value = mock_count_resp
qa_callback = MagicMock()
# Set global state for the test
with patch("ai_client._gemini_client", mock_client), \
patch("ai_client._gemini_chat", None), \
patch("ai_client._ensure_gemini_client"), \
patch("ai_client._run_script", return_value="output") as mock_run_script, \
patch("ai_client._get_gemini_history_list", return_value=[]):
# Ensure chats.create returns our mock_chat
mock_client.chats.create.return_value = mock_chat
ai_client._send_gemini(
md_content="Context",
user_message="Run dir",
base_dir=".",
qa_callback=qa_callback
)
# Verify _run_script received the qa_callback
mock_run_script.assert_called_once_with("dir", ".", qa_callback)
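Taken together, these tests pin down how run_powershell is expected to assemble its output and when the optional qa_callback fires; a hedged sketch of that contract, not the actual shell_runner implementation:

def format_result(stdout: str, stderr: str, returncode: int, qa_callback=None) -> str:
    """Combine captured output; append a QA analysis when the run failed or wrote stderr."""
    parts = [f"STDOUT:\n{stdout}", f"STDERR:\n{stderr}", f"EXIT CODE: {returncode}"]
    if qa_callback is not None and (returncode != 0 or stderr.strip()):
        parts.append(f"QA ANALYSIS:\n{qa_callback(stderr)}")
    return "\n\n".join(parts)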

View File

@@ -3,134 +3,113 @@ from pathlib import Path
from aggregate import build_tier1_context, build_tier2_context, build_tier3_context
def test_build_tier1_context_exists():
# This should fail if the function is not defined
file_items = [
{"path": Path("conductor/product.md"), "entry": "conductor/product.md", "content": "Product content", "error": False},
{"path": Path("other.py"), "entry": "other.py", "content": "Other content", "error": False}
]
history = ["User: hello", "AI: hi"]
result = build_tier1_context(file_items, Path("."), [], history)
assert "Product content" in result
# other.py should be summarized, not full content in a code block
assert "Other content" not in result or "Summarized" in result # Assuming summary format
# This should fail if the function is not defined
file_items = [
{"path": Path("conductor/product.md"), "entry": "conductor/product.md", "content": "Product content", "error": False},
{"path": Path("other.py"), "entry": "other.py", "content": "Other content", "error": False}
]
history = ["User: hello", "AI: hi"]
result = build_tier1_context(file_items, Path("."), [], history)
assert "Product content" in result
# other.py should be summarized, not full content in a code block
assert "Other content" not in result or "Summarized" in result # Assuming summary format
def test_build_tier2_context_exists():
file_items = [
{"path": Path("other.py"), "entry": "other.py", "content": "Other content", "error": False}
]
history = ["User: hello"]
result = build_tier2_context(file_items, Path("."), [], history)
assert "Other content" in result
file_items = [
{"path": Path("other.py"), "entry": "other.py", "content": "Other content", "error": False}
]
history = ["User: hello"]
result = build_tier2_context(file_items, Path("."), [], history)
assert "Other content" in result
def test_build_tier3_context_ast_skeleton(monkeypatch):
from unittest.mock import MagicMock
import aggregate
import file_cache
# Mock ASTParser
mock_parser_instance = MagicMock()
mock_parser_instance.get_skeleton.return_value = "def other():\n ..."
mock_parser_class = MagicMock(return_value=mock_parser_instance)
# Mock file_cache.ASTParser in aggregate module
monkeypatch.setattr("aggregate.ASTParser", mock_parser_class)
file_items = [
{"path": Path("other.py"), "entry": "other.py", "content": "def other():\n pass", "error": False}
]
history = []
# New behavior check: it should use ASTParser for .py files not in focus
result = build_tier3_context(file_items, Path("."), [], history, focus_files=[])
assert "def other():" in result
assert "..." in result
assert "Python" not in result # summarize.py output should not be there if AST skeleton is used
mock_parser_class.assert_called_once_with("python")
mock_parser_instance.get_skeleton.assert_called_once_with("def other():\n pass")
from unittest.mock import MagicMock
import aggregate
import file_cache
# Mock ASTParser
mock_parser_instance = MagicMock()
mock_parser_instance.get_skeleton.return_value = "def other():\n ..."
mock_parser_class = MagicMock(return_value=mock_parser_instance)
# Mock file_cache.ASTParser in aggregate module
monkeypatch.setattr("aggregate.ASTParser", mock_parser_class)
file_items = [
{"path": Path("other.py"), "entry": "other.py", "content": "def other():\n pass", "error": False}
]
history = []
# New behavior check: it should use ASTParser for .py files not in focus
result = build_tier3_context(file_items, Path("."), [], history, focus_files=[])
assert "def other():" in result
assert "..." in result
assert "Python" not in result # summarize.py output should not be there if AST skeleton is used
mock_parser_class.assert_called_once_with("python")
mock_parser_instance.get_skeleton.assert_called_once_with("def other():\n pass")
def test_build_tier3_context_exists():
file_items = [
{"path": Path("focus.py"), "entry": "focus.py", "content": "def focus():\n pass", "error": False},
{"path": Path("other.py"), "entry": "other.py", "content": "def other():\n pass", "error": False}
]
history = ["User: hello"]
result = build_tier3_context(file_items, Path("."), [], history, focus_files=["focus.py"])
assert "def focus():" in result
assert "pass" in result
# other.py is not in focus, so it should get an AST skeleton rather than its full source.
# For a body that is just "pass" the skeleton may look identical, so only assert
# that the file header and the AST-skeleton marker are present.
assert "other.py" in result
assert "AST Skeleton" in result
file_items = [
{"path": Path("focus.py"), "entry": "focus.py", "content": "def focus():\n pass", "error": False},
{"path": Path("other.py"), "entry": "other.py", "content": "def other():\n pass", "error": False}
]
history = ["User: hello"]
result = build_tier3_context(file_items, Path("."), [], history, focus_files=["focus.py"])
assert "def focus():" in result
assert "pass" in result
# other.py is not in focus, so it should get an AST skeleton rather than its full source.
# For a body that is just "pass" the skeleton may look identical, so only assert
# that the file header and the AST-skeleton marker are present.
assert "other.py" in result
assert "AST Skeleton" in result
def test_build_file_items_with_tiers(tmp_path):
from aggregate import build_file_items
# Create some dummy files
file1 = tmp_path / "file1.txt"
file1.write_text("content1")
file2 = tmp_path / "file2.txt"
file2.write_text("content2")
files_config = [
"file1.txt",
{"path": "file2.txt", "tier": 3}
]
items = build_file_items(tmp_path, files_config)
assert len(items) == 2
item1 = next(i for i in items if i["entry"] == "file1.txt")
assert item1["content"] == "content1"
assert "tier" in item1
assert item1["tier"] is None
item2 = next(i for i in items if i["entry"] == "file2.txt")
assert item2["content"] == "content2"
assert item2["tier"] == 3
from aggregate import build_file_items
# Create some dummy files
file1 = tmp_path / "file1.txt"
file1.write_text("content1")
file2 = tmp_path / "file2.txt"
file2.write_text("content2")
files_config = [
"file1.txt",
{"path": "file2.txt", "tier": 3}
]
items = build_file_items(tmp_path, files_config)
assert len(items) == 2
item1 = next(i for i in items if i["entry"] == "file1.txt")
assert item1["content"] == "content1"
assert "tier" in item1
assert item1["tier"] is None
item2 = next(i for i in items if i["entry"] == "file2.txt")
assert item2["content"] == "content2"
assert item2["tier"] == 3
def test_build_files_section_with_dicts(tmp_path):
from aggregate import build_files_section
file1 = tmp_path / "file1.txt"
file1.write_text("content1")
files_config = [
{"path": str(file1)}
]
result = build_files_section(tmp_path, files_config)
assert "content1" in result
assert "file1.txt" in result
from aggregate import build_files_section
file1 = tmp_path / "file1.txt"
file1.write_text("content1")
files_config = [
{"path": str(file1)}
]
result = build_files_section(tmp_path, files_config)
assert "content1" in result
assert "file1.txt" in result
def test_tiered_context_by_tier_field():
file_items = [
{"path": Path("tier1_file.txt"), "entry": "tier1_file.txt", "content": "Full Tier 1 Content\nLine 2", "tier": 1},
{"path": Path("tier3_file.txt"), "entry": "tier3_file.txt", "content": "Full Tier 3 Content\nLine 2\nLine 3\nLine 4\nLine 5\nLine 6\nLine 7\nLine 8\nLine 9\nLine 10", "tier": 3},
{"path": Path("other.txt"), "entry": "other.txt", "content": "Other Content\nLine 2\nLine 3\nLine 4\nLine 5\nLine 6\nLine 7\nLine 8\nLine 9\nLine 10", "tier": None}
]
# Test Tier 1 Context
result_t1 = build_tier1_context(file_items, Path("."), [], [])
assert "Full Tier 1 Content" in result_t1
assert "Line 2" in result_t1 # In full
# tier3_file.txt should be summarized
assert "tier3_file.txt" in result_t1
assert "preview:" in result_t1
assert "Line 9" not in result_t1 # Only first 8 lines in preview
# Test Tier 3 Context
result_t3 = build_tier3_context(file_items, Path("."), [], [], focus_files=[])
assert "Full Tier 3 Content" in result_t3
assert "Line 10" in result_t3 # In full
# tier1_file.txt should be summarized
assert "tier1_file.txt" in result_t3
assert "preview:" in result_t3
assert "Full Tier 1 Content" in result_t3 # It's short, so it's in preview
file_items = [
{"path": Path("tier1_file.txt"), "entry": "tier1_file.txt", "content": "Full Tier 1 Content\nLine 2", "tier": 1},
{"path": Path("tier3_file.txt"), "entry": "tier3_file.txt", "content": "Full Tier 3 Content\nLine 2\nLine 3\nLine 4\nLine 5\nLine 6\nLine 7\nLine 8\nLine 9\nLine 10", "tier": 3},
{"path": Path("other.txt"), "entry": "other.txt", "content": "Other Content\nLine 2\nLine 3\nLine 4\nLine 5\nLine 6\nLine 7\nLine 8\nLine 9\nLine 10", "tier": None}
]
# Test Tier 1 Context
result_t1 = build_tier1_context(file_items, Path("."), [], [])
assert "Full Tier 1 Content" in result_t1
assert "Line 2" in result_t1 # In full
# tier3_file.txt should be summarized
assert "tier3_file.txt" in result_t1
assert "preview:" in result_t1
assert "Line 9" not in result_t1 # Only first 8 lines in preview
# Test Tier 3 Context
result_t3 = build_tier3_context(file_items, Path("."), [], [], focus_files=[])
assert "Full Tier 3 Content" in result_t3
assert "Line 10" in result_t3 # In full
# tier1_file.txt should be summarized
assert "tier1_file.txt" in result_t3
assert "preview:" in result_t3
assert "Full Tier 1 Content" in result_t3 # It's short, so it's in preview

View File

@@ -8,8 +8,8 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
import ai_client
def test_token_usage_tracking():
ai_client.reset_session()
# Mock an API response with token usage
usage = {"prompt_tokens": 100, "candidates_tokens": 50, "total_tokens": 150}
# This would test the internal accumulator in ai_client
pass
ai_client.reset_session()
# Mock an API response with token usage
usage = {"prompt_tokens": 100, "candidates_tokens": 50, "total_tokens": 150}
# This would test the internal accumulator in ai_client
pass

View File

@@ -9,73 +9,61 @@ from models import TrackState, Metadata, Ticket
from project_manager import save_track_state, load_track_state
def test_track_state_persistence(tmp_path):
"""
"""
Tests saving and loading a TrackState object to/from a TOML file.
1. Create a TrackState object with sample metadata, discussion, and tasks.
2. Call save_track_state('test_track', state, base_dir).
3. Verify that base_dir/conductor/tracks/test_track/state.toml exists.
4. Call load_track_state('test_track', base_dir) and verify it returns an identical TrackState object.
"""
base_dir = tmp_path
track_id = "test-track-999" # Metadata internal ID
track_folder_name = "test_track" # Folder name used in persistence
# 1. Create a TrackState object with sample data
metadata = Metadata(
id=track_id,
name="Test Track",
status="in_progress",
created_at=datetime(2023, 1, 1, 12, 0, 0),
updated_at=datetime(2023, 1, 2, 13, 0, 0)
)
discussion = [
{"role": "User", "content": "Hello", "ts": datetime(2023, 1, 1, 12, 0, 0)},
{"role": "AI", "content": "Hi there!", "ts": datetime(2023, 1, 1, 12, 0, 5)}
]
tasks = [
Ticket(id="task-1", description="First task", status="completed", assigned_to="worker-1"),
Ticket(id="task-2", description="Second task", status="todo", assigned_to="worker-2")
]
original_state = TrackState(
metadata=metadata,
discussion=discussion,
tasks=tasks
)
# 2. Call save_track_state('test_track', state, base_dir)
save_track_state(track_folder_name, original_state, base_dir)
# 3. Verify that base_dir/conductor/tracks/test_track/state.toml exists
state_file_path = base_dir / "conductor" / "tracks" / track_folder_name / "state.toml"
assert state_file_path.exists(), f"State file should exist at {state_file_path}"
# 4. Call load_track_state('test_track', base_dir) and verify it returns an identical TrackState object
loaded_state = load_track_state(track_folder_name, base_dir)
assert loaded_state is not None, "load_track_state returned None"
# Verify equality
assert loaded_state.metadata.id == original_state.metadata.id
assert loaded_state.metadata.name == original_state.metadata.name
assert loaded_state.metadata.status == original_state.metadata.status
assert loaded_state.metadata.created_at == original_state.metadata.created_at
assert loaded_state.metadata.updated_at == original_state.metadata.updated_at
assert len(loaded_state.tasks) == len(original_state.tasks)
for i in range(len(original_state.tasks)):
assert loaded_state.tasks[i].id == original_state.tasks[i].id
assert loaded_state.tasks[i].description == original_state.tasks[i].description
assert loaded_state.tasks[i].status == original_state.tasks[i].status
assert loaded_state.tasks[i].assigned_to == original_state.tasks[i].assigned_to
assert len(loaded_state.discussion) == len(original_state.discussion)
for i in range(len(original_state.discussion)):
assert loaded_state.discussion[i]["role"] == original_state.discussion[i]["role"]
assert loaded_state.discussion[i]["content"] == original_state.discussion[i]["content"]
assert loaded_state.discussion[i]["ts"] == original_state.discussion[i]["ts"]
# Final check: deep equality of dataclasses
assert loaded_state == original_state
base_dir = tmp_path
track_id = "test-track-999" # Metadata internal ID
track_folder_name = "test_track" # Folder name used in persistence
# 1. Create a TrackState object with sample data
metadata = Metadata(
id=track_id,
name="Test Track",
status="in_progress",
created_at=datetime(2023, 1, 1, 12, 0, 0),
updated_at=datetime(2023, 1, 2, 13, 0, 0)
)
discussion = [
{"role": "User", "content": "Hello", "ts": datetime(2023, 1, 1, 12, 0, 0)},
{"role": "AI", "content": "Hi there!", "ts": datetime(2023, 1, 1, 12, 0, 5)}
]
tasks = [
Ticket(id="task-1", description="First task", status="completed", assigned_to="worker-1"),
Ticket(id="task-2", description="Second task", status="todo", assigned_to="worker-2")
]
original_state = TrackState(
metadata=metadata,
discussion=discussion,
tasks=tasks
)
# 2. Call save_track_state('test_track', state, base_dir)
save_track_state(track_folder_name, original_state, base_dir)
# 3. Verify that base_dir/conductor/tracks/test_track/state.toml exists
state_file_path = base_dir / "conductor" / "tracks" / track_folder_name / "state.toml"
assert state_file_path.exists(), f"State file should exist at {state_file_path}"
# 4. Call load_track_state('test_track', base_dir) and verify it returns an identical TrackState object
loaded_state = load_track_state(track_folder_name, base_dir)
assert loaded_state is not None, "load_track_state returned None"
# Verify equality
assert loaded_state.metadata.id == original_state.metadata.id
assert loaded_state.metadata.name == original_state.metadata.name
assert loaded_state.metadata.status == original_state.metadata.status
assert loaded_state.metadata.created_at == original_state.metadata.created_at
assert loaded_state.metadata.updated_at == original_state.metadata.updated_at
assert len(loaded_state.tasks) == len(original_state.tasks)
for i in range(len(original_state.tasks)):
assert loaded_state.tasks[i].id == original_state.tasks[i].id
assert loaded_state.tasks[i].description == original_state.tasks[i].description
assert loaded_state.tasks[i].status == original_state.tasks[i].status
assert loaded_state.tasks[i].assigned_to == original_state.tasks[i].assigned_to
assert len(loaded_state.discussion) == len(original_state.discussion)
for i in range(len(original_state.discussion)):
assert loaded_state.discussion[i]["role"] == original_state.discussion[i]["role"]
assert loaded_state.discussion[i]["content"] == original_state.discussion[i]["content"]
assert loaded_state.discussion[i]["ts"] == original_state.discussion[i]["ts"]
# Final check: deep equality of dataclasses
assert loaded_state == original_state

View File

@@ -7,166 +7,154 @@ from models import Metadata, TrackState, Ticket
# --- Pytest Tests ---
def test_track_state_instantiation():
"""Test creating a TrackState object."""
now = datetime.now(timezone.utc)
metadata = Metadata(
id="track-123",
name="Initial Setup",
status="in_progress",
created_at=now - timedelta(days=1),
updated_at=now,
)
discussion = [
{"role": "user", "content": "Hello", "ts": now - timedelta(hours=1)},
{"role": "assistant", "content": "Hi there!", "ts": now - timedelta(hours=2)},
]
# Update Ticket instantiation to match models.py fields (description, assigned_to)
tasks = [
Ticket(id="task-a", description="Design UI", status="todo", assigned_to="dev1"),
Ticket(id="task-b", description="Implement Backend", status="todo", assigned_to="dev2"),
]
track_state = TrackState(
metadata=metadata,
discussion=discussion,
tasks=tasks,
)
assert track_state.metadata.id == "track-123"
assert len(track_state.discussion) == 2
assert len(track_state.tasks) == 2
assert isinstance(track_state.tasks[0], Ticket)
assert track_state.tasks[0].description == "Design UI"
assert track_state.tasks[0].assigned_to == "dev1"
"""Test creating a TrackState object."""
now = datetime.now(timezone.utc)
metadata = Metadata(
id="track-123",
name="Initial Setup",
status="in_progress",
created_at=now - timedelta(days=1),
updated_at=now,
)
discussion = [
{"role": "user", "content": "Hello", "ts": now - timedelta(hours=1)},
{"role": "assistant", "content": "Hi there!", "ts": now - timedelta(hours=2)},
]
# Update Ticket instantiation to match models.py fields (description, assigned_to)
tasks = [
Ticket(id="task-a", description="Design UI", status="todo", assigned_to="dev1"),
Ticket(id="task-b", description="Implement Backend", status="todo", assigned_to="dev2"),
]
track_state = TrackState(
metadata=metadata,
discussion=discussion,
tasks=tasks,
)
assert track_state.metadata.id == "track-123"
assert len(track_state.discussion) == 2
assert len(track_state.tasks) == 2
assert isinstance(track_state.tasks[0], Ticket)
assert track_state.tasks[0].description == "Design UI"
assert track_state.tasks[0].assigned_to == "dev1"
def test_track_state_to_dict():
"""Test the to_dict() method for serialization."""
now = datetime.now(timezone.utc)
metadata = Metadata(
id="track-456",
name="Refinement Phase",
status="completed",
created_at=now - timedelta(days=5),
updated_at=now - timedelta(days=2),
)
discussion = [
{"role": "user", "content": "Need changes", "ts": now - timedelta(hours=3)},
{"role": "assistant", "content": "Understood.", "ts": now - timedelta(hours=4)},
]
# Update Ticket instantiation
tasks = [
Ticket(id="task-c", description="Add feature X", status="in_progress", assigned_to="dev3"),
]
track_state = TrackState(
metadata=metadata,
discussion=discussion,
tasks=tasks,
)
track_dict = track_state.to_dict()
assert track_dict["metadata"]["id"] == "track-456"
assert track_dict["metadata"]["created_at"] == metadata.created_at.isoformat()
assert track_dict["metadata"]["updated_at"] == metadata.updated_at.isoformat()
assert len(track_dict["discussion"]) == 2
assert track_dict["discussion"][0]["ts"] == discussion[0]["ts"].isoformat()
assert len(track_dict["tasks"]) == 1
# Use the Ticket's to_dict method for serialization
assert track_dict["tasks"][0]["id"] == "task-c"
assert track_dict["tasks"][0]["description"] == "Add feature X"
assert track_dict["tasks"][0]["assigned_to"] == "dev3"
"""Test the to_dict() method for serialization."""
now = datetime.now(timezone.utc)
metadata = Metadata(
id="track-456",
name="Refinement Phase",
status="completed",
created_at=now - timedelta(days=5),
updated_at=now - timedelta(days=2),
)
discussion = [
{"role": "user", "content": "Need changes", "ts": now - timedelta(hours=3)},
{"role": "assistant", "content": "Understood.", "ts": now - timedelta(hours=4)},
]
# Update Ticket instantiation
tasks = [
Ticket(id="task-c", description="Add feature X", status="in_progress", assigned_to="dev3"),
]
track_state = TrackState(
metadata=metadata,
discussion=discussion,
tasks=tasks,
)
track_dict = track_state.to_dict()
assert track_dict["metadata"]["id"] == "track-456"
assert track_dict["metadata"]["created_at"] == metadata.created_at.isoformat()
assert track_dict["metadata"]["updated_at"] == metadata.updated_at.isoformat()
assert len(track_dict["discussion"]) == 2
assert track_dict["discussion"][0]["ts"] == discussion[0]["ts"].isoformat()
assert len(track_dict["tasks"]) == 1
# Use the Ticket's to_dict method for serialization
assert track_dict["tasks"][0]["id"] == "task-c"
assert track_dict["tasks"][0]["description"] == "Add feature X"
assert track_dict["tasks"][0]["assigned_to"] == "dev3"
def test_track_state_from_dict():
"""Test the from_dict() class method for deserialization."""
now = datetime.now(timezone.utc)
track_dict_data = {
"metadata": {
"id": "track-789",
"name": "Final Review",
"status": "pending",
"created_at": (now - timedelta(days=10)).isoformat(),
"updated_at": (now - timedelta(days=9)).isoformat(),
},
"discussion": [
{"role": "user", "content": "Review complete.", "ts": (now - timedelta(hours=5)).isoformat()},
],
"tasks": [
# Use fields from models.py Ticket definition for deserialization
{"id": "task-d", "description": "Deploy", "status": "completed", "assigned_to": "ops1"},
],
}
"""Test the from_dict() class method for deserialization."""
now = datetime.now(timezone.utc)
track_dict_data = {
"metadata": {
"id": "track-789",
"name": "Final Review",
"status": "pending",
"created_at": (now - timedelta(days=10)).isoformat(),
"updated_at": (now - timedelta(days=9)).isoformat(),
},
"discussion": [
{"role": "user", "content": "Review complete.", "ts": (now - timedelta(hours=5)).isoformat()},
],
"tasks": [
# Use fields from models.py Ticket definition for deserialization
{"id": "task-d", "description": "Deploy", "status": "completed", "assigned_to": "ops1"},
],
}
track_state = TrackState.from_dict(track_dict_data)
assert isinstance(track_state, TrackState)
assert track_state.metadata.id == "track-789"
assert isinstance(track_state.metadata.created_at, datetime)
assert track_state.metadata.created_at.isoformat() == track_dict_data["metadata"]["created_at"]
assert len(track_state.discussion) == 1
assert isinstance(track_state.discussion[0]["ts"], datetime)
assert track_state.discussion[0]["ts"].isoformat() == track_dict_data["discussion"][0]["ts"]
assert len(track_state.tasks) == 1
assert isinstance(track_state.tasks[0], Ticket)
assert track_state.tasks[0].id == "task-d"
assert track_state.tasks[0].description == "Deploy"
assert track_state.tasks[0].assigned_to == "ops1"
# Test case for empty lists and missing keys for robustness
track_state = TrackState.from_dict(track_dict_data)
assert isinstance(track_state, TrackState)
assert track_state.metadata.id == "track-789"
assert isinstance(track_state.metadata.created_at, datetime)
assert track_state.metadata.created_at.isoformat() == track_dict_data["metadata"]["created_at"]
assert len(track_state.discussion) == 1
assert isinstance(track_state.discussion[0]["ts"], datetime)
assert track_state.discussion[0]["ts"].isoformat() == track_dict_data["discussion"][0]["ts"]
assert len(track_state.tasks) == 1
assert isinstance(track_state.tasks[0], Ticket)
assert track_state.tasks[0].id == "task-d"
assert track_state.tasks[0].description == "Deploy"
assert track_state.tasks[0].assigned_to == "ops1"
# Test case for empty lists and missing keys for robustness
def test_track_state_from_dict_empty_and_missing():
"""Test from_dict with empty lists and missing optional keys."""
track_dict_data = {
"metadata": {
"id": "track-empty",
"name": "Empty State",
# created_at, updated_at, status are optional in from_dict logic
},
"discussion": [], # Empty discussion list
"tasks": [], # Empty tasks list
}
"""Test from_dict with empty lists and missing optional keys."""
track_dict_data = {
"metadata": {
"id": "track-empty",
"name": "Empty State",
# created_at, updated_at, status are optional in from_dict logic
},
"discussion": [], # Empty discussion list
"tasks": [], # Empty tasks list
}
track_state = TrackState.from_dict(track_dict_data)
assert isinstance(track_state, TrackState)
assert track_state.metadata.id == "track-empty"
assert track_state.metadata.name == "Empty State"
assert track_state.metadata.created_at is None
assert track_state.metadata.updated_at is None
assert track_state.metadata.status is None
assert len(track_state.discussion) == 0
assert len(track_state.tasks) == 0
# Test case for to_dict with None values or missing optional data
track_state = TrackState.from_dict(track_dict_data)
assert isinstance(track_state, TrackState)
assert track_state.metadata.id == "track-empty"
assert track_state.metadata.name == "Empty State"
assert track_state.metadata.created_at is None
assert track_state.metadata.updated_at is None
assert track_state.metadata.status is None
assert len(track_state.discussion) == 0
assert len(track_state.tasks) == 0
# Test case for to_dict with None values or missing optional data
def test_track_state_to_dict_with_none():
"""Test to_dict with None values in optional fields."""
now = datetime.now(timezone.utc)
metadata = Metadata(
id="track-none",
name="None Test",
status=None, # None status
created_at=now,
updated_at=None, # None updated_at
)
discussion = [
{"role": "system", "content": "Info", "ts": None}, # None timestamp
]
# Update Ticket instantiation
tasks = [
Ticket(id="task-none", description="Task None", status="pending", assigned_to="anon"),
]
track_state = TrackState(
metadata=metadata,
discussion=discussion,
tasks=tasks,
)
track_dict = track_state.to_dict()
assert track_dict["metadata"]["status"] is None
# Check that isoformat was called on datetime object, not None
assert track_dict["metadata"]["created_at"] == now.isoformat()
assert track_dict["metadata"]["updated_at"] is None # This should be None as it's passed as None
assert track_dict["discussion"][0]["ts"] is None
assert track_dict["tasks"][0]["description"] == "Task None"
assert track_dict["tasks"][0]["assigned_to"] == "anon"
"""Test to_dict with None values in optional fields."""
now = datetime.now(timezone.utc)
metadata = Metadata(
id="track-none",
name="None Test",
status=None, # None status
created_at=now,
updated_at=None, # None updated_at
)
discussion = [
{"role": "system", "content": "Info", "ts": None}, # None timestamp
]
# Update Ticket instantiation
tasks = [
Ticket(id="task-none", description="Task None", status="pending", assigned_to="anon"),
]
track_state = TrackState(
metadata=metadata,
discussion=discussion,
tasks=tasks,
)
track_dict = track_state.to_dict()
assert track_dict["metadata"]["status"] is None
# Check that isoformat was called on datetime object, not None
assert track_dict["metadata"]["created_at"] == now.isoformat()
assert track_dict["metadata"]["updated_at"] is None # This should be None as it's passed as None
assert track_dict["discussion"][0]["ts"] is None
assert track_dict["tasks"][0]["description"] == "Task None"
assert track_dict["tasks"][0]["assigned_to"] == "anon"

View File

@@ -2,29 +2,24 @@ import tree_sitter_python as tspython
from tree_sitter import Language, Parser
def test_tree_sitter_python_setup():
"""
"""
Verifies that tree-sitter and tree-sitter-python are correctly installed
and can parse a simple Python function string.
"""
# Initialize the Python language and parser
PY_LANGUAGE = Language(tspython.language())
parser = Parser(PY_LANGUAGE)
# Simple Python code to parse
code = """def hello():
print('world')"""
# Parse the code
tree = parser.parse(bytes(code, "utf8"))
# Assert that the root node is a 'module'
assert tree.root_node.type == "module"
# Verify we can find a function definition
found_function = False
for child in tree.root_node.children:
if child.type == "function_definition":
found_function = True
break
assert found_function, "Should have found a function_definition node"

View File

@@ -8,15 +8,15 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from simulation.user_agent import UserSimAgent
def test_user_agent_instantiation():
agent = UserSimAgent(hook_client=None)
assert agent is not None
def test_perform_action_with_delay():
agent = UserSimAgent(hook_client=None)
called = False
def action():
nonlocal called
called = True
agent.perform_action_with_delay(action)
assert called is True

View File

@@ -9,39 +9,33 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from simulation.workflow_sim import WorkflowSimulator
def test_simulator_instantiation():
client = MagicMock()
sim = WorkflowSimulator(client)
assert sim is not None
def test_setup_new_project():
client = MagicMock()
sim = WorkflowSimulator(client)
# Mock responses for wait_for_server
client.wait_for_server.return_value = True
sim.setup_new_project("TestProject", "/tmp/test_git")
# Verify hook calls
client.click.assert_any_call("btn_project_new")
client.set_value.assert_any_call("project_git_dir", "/tmp/test_git")
client.click.assert_any_call("btn_project_save")
def test_discussion_switching():
client = MagicMock()
sim = WorkflowSimulator(client)
sim.create_discussion("NewDisc")
client.set_value.assert_called_with("disc_new_name_input", "NewDisc")
client.click.assert_called_with("btn_disc_create")
sim.switch_discussion("NewDisc")
client.select_list_item.assert_called_with("disc_listbox", "NewDisc")
def test_history_truncation():
client = MagicMock()
sim = WorkflowSimulator(client)
sim.truncate_history(3)
client.set_value.assert_called_with("disc_truncate_pairs", 3)
client.click.assert_called_with("btn_disc_truncate")

View File

@@ -8,89 +8,80 @@ import unittest
# Calculate project root
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
if PROJECT_ROOT not in sys.path:
sys.path.insert(0, PROJECT_ROOT)
from api_hook_client import ApiHookClient
class TestMMAGUIRobust(unittest.TestCase):
@classmethod
def setUpClass(cls):
# 1. Launch gui_2.py with --enable-test-hooks
cls.gui_command = [sys.executable, "gui_2.py", "--enable-test-hooks"]
print(f"Launching GUI: {' '.join(cls.gui_command)}")
cls.gui_process = subprocess.Popen(
cls.gui_command,
cwd=PROJECT_ROOT,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True
)
cls.client = ApiHookClient()
print("Waiting for GUI to start...")
if not cls.client.wait_for_server(timeout=10):
cls.gui_process.terminate()
raise RuntimeError("GUI failed to start or hook server not responsive.")
print("GUI started.")
@classmethod
def tearDownClass(cls):
if cls.gui_process:
cls.gui_process.terminate()
cls.gui_process.wait(timeout=5)
def test_mma_state_ingestion(self):
"""Verify that mma_state_update event correctly updates GUI state."""
track_data = {
"id": "robust_test_track",
"title": "Robust Verification Track",
"description": "Verifying internal state ingestion"
}
tickets_data = [
{"id": "T1", "target_file": "file1.py", "status": "todo"},
{"id": "T2", "target_file": "file2.py", "status": "running"},
{"id": "T3", "target_file": "file3.py", "status": "complete"},
]
payload = {
"status": "active",
"active_tier": "Tier 2",
"track": track_data,
"tickets": tickets_data
}
print("Pushing mma_state_update...")
self.client.push_event("mma_state_update", payload)
# Give GUI a moment to process the async task
time.sleep(1.0)
print("Querying mma_status...")
status = self.client.get_mma_status()
self.assertEqual(status["mma_status"], "active")
self.assertEqual(status["active_tier"], "Tier 2")
self.assertEqual(status["active_track"]["id"], "robust_test_track")
self.assertEqual(len(status["active_tickets"]), 3)
self.assertEqual(status["active_tickets"][2]["status"], "complete")
print("MMA state ingestion verified successfully.")
def test_mma_step_approval_trigger(self):
"""Verify that mma_step_approval event sets the pending approval flag."""
payload = {
"ticket_id": "T2",
"payload": "echo 'Robust Test'"
}
print("Pushing mma_step_approval...")
self.client.push_event("mma_step_approval", payload)
time.sleep(1.0)
print("Querying mma_status for pending approval...")
status = self.client.get_mma_status()
self.assertTrue(status["pending_approval"], "GUI did not register pending MMA approval")
print("MMA step approval trigger verified successfully.")
if __name__ == "__main__":
unittest.main()

View File

@@ -5,65 +5,58 @@ import os
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
if PROJECT_ROOT not in sys.path:
sys.path.insert(0, PROJECT_ROOT)
from api_hook_client import ApiHookClient
def diag_run():
print("Launching GUI for manual inspection + automated hooks...")
# Use a log file for GUI output
with open("gui_diag.log", "w") as log_file:
gui_process = subprocess.Popen(
[sys.executable, "gui_2.py", "--enable-test-hooks"],
cwd=PROJECT_ROOT,
stdout=log_file,
stderr=log_file,
text=True
)
client = ApiHookClient()
print("Waiting for GUI...")
if not client.wait_for_server(timeout=10):
print("GUI failed to start.")
gui_process.terminate()
return
# Pushing state
track_data = {"id": "diag_track", "title": "Diagnostic Track"}
tickets_data = [{"id": f"T{i}", "status": "todo"} for i in range(3)]
print("Pushing state update...")
client.push_event("mma_state_update", {
"status": "active",
"active_tier": "Tier 1",
"track": track_data,
"tickets": tickets_data
})
time.sleep(2)
print("Pushing approval request...")
client.push_event("mma_step_approval", {
"ticket_id": "T0",
"payload": "Get-ChildItem"
})
print("\nGUI is running. Check 'gui_diag.log' for output.")
print("I will now poll mma_status every 2 seconds. Ctrl+C to stop.")
try:
start_poll = time.time()
while time.time() - start_poll < 30:
try:
status = client.get_mma_status()
print(f"[{time.strftime('%H:%M:%S')}] Status: {status.get('mma_status')}, Pending Approval: {status.get('pending_approval')}")
except Exception as e:
print(f"[{time.strftime('%H:%M:%S')}] Error querying status: {e}")
time.sleep(2)
except KeyboardInterrupt:
print("Stopping...")
finally:
gui_process.terminate()
print("Launching GUI for manual inspection + automated hooks...")
# Use a log file for GUI output
with open("gui_diag.log", "w") as log_file:
gui_process = subprocess.Popen(
[sys.executable, "gui_2.py", "--enable-test-hooks"],
cwd=PROJECT_ROOT,
stdout=log_file,
stderr=log_file,
text=True
)
client = ApiHookClient()
print("Waiting for GUI...")
if not client.wait_for_server(timeout=10):
print("GUI failed to start.")
gui_process.terminate()
return
# Pushing state
track_data = {"id": "diag_track", "title": "Diagnostic Track"}
tickets_data = [{"id": f"T{i}", "status": "todo"} for i in range(3)]
print("Pushing state update...")
client.push_event("mma_state_update", {
"status": "active",
"active_tier": "Tier 1",
"track": track_data,
"tickets": tickets_data
})
time.sleep(2)
print("Pushing approval request...")
client.push_event("mma_step_approval", {
"ticket_id": "T0",
"payload": "Get-ChildItem"
})
print("\nGUI is running. Check 'gui_diag.log' for output.")
print("I will now poll mma_status every 2 seconds. Ctrl+C to stop.")
try:
start_poll = time.time()
while time.time() - start_poll < 30:
try:
status = client.get_mma_status()
print(f"[{time.strftime('%H:%M:%S')}] Status: {status.get('mma_status')}, Pending Approval: {status.get('pending_approval')}")
except Exception as e:
print(f"[{time.strftime('%H:%M:%S')}] Error querying status: {e}")
time.sleep(2)
except KeyboardInterrupt:
print("Stopping...")
finally:
gui_process.terminate()
if __name__ == "__main__":
diag_run()

View File

@@ -12,130 +12,116 @@ PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
# Ensure project root is in sys.path to import modules like api_hook_client
if PROJECT_ROOT not in sys.path:
sys.path.insert(0, PROJECT_ROOT)
print(f"Added '{PROJECT_ROOT}' to sys.path for imports.")
try:
from api_hook_client import ApiHookClient
except ImportError as e:
print(f"Error: Could not import ApiHookClient from '{API_HOOK_CLIENT_MODULE}'.")
print(f"Please ensure '{API_HOOK_CLIENT_MODULE}.py' is accessible and '{PROJECT_ROOT}' is correctly added to sys.path.")
print(f"Import error: {e}")
sys.exit(1)
print(f"Error: Could not import ApiHookClient from '{API_HOOK_CLIENT_MODULE}'.")
print(f"Please ensure '{API_HOOK_CLIENT_MODULE}.py' is accessible and '{PROJECT_ROOT}' is correctly added to sys.path.")
print(f"Import error: {e}")
sys.exit(1)
def run_visual_mma_verification():
print("Starting visual MMA verification test...")
# Change current directory to project root
original_dir = os.getcwd()
if original_dir != PROJECT_ROOT:
try:
os.chdir(PROJECT_ROOT)
print(f"Changed current directory to: {PROJECT_ROOT}")
except FileNotFoundError:
print(f"Error: Project root directory '{PROJECT_ROOT}' not found.")
return
# 1. Launch gui_2.py with --enable-test-hooks
gui_command = [sys.executable, GUI_SCRIPT, TEST_HOOKS_FLAG]
print(f"Launching GUI with command: {' '.join(gui_command)}")
try:
gui_process = subprocess.Popen(
gui_command,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
cwd=PROJECT_ROOT
)
print(f"GUI process started with PID: {gui_process.pid}")
except FileNotFoundError:
print(f"Error: Could not find Python executable '{sys.executable}' or script '{GUI_SCRIPT}'.")
return
except Exception as e:
print(f"Error starting GUI process: {e}")
return
# Wait for GUI to start
print("Waiting for GUI to initialize and hook server to start (5 seconds)...")
time.sleep(5)
if gui_process.poll() is not None:
print(f"Error: GUI process exited prematurely with return code {gui_process.returncode}.")
return
# 2. Use ApiHookClient
try:
client = ApiHookClient()
print("ApiHookClient initialized successfully.")
except Exception as e:
print(f"Failed to initialize ApiHookClient. Error: {e}")
if gui_process:
gui_process.terminate()
return
# 3. Setup MMA data
track_data = {
"id": "visual_test_track",
"title": "Visual Verification Track",
"description": "A track to verify MMA UI components"
}
tickets_data = [
{"id": "TICKET-001", "target_file": "core.py", "status": "todo"},
{"id": "TICKET-002", "target_file": "utils.py", "status": "running"},
{"id": "TICKET-003", "target_file": "tests.py", "status": "complete"},
{"id": "TICKET-004", "target_file": "api.py", "status": "blocked"},
{"id": "TICKET-005", "target_file": "gui.py", "status": "paused"},
]
print("\nPushing MMA state update...")
try:
payload = {
"status": "running",
"active_tier": "Tier 3",
"track": track_data,
"tickets": tickets_data
}
client.push_event("mma_state_update", payload)
print(" - MMA state update pushed.")
except Exception as e:
print(f" - Warning: Failed to push mma_state_update: {e}")
time.sleep(3)
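# 4. Push the approval request to trigger the HITL (human-in-the-loop) modal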
print("Pushing 'mma_step_approval' event to trigger HITL modal...")
try:
approval_payload = {
"ticket_id": "TICKET-002",
"payload": "powershell -Command \"Write-Host 'Hello from Tier 3'\""
}
client.push_event("mma_step_approval", approval_payload)
print("mma_step_approval event pushed successfully.")
except Exception as e:
print(f"Error pushing mma_step_approval event: {e}")
# 5. Provide clear print statements for manual verification
print("\n--- Manual Verification Instructions ---")
print("Please visually inspect the running GUI application:")
print("1. MMA Dashboard: Ensure the 'MMA Dashboard' panel is visible and active.")
print("2. Ticket Queue: Verify the 'Ticket Queue' section displays all 5 tickets with correct statuses.")
print("3. Progress Bar: Check that the progress bar correctly reflects the completed/total tickets.")
print("4. Approval Modal: Confirm that an 'MMA Step Approval' modal has appeared.")
print("\n--------------------------------------")
print("The test script has finished its automated actions.")
print("The GUI application is still running. Press Enter to exit.")
try:
input()
except EOFError:
pass
print("\nStopping GUI process...")
if gui_process:
gui_process.terminate()
gui_process.wait(timeout=5)
print("Visual MMA verification test script finished.")
print("Starting visual MMA verification test...")
# Change current directory to project root
original_dir = os.getcwd()
if original_dir != PROJECT_ROOT:
try:
os.chdir(PROJECT_ROOT)
print(f"Changed current directory to: {PROJECT_ROOT}")
except FileNotFoundError:
print(f"Error: Project root directory '{PROJECT_ROOT}' not found.")
return
# 1. Launch gui_2.py with --enable-test-hooks
gui_command = [sys.executable, GUI_SCRIPT, TEST_HOOKS_FLAG]
print(f"Launching GUI with command: {' '.join(gui_command)}")
try:
gui_process = subprocess.Popen(
gui_command,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
cwd=PROJECT_ROOT
)
print(f"GUI process started with PID: {gui_process.pid}")
except FileNotFoundError:
print(f"Error: Could not find Python executable '{sys.executable}' or script '{GUI_SCRIPT}'.")
return
except Exception as e:
print(f"Error starting GUI process: {e}")
return
# Wait for GUI to start
print("Waiting for GUI to initialize and hook server to start (5 seconds)...")
time.sleep(5)
if gui_process.poll() is not None:
print(f"Error: GUI process exited prematurely with return code {gui_process.returncode}.")
return
# 2. Use ApiHookClient
try:
client = ApiHookClient()
print("ApiHookClient initialized successfully.")
except Exception as e:
print(f"Failed to initialize ApiHookClient. Error: {e}")
if gui_process:
gui_process.terminate()
return
# 3. Setup MMA data
track_data = {
"id": "visual_test_track",
"title": "Visual Verification Track",
"description": "A track to verify MMA UI components"
}
tickets_data = [
{"id": "TICKET-001", "target_file": "core.py", "status": "todo"},
{"id": "TICKET-002", "target_file": "utils.py", "status": "running"},
{"id": "TICKET-003", "target_file": "tests.py", "status": "complete"},
{"id": "TICKET-004", "target_file": "api.py", "status": "blocked"},
{"id": "TICKET-005", "target_file": "gui.py", "status": "paused"},
]
print("\nPushing MMA state update...")
try:
payload = {
"status": "running",
"active_tier": "Tier 3",
"track": track_data,
"tickets": tickets_data
}
client.push_event("mma_state_update", payload)
print(" - MMA state update pushed.")
except Exception as e:
print(f" - Warning: Failed to push mma_state_update: {e}")
time.sleep(3)
print("Pushing 'mma_step_approval' event to trigger HITL modal...")
try:
approval_payload = {
"ticket_id": "TICKET-002",
"payload": "powershell -Command \"Write-Host 'Hello from Tier 3'\""
}
client.push_event("mma_step_approval", approval_payload)
print("mma_step_approval event pushed successfully.")
except Exception as e:
print(f"Error pushing mma_step_approval event: {e}")
# 5. Provide clear print statements for manual verification
print("\n--- Manual Verification Instructions ---")
print("Please visually inspect the running GUI application:")
print("1. MMA Dashboard: Ensure the 'MMA Dashboard' panel is visible and active.")
print("2. Ticket Queue: Verify the 'Ticket Queue' section displays all 5 tickets with correct statuses.")
print("3. Progress Bar: Check that the progress bar correctly reflects the completed/total tickets.")
print("4. Approval Modal: Confirm that an 'MMA Step Approval' modal has appeared.")
print("\n--------------------------------------")
print("The test script has finished its automated actions.")
print("The GUI application is still running. Press Enter to exit.")
try:
input()
except EOFError:
pass
print("\nStopping GUI process...")
if gui_process:
gui_process.terminate()
gui_process.wait(timeout=5)
print("Visual MMA verification test script finished.")
if __name__ == "__main__":
run_visual_mma_verification()

View File

@@ -11,7 +11,7 @@ from api_hook_client import ApiHookClient
@pytest.mark.integration
def test_mma_epic_lifecycle(live_gui):
"""
"""
Integration test for the full MMA Epic lifecycle.
1. Start App.
2. Trigger 'New Epic' request.
@@ -20,76 +20,61 @@ def test_mma_epic_lifecycle(live_gui):
5. Verify Tier 2 generates tickets.
6. Verify execution loop starts.
"""
client = ApiHookClient()
assert client.wait_for_server(timeout=15), "API hook server failed to start."
print("[Test] Initializing MMA Epic lifecycle test...")
# 0. Setup: Ensure we have a project and are in a clean state
client.click("btn_reset")
time.sleep(1)
# 1. Set Epic input
epic_text = "Improve the logging system to include timestamps in all tool calls."
print(f"[Test] Setting Epic input: {epic_text}")
client.set_value("mma_epic_input", epic_text)
# 2. Trigger 'New Epic' (Plan Epic)
print("[Test] Clicking 'Plan Epic (Tier 1)'...")
client.click("btn_mma_plan_epic")
# 3. Verify that Tier 1 generates tracks
print("[Test] Polling for Tier 1 tracks...")
tracks_generated = False
for i in range(120):
status = client.get_value("ai_status")
# Check if the proposal modal is shown or status changed
if status and "Epic tracks generated" in str(status):
tracks_generated = True
print(f"[Test] Tracks generated after {i}s")
break
time.sleep(1)
assert tracks_generated, "Tier 1 failed to generate tracks within 120 seconds."
# 4. Trigger 'Start Track' for the first track
print("[Test] Triggering 'Start Track' for track index 0...")
client.click("btn_mma_start_track", user_data={"index": 0})
# 5. Verify that Tier 2 generates tickets and starts execution
print("[Test] Polling for Tier 2 ticket generation and execution start...")
execution_started = False
for i in range(60):
mma_status = client.get_mma_status()
status_str = mma_status.get("mma_status", "idle")
active_tier = mma_status.get("active_tier", "")
if status_str == "running" or "Tier 3" in str(active_tier):
execution_started = True
print(f"[Test] Execution started (Status: {status_str}, Tier: {active_tier}) after {i}s")
break
current_ai_status = client.get_value("ai_status")
if i % 5 == 0:
print(f" ... still waiting. Current AI Status: {current_ai_status}")
time.sleep(1)
assert execution_started, "Tier 2 failed to generate tickets or execution failed to start within 60 seconds."
# 6. Final verification of MMA state
final_mma = client.get_mma_status()
print(f"[Test] Final MMA Status: {final_mma.get('mma_status')}")
print(f"[Test] Active Tier: {final_mma.get('active_tier')}")
print(f"[Test] Ticket Count: {len(final_mma.get('active_tickets', []))}")
assert final_mma.get("mma_status") in ["running", "done", "blocked"]
assert len(final_mma.get("active_tickets", [])) > 0
print("[Test] MMA Epic lifecycle verification successful!")
if __name__ == "__main__":
# If run directly, try to use pytest
import subprocess
# Using sys.executable to ensure we use the same environment
subprocess.run([sys.executable, "-m", "pytest", "-v", __file__])

View File

@@ -10,35 +10,30 @@ from api_hook_client import ApiHookClient
@pytest.mark.integration
def test_mma_epic_simulation(live_gui):
"""
"""
Integration test for MMA epic simulation.
Plans an epic via the MMA dashboard and verifies that at least one track is generated.
"""
client = ApiHookClient()
assert client.wait_for_server(timeout=10)
# Try selecting MMA Dashboard tab if applicable (using typical naming convention)
try:
client.select_tab('main_tab_bar', 'tab_mma')
except Exception:
pass
# Set model to mock to avoid real API calls and timeouts
try:
client.set_value('current_model', 'mock')
except Exception:
pass
client.set_value('mma_epic_input', 'Build a simple calculator')
client.click('btn_mma_plan_epic')
# Poll client.get_mma_status() every 1 second (up to 30 seconds)
success = False
for i in range(30):
status = client.get_mma_status()
if status and status.get('tracks') and len(status['tracks']) > 0:
success = True
break
time.sleep(1)
assert success, "Failed to generate at least one track."