From 6b0823ad6c77520cd3a061d960e16d605911cd56 Mon Sep 17 00:00:00 2001 From: Ed_ Date: Sat, 28 Feb 2026 22:50:14 -0500 Subject: [PATCH] checkpoint: this is a mess... need to define stricter DSL or system for how the AI devises sims and hookup API for tests. --- ai_client.py | 24 --------- .../spec.md | 13 ++++- conductor_tech_lead.py | 4 +- config.toml | 2 +- gui_2.py | 11 +++- manualslop_layout.ini | 7 ++- tests/mock_gemini_cli.py | 53 +++++++++++++++---- tests/temp_project.toml | 26 +-------- tests/temp_project_history.toml | 2 +- tests/visual_sim_mma_v2.py | 36 +++++++++---- 10 files changed, 101 insertions(+), 77 deletions(-) diff --git a/ai_client.py b/ai_client.py index 8ee105a..9bd3d6b 100644 --- a/ai_client.py +++ b/ai_client.py @@ -1645,30 +1645,6 @@ def send( pre_tool_callback : Optional callback (payload: str) -> bool called before tool execution qa_callback : Optional callback (stderr: str) -> str called for Tier 4 error analysis """ - # --- START MOCK LOGIC --- - if _model == 'mock': - import json - keyword = "unknown" - if 'Epic Initialization' in _custom_system_prompt: - keyword = "Epic Initialization" - mock_response_content = [ - {"id": "mock-track-1", "type": "Track", "module": "core", "persona": "Tech Lead", "severity": "Medium", "goal": "Mock Goal 1", "acceptance_criteria": ["criteria 1"], "title": "Mock Goal 1"}, - {"id": "mock-track-2", "type": "Track", "module": "ui", "persona": "Frontend Lead", "severity": "Low", "goal": "Mock Goal 2", "acceptance_criteria": ["criteria 2"], "title": "Mock Goal 2"} - ] - elif 'Sprint Planning' in _custom_system_prompt: - keyword = "Sprint Planning" - mock_response_content = [ - {"id": "mock-ticket-1", "type": "Ticket", "goal": "Mock Ticket 1", "target_file": "file1.py", "depends_on": [], "context_requirements": "req 1"}, - {"id": "mock-ticket-2", "type": "Ticket", "goal": "Mock Ticket 2", "target_file": "file2.py", "depends_on": ["mock-ticket-1"], "context_requirements": "req 2"} - ] - else: - # Tier 3 mock 
response for ticket execution - mock_response_content = "SUCCESS: Mock Tier 3 worker implemented the change. [MOCK OUTPUT]" - - print(f"[MOCK AI] Triggered for prompt keyword: {keyword}") - return json.dumps(mock_response_content) - # --- END MOCK LOGIC --- - with _send_lock: if _provider == "gemini": return _send_gemini(md_content, user_message, base_dir, file_items, discussion_history, pre_tool_callback, qa_callback) diff --git a/conductor/tracks/robust_live_simulation_verification/spec.md b/conductor/tracks/robust_live_simulation_verification/spec.md index 4b5de28..2df5234 100644 --- a/conductor/tracks/robust_live_simulation_verification/spec.md +++ b/conductor/tracks/robust_live_simulation_verification/spec.md @@ -31,4 +31,15 @@ This is a multi-track phase. To ensure architectural integrity, these tracks **M 3. **MMA Dashboard Visualization Overhaul:** (Builds the UI to visualize the state and subsets) 4. **[CURRENT] Robust Live Simulation Verification:** (Builds the tests to verify the UI and state) -**Prerequisites for this track:** `MMA Dashboard Visualization Overhaul` MUST be completed (`[x]`) before starting this track. \ No newline at end of file +**Prerequisites for this track:** `MMA Dashboard Visualization Overhaul` MUST be completed (`[x]`) before starting this track. + +## Session Compression (2026-02-28) +**Current State & Glaring Issues:** +1. **Brittle Interception System:** The visual simulation (`tests/visual_sim_mma_v2.py`) relies heavily on polling an `api_hooks.py` endpoint (`/api/gui/mma_status`) that aggregates several boolean flags (`pending_approval`, `pending_spawn`). This has proven extremely brittle. For example, `mock_gemini_cli.py` defaults to emitting a `read_file` tool call, which triggers the *general* tool approval popup (`_pending_ask`), freezing the test because it was expecting the *MMA spawn* popup (`_pending_mma_spawn`) or the *Track Proposal* modal. +2. 
**Mock Pollution in App Domain:** Previous attempts to fix the simulation shoehorned test-specific mock JSON responses directly into `ai_client.py` and `scripts/mma_exec.py`. This conflates the test environment with the production application codebase. +3. **Popup Handling Failures:** The GUI's state machine for closing popups (like `_show_track_proposal_modal` in `_cb_accept_tracks`) is desynchronized from the hook API. The test clicks "Accept", the tracks generate, but the UI state doesn't cleanly reset, leading to endless timeouts during test runs. + +**Next Steps for the Handoff:** +- Completely rip out the hardcoded mock JSON arrays from `ai_client.py` and `scripts/mma_exec.py`. +- Refactor `tests/mock_gemini_cli.py` to be a pure, standalone mock that perfectly simulates the expected streaming behavior of `gemini_cli` without relying on the app to intercept specific magic prompts. +- Stabilize the hook API (`api_hooks.py`) so the test script can unambiguously distinguish between a general tool approval, an MMA step approval, and an MMA worker spawn approval, instead of relying on a fragile `pending_approval` catch-all. \ No newline at end of file diff --git a/conductor_tech_lead.py b/conductor_tech_lead.py index ca7c77d..9d3d3cf 100644 --- a/conductor_tech_lead.py +++ b/conductor_tech_lead.py @@ -9,9 +9,7 @@ def generate_tickets(track_brief: str, module_skeletons: str) -> list[dict]: Breaks down a Track Brief and module skeletons into discrete Tier 3 Tickets. """ # 1. Set Tier 2 Model (Tech Lead - Flash) - if ai_client._model != 'mock': - ai_client.set_provider('gemini', 'gemini-2.5-flash-lite') - ai_client.reset_session() # 2. Construct Prompt + # 2. 
Construct Prompt system_prompt = mma_prompts.PROMPTS.get("tier2_sprint_planning") user_message = ( f"### TRACK BRIEF:\n{track_brief}\n\n" diff --git a/config.toml b/config.toml index 5046543..1bfcf00 100644 --- a/config.toml +++ b/config.toml @@ -1,6 +1,6 @@ [ai] provider = "gemini_cli" -model = "mock" +model = "gemini-3-flash-preview" temperature = 0.0 max_tokens = 8192 history_trunc_limit = 8000 diff --git a/gui_2.py b/gui_2.py index 951ce44..752006b 100644 --- a/gui_2.py +++ b/gui_2.py @@ -991,7 +991,7 @@ class App: def _handle_approve_tool(self) -> None: """Logic for approving a pending tool execution via API hooks.""" print("[DEBUG] _handle_approve_tool called") - if self._pending_ask: + if self._pending_ask_dialog: self._handle_approve_ask() else: print("[DEBUG] No pending tool approval found") @@ -1000,7 +1000,11 @@ class App: """Logic for approving a pending sub-agent spawn via API hooks.""" print("[DEBUG] _handle_approve_spawn called") if self._pending_mma_spawn: + # Synchronize with the handler logic self._handle_mma_respond(approved=True, prompt=self._mma_spawn_prompt, context_md=self._mma_spawn_context) + # Crucially, close the modal state so UI can continue + self._mma_spawn_open = False + self._pending_mma_spawn = None else: print("[DEBUG] No pending spawn approval found") @@ -1982,6 +1986,7 @@ class App: threading.Thread(target=_bg_task, daemon=True).start() def _cb_accept_tracks(self) -> None: + self._show_track_proposal_modal = False def _bg_task(): # Generate skeletons once self.ai_status = "Phase 2: Generating skeletons for all tracks..." 
@@ -2118,6 +2123,10 @@ class App: if self._show_track_proposal_modal: imgui.open_popup("Track Proposal") if imgui.begin_popup_modal("Track Proposal", True, imgui.WindowFlags_.always_auto_resize)[0]: + if not self._show_track_proposal_modal: + imgui.close_current_popup() + imgui.end_popup() + return imgui.text_colored(C_IN, "Proposed Implementation Tracks") imgui.separator() if not self.proposed_tracks: diff --git a/manualslop_layout.ini b/manualslop_layout.ini index e411166..367355d 100644 --- a/manualslop_layout.ini +++ b/manualslop_layout.ini @@ -131,7 +131,7 @@ Collapsed=0 DockId=0x00000006,0 [Window][Approve Tool Execution] -Pos=512,437 +Pos=1009,547 Size=416,325 Collapsed=0 @@ -147,6 +147,11 @@ Size=879,1183 Collapsed=0 DockId=0x00000004,1 +[Window][Track Proposal] +Pos=709,326 +Size=262,209 +Collapsed=0 + [Table][0xFB6E3870,4] RefScale=13 Column 0 Width=80 diff --git a/tests/mock_gemini_cli.py b/tests/mock_gemini_cli.py index 9a4d652..8b619ee 100644 --- a/tests/mock_gemini_cli.py +++ b/tests/mock_gemini_cli.py @@ -8,21 +8,60 @@ def main() -> None: sys.stderr.write(f"DEBUG: GEMINI_CLI_HOOK_CONTEXT: {os.environ.get('GEMINI_CLI_HOOK_CONTEXT')}\n") # Read prompt from stdin try: - # On Windows, stdin might be closed or behave weirdly if not handled prompt = sys.stdin.read() except EOFError: prompt = "" sys.stderr.write(f"DEBUG: Received prompt via stdin ({len(prompt)} chars)\n") sys.stderr.flush() + # Skip management commands if len(sys.argv) > 1 and sys.argv[1] in ["mcp", "extensions", "skills", "hooks"]: return - # If the prompt contains tool results, provide final answer + + # Check for specific simulation contexts + # Use startswith or check the beginning of the prompt to avoid matching text inside skeletons + if 'PATH: Epic Initialization' in prompt[:500]: + mock_response = [ + {"id": "mock-track-1", "type": "Track", "module": "core", "persona": "Tech Lead", "severity": "Medium", "goal": "Mock Goal 1", "acceptance_criteria": ["criteria 1"], "title": "Mock 
Goal 1"}, + {"id": "mock-track-2", "type": "Track", "module": "ui", "persona": "Frontend Lead", "severity": "Low", "goal": "Mock Goal 2", "acceptance_criteria": ["criteria 2"], "title": "Mock Goal 2"} + ] + print(json.dumps({ + "type": "message", + "role": "assistant", + "content": json.dumps(mock_response) + }), flush=True) + print(json.dumps({ + "type": "result", + "status": "success", + "stats": {"total_tokens": 100, "input_tokens": 50, "output_tokens": 50}, + "session_id": "mock-session-epic" + }), flush=True) + return + + if 'PATH: Sprint Planning' in prompt[:500]: + mock_response = [ + {"id": "mock-ticket-1", "type": "Ticket", "goal": "Mock Ticket 1", "target_file": "file1.py", "depends_on": [], "context_requirements": "req 1"}, + {"id": "mock-ticket-2", "type": "Ticket", "goal": "Mock Ticket 2", "target_file": "file2.py", "depends_on": ["mock-ticket-1"], "context_requirements": "req 2"} + ] + print(json.dumps({ + "type": "message", + "role": "assistant", + "content": json.dumps(mock_response) + }), flush=True) + print(json.dumps({ + "type": "result", + "status": "success", + "stats": {"total_tokens": 100, "input_tokens": 50, "output_tokens": 50}, + "session_id": "mock-session-sprint" + }), flush=True) + return + + # If the prompt contains tool results, provide final answer if '"role": "tool"' in prompt or '"tool_call_id"' in prompt: print(json.dumps({ "type": "message", "role": "assistant", - "content": "I have processed the tool results. Everything looks good!" + "content": "SUCCESS: Mock Tier 3 worker implemented the change. 
[MOCK OUTPUT]" }), flush=True) print(json.dumps({ "type": "result", @@ -31,7 +70,8 @@ def main() -> None: "session_id": "mock-session-final" }), flush=True) return - # Default flow: simulate a tool call + + # Default flow: simulate a tool call bridge_path = os.path.abspath("scripts/cli_tool_bridge.py") # Using format that bridge understands bridge_tool_call = { @@ -66,11 +106,6 @@ def main() -> None: "tool_id": "call_123", "parameters": {"path": "test.txt"} }), flush=True) - print(json.dumps({ - "type": "message", - "role": "assistant", - "content": "I am reading the file now..." - }), flush=True) print(json.dumps({ "type": "result", "status": "success", diff --git a/tests/temp_project.toml b/tests/temp_project.toml index cb0e9c4..fd3f3c9 100644 --- a/tests/temp_project.toml +++ b/tests/temp_project.toml @@ -22,7 +22,7 @@ base_dir = "." paths = [] [gemini_cli] -binary_path = "gemini" +binary_path = "C:\\projects\\manual_slop\\.venv\\Scripts\\python.exe C:\\projects\\manual_slop\\tests\\mock_gemini_cli.py" [deepseek] reasoning_effort = "medium" @@ -40,27 +40,3 @@ fetch_url = true epic = "Develop a new feature" active_track_id = "" tracks = [] - -[mma.active_track] -id = "track_024370f1b453" -description = "Mock Goal 1" - -[[mma.active_track.tickets]] -id = "mock-ticket-1" -description = "Mock Ticket 1" -status = "todo" -assigned_to = "unassigned" -context_requirements = [] -depends_on = [] -step_mode = false - -[[mma.active_track.tickets]] -id = "mock-ticket-2" -description = "Mock Ticket 2" -status = "todo" -assigned_to = "unassigned" -context_requirements = [] -depends_on = [ - "mock-ticket-1", -] -step_mode = false diff --git a/tests/temp_project_history.toml b/tests/temp_project_history.toml index 5299480..5e72dde 100644 --- a/tests/temp_project_history.toml +++ b/tests/temp_project_history.toml @@ -10,7 +10,7 @@ auto_add = true [discussions.main] git_commit = "" -last_updated = "2026-02-28T22:11:24" +last_updated = "2026-02-28T22:41:40" history = [ 
"@2026-02-28T22:02:40\nSystem:\n[PERFORMANCE ALERT] CPU usage high: 83.5%. Please consider optimizing recent changes or reducing load.", "@2026-02-28T22:03:10\nSystem:\n[PERFORMANCE ALERT] CPU usage high: 103.9%. Please consider optimizing recent changes or reducing load.", diff --git a/tests/visual_sim_mma_v2.py b/tests/visual_sim_mma_v2.py index ecd7fd9..f9e856a 100644 --- a/tests/visual_sim_mma_v2.py +++ b/tests/visual_sim_mma_v2.py @@ -17,11 +17,14 @@ def test_mma_complete_lifecycle(live_gui) -> None: client = ApiHookClient() assert client.wait_for_server(timeout=10) - # 1. Set model to 'mock'. + # 1. Set up the mock CLI provider try: - client.set_value('current_model', 'mock') + client.set_value('current_provider', 'gemini_cli') + # Point the CLI adapter to our mock script + mock_cli_path = f'{sys.executable} {os.path.abspath("tests/mock_gemini_cli.py")}' + client.set_value('gcli_path', mock_cli_path) except Exception as e: - pytest.fail(f"Failed to set model to 'mock': {e}") + pytest.fail(f"Failed to set up mock provider: {e}") # 2. Enter epic and click 'Plan Epic'. client.set_value('mma_epic_input', 'Develop a new feature') @@ -136,19 +139,30 @@ def test_mma_complete_lifecycle(live_gui) -> None: # 8. Verify 'active_tier' change and output in 'mma_streams'. streams_found = False - for _ in range(30): + for _ in range(60): # Give it more time for the worker to spawn and respond status = client.get_mma_status() - streams = status.get('mma_streams', {}) - if streams and any("Tier 3" in k for k in streams.keys()): - print(f"[SIM] Found Tier 3 worker output in streams: {list(streams.keys())}") - streams_found = True - break - # Keep approving if needed + + # Handle approvals if they pop up during worker execution if status and status.get('pending_spawn') is True: + print('[SIM] Worker spawn required. Clicking btn_approve_spawn...') client.click('btn_approve_spawn') elif status and status.get('pending_approval') is True: + print('[SIM] Tool approval required. 
Clicking btn_approve_tool...') client.click('btn_approve_tool') + + streams = status.get('mma_streams', {}) + print(f"Polling streams: {list(streams.keys())}") + + if streams and any("Tier 3" in k for k in streams.keys()): + print(f"[SIM] Found Tier 3 worker output in streams: {list(streams.keys())}") + # Check for our specific mock content + tier3_key = [k for k in streams.keys() if "Tier 3" in k][0] + if "SUCCESS: Mock Tier 3 worker" in streams[tier3_key]: + print("[SIM] Verified mock worker output content.") + streams_found = True + break + time.sleep(1) - assert streams_found or 'Tier 1' in status.get('mma_streams', {}), "No output found in 'mma_streams'." + assert streams_found, "No Tier 3 mock output found in 'mma_streams'." print("MMA complete lifecycle simulation successful.")