From 6b0823ad6c77520cd3a061d960e16d605911cd56 Mon Sep 17 00:00:00 2001 From: Ed_ Date: Sat, 28 Feb 2026 22:50:14 -0500 Subject: [PATCH] checkpoint: this is a mess... need to define stricter DSL or system for how the AI devises sims and hookup API for tests. --- ai_client.py | 24 --------- .../spec.md | 13 ++++- conductor_tech_lead.py | 4 +- config.toml | 2 +- gui_2.py | 11 +++- manualslop_layout.ini | 7 ++- tests/mock_gemini_cli.py | 53 +++++++++++++++---- tests/temp_project.toml | 26 +-------- tests/temp_project_history.toml | 2 +- tests/visual_sim_mma_v2.py | 36 +++++++++---- 10 files changed, 101 insertions(+), 77 deletions(-) diff --git a/ai_client.py b/ai_client.py index 8ee105a..9bd3d6b 100644 --- a/ai_client.py +++ b/ai_client.py @@ -1645,30 +1645,6 @@ def send( pre_tool_callback : Optional callback (payload: str) -> bool called before tool execution qa_callback : Optional callback (stderr: str) -> str called for Tier 4 error analysis """ - # --- START MOCK LOGIC --- - if _model == 'mock': - import json - keyword = "unknown" - if 'Epic Initialization' in _custom_system_prompt: - keyword = "Epic Initialization" - mock_response_content = [ - {"id": "mock-track-1", "type": "Track", "module": "core", "persona": "Tech Lead", "severity": "Medium", "goal": "Mock Goal 1", "acceptance_criteria": ["criteria 1"], "title": "Mock Goal 1"}, - {"id": "mock-track-2", "type": "Track", "module": "ui", "persona": "Frontend Lead", "severity": "Low", "goal": "Mock Goal 2", "acceptance_criteria": ["criteria 2"], "title": "Mock Goal 2"} - ] - elif 'Sprint Planning' in _custom_system_prompt: - keyword = "Sprint Planning" - mock_response_content = [ - {"id": "mock-ticket-1", "type": "Ticket", "goal": "Mock Ticket 1", "target_file": "file1.py", "depends_on": [], "context_requirements": "req 1"}, - {"id": "mock-ticket-2", "type": "Ticket", "goal": "Mock Ticket 2", "target_file": "file2.py", "depends_on": ["mock-ticket-1"], "context_requirements": "req 2"} - ] - else: - # Tier 3 mock 
response for ticket execution - mock_response_content = "SUCCESS: Mock Tier 3 worker implemented the change. [MOCK OUTPUT]" - - print(f"[MOCK AI] Triggered for prompt keyword: {keyword}") - return json.dumps(mock_response_content) - # --- END MOCK LOGIC --- - with _send_lock: if _provider == "gemini": return _send_gemini(md_content, user_message, base_dir, file_items, discussion_history, pre_tool_callback, qa_callback) diff --git a/conductor/tracks/robust_live_simulation_verification/spec.md b/conductor/tracks/robust_live_simulation_verification/spec.md index 4b5de28..2df5234 100644 --- a/conductor/tracks/robust_live_simulation_verification/spec.md +++ b/conductor/tracks/robust_live_simulation_verification/spec.md @@ -31,4 +31,15 @@ This is a multi-track phase. To ensure architectural integrity, these tracks **M 3. **MMA Dashboard Visualization Overhaul:** (Builds the UI to visualize the state and subsets) 4. **[CURRENT] Robust Live Simulation Verification:** (Builds the tests to verify the UI and state) -**Prerequisites for this track:** `MMA Dashboard Visualization Overhaul` MUST be completed (`[x]`) before starting this track. \ No newline at end of file +**Prerequisites for this track:** `MMA Dashboard Visualization Overhaul` MUST be completed (`[x]`) before starting this track. + +## Session Compression (2026-02-28) +**Current State & Glaring Issues:** +1. **Brittle Interception System:** The visual simulation (`tests/visual_sim_mma_v2.py`) relies heavily on polling an `api_hooks.py` endpoint (`/api/gui/mma_status`) that aggregates several boolean flags (`pending_approval`, `pending_spawn`). This has proven extremely brittle. For example, `mock_gemini_cli.py` defaults to emitting a `read_file` tool call, which triggers the *general* tool approval popup (`_pending_ask`), freezing the test because it was expecting the *MMA spawn* popup (`_pending_mma_spawn`) or the *Track Proposal* modal. +2. 
**Mock Pollution in App Domain:** Previous attempts to fix the simulation shoehorned test-specific mock JSON responses directly into `ai_client.py` and `scripts/mma_exec.py`. This conflates the test environment with the production application codebase. +3. **Popup Handling Failures:** The GUI's state machine for closing popups (like `_show_track_proposal_modal` in `_cb_accept_tracks`) is desynchronized from the hook API. The test clicks "Accept", the tracks generate, but the UI state doesn't cleanly reset, leading to endless timeouts during test runs. + +**Next Steps for the Handoff:** +- Completely rip out the hardcoded mock JSON arrays from `ai_client.py` and `scripts/mma_exec.py`. +- Refactor `tests/mock_gemini_cli.py` to be a pure, standalone mock that perfectly simulates the expected streaming behavior of `gemini_cli` without relying on the app to intercept specific magic prompts. +- Stabilize the hook API (`api_hooks.py`) so the test script can unambiguously distinguish between a general tool approval, an MMA step approval, and an MMA worker spawn approval, instead of relying on a fragile `pending_approval` catch-all. \ No newline at end of file diff --git a/conductor_tech_lead.py b/conductor_tech_lead.py index ca7c77d..9d3d3cf 100644 --- a/conductor_tech_lead.py +++ b/conductor_tech_lead.py @@ -9,9 +9,7 @@ def generate_tickets(track_brief: str, module_skeletons: str) -> list[dict]: Breaks down a Track Brief and module skeletons into discrete Tier 3 Tickets. """ # 1. Set Tier 2 Model (Tech Lead - Flash) - if ai_client._model != 'mock': - ai_client.set_provider('gemini', 'gemini-2.5-flash-lite') - ai_client.reset_session() # 2. Construct Prompt + # 2. 
Construct Prompt system_prompt = mma_prompts.PROMPTS.get("tier2_sprint_planning") user_message = ( f"### TRACK BRIEF:\n{track_brief}\n\n" diff --git a/config.toml b/config.toml index 5046543..1bfcf00 100644 --- a/config.toml +++ b/config.toml @@ -1,6 +1,6 @@ [ai] provider = "gemini_cli" -model = "mock" +model = "gemini-3-flash-preview" temperature = 0.0 max_tokens = 8192 history_trunc_limit = 8000 diff --git a/gui_2.py b/gui_2.py index 951ce44..752006b 100644 --- a/gui_2.py +++ b/gui_2.py @@ -991,7 +991,7 @@ class App: def _handle_approve_tool(self) -> None: """Logic for approving a pending tool execution via API hooks.""" print("[DEBUG] _handle_approve_tool called") - if self._pending_ask: + if self._pending_ask_dialog: self._handle_approve_ask() else: print("[DEBUG] No pending tool approval found") @@ -1000,7 +1000,11 @@ class App: """Logic for approving a pending sub-agent spawn via API hooks.""" print("[DEBUG] _handle_approve_spawn called") if self._pending_mma_spawn: + # Synchronize with the handler logic self._handle_mma_respond(approved=True, prompt=self._mma_spawn_prompt, context_md=self._mma_spawn_context) + # Crucially, close the modal state so UI can continue + self._mma_spawn_open = False + self._pending_mma_spawn = None else: print("[DEBUG] No pending spawn approval found") @@ -1982,6 +1986,7 @@ class App: threading.Thread(target=_bg_task, daemon=True).start() def _cb_accept_tracks(self) -> None: + self._show_track_proposal_modal = False def _bg_task(): # Generate skeletons once self.ai_status = "Phase 2: Generating skeletons for all tracks..." 
@@ -2118,6 +2123,10 @@ class App: if self._show_track_proposal_modal: imgui.open_popup("Track Proposal") if imgui.begin_popup_modal("Track Proposal", True, imgui.WindowFlags_.always_auto_resize)[0]: + if not self._show_track_proposal_modal: + imgui.close_current_popup() + imgui.end_popup() + return imgui.text_colored(C_IN, "Proposed Implementation Tracks") imgui.separator() if not self.proposed_tracks: diff --git a/manualslop_layout.ini b/manualslop_layout.ini index e411166..367355d 100644 --- a/manualslop_layout.ini +++ b/manualslop_layout.ini @@ -131,7 +131,7 @@ Collapsed=0 DockId=0x00000006,0 [Window][Approve Tool Execution] -Pos=512,437 +Pos=1009,547 Size=416,325 Collapsed=0 @@ -147,6 +147,11 @@ Size=879,1183 Collapsed=0 DockId=0x00000004,1 +[Window][Track Proposal] +Pos=709,326 +Size=262,209 +Collapsed=0 + [Table][0xFB6E3870,4] RefScale=13 Column 0 Width=80 diff --git a/tests/mock_gemini_cli.py b/tests/mock_gemini_cli.py index 9a4d652..8b619ee 100644 --- a/tests/mock_gemini_cli.py +++ b/tests/mock_gemini_cli.py @@ -8,21 +8,60 @@ def main() -> None: sys.stderr.write(f"DEBUG: GEMINI_CLI_HOOK_CONTEXT: {os.environ.get('GEMINI_CLI_HOOK_CONTEXT')}\n") # Read prompt from stdin try: - # On Windows, stdin might be closed or behave weirdly if not handled prompt = sys.stdin.read() except EOFError: prompt = "" sys.stderr.write(f"DEBUG: Received prompt via stdin ({len(prompt)} chars)\n") sys.stderr.flush() + # Skip management commands if len(sys.argv) > 1 and sys.argv[1] in ["mcp", "extensions", "skills", "hooks"]: return - # If the prompt contains tool results, provide final answer + + # Check for specific simulation contexts + # Use startswith or check the beginning of the prompt to avoid matching text inside skeletons + if 'PATH: Epic Initialization' in prompt[:500]: + mock_response = [ + {"id": "mock-track-1", "type": "Track", "module": "core", "persona": "Tech Lead", "severity": "Medium", "goal": "Mock Goal 1", "acceptance_criteria": ["criteria 1"], "title": "Mock 
Goal 1"}, + {"id": "mock-track-2", "type": "Track", "module": "ui", "persona": "Frontend Lead", "severity": "Low", "goal": "Mock Goal 2", "acceptance_criteria": ["criteria 2"], "title": "Mock Goal 2"} + ] + print(json.dumps({ + "type": "message", + "role": "assistant", + "content": json.dumps(mock_response) + }), flush=True) + print(json.dumps({ + "type": "result", + "status": "success", + "stats": {"total_tokens": 100, "input_tokens": 50, "output_tokens": 50}, + "session_id": "mock-session-epic" + }), flush=True) + return + + if 'PATH: Sprint Planning' in prompt[:500]: + mock_response = [ + {"id": "mock-ticket-1", "type": "Ticket", "goal": "Mock Ticket 1", "target_file": "file1.py", "depends_on": [], "context_requirements": "req 1"}, + {"id": "mock-ticket-2", "type": "Ticket", "goal": "Mock Ticket 2", "target_file": "file2.py", "depends_on": ["mock-ticket-1"], "context_requirements": "req 2"} + ] + print(json.dumps({ + "type": "message", + "role": "assistant", + "content": json.dumps(mock_response) + }), flush=True) + print(json.dumps({ + "type": "result", + "status": "success", + "stats": {"total_tokens": 100, "input_tokens": 50, "output_tokens": 50}, + "session_id": "mock-session-sprint" + }), flush=True) + return + + # If the prompt contains tool results, provide final answer if '"role": "tool"' in prompt or '"tool_call_id"' in prompt: print(json.dumps({ "type": "message", "role": "assistant", - "content": "I have processed the tool results. Everything looks good!" + "content": "SUCCESS: Mock Tier 3 worker implemented the change. 
[MOCK OUTPUT]" }), flush=True) print(json.dumps({ "type": "result", @@ -31,7 +70,8 @@ def main() -> None: "session_id": "mock-session-final" }), flush=True) return - # Default flow: simulate a tool call + + # Default flow: simulate a tool call bridge_path = os.path.abspath("scripts/cli_tool_bridge.py") # Using format that bridge understands bridge_tool_call = { @@ -66,11 +106,6 @@ def main() -> None: "tool_id": "call_123", "parameters": {"path": "test.txt"} }), flush=True) - print(json.dumps({ - "type": "message", - "role": "assistant", - "content": "I am reading the file now..." - }), flush=True) print(json.dumps({ "type": "result", "status": "success", diff --git a/tests/temp_project.toml b/tests/temp_project.toml index cb0e9c4..fd3f3c9 100644 --- a/tests/temp_project.toml +++ b/tests/temp_project.toml @@ -22,7 +22,7 @@ base_dir = "." paths = [] [gemini_cli] -binary_path = "gemini" +binary_path = "C:\\projects\\manual_slop\\.venv\\Scripts\\python.exe C:\\projects\\manual_slop\\tests\\mock_gemini_cli.py" [deepseek] reasoning_effort = "medium" @@ -40,27 +40,3 @@ fetch_url = true epic = "Develop a new feature" active_track_id = "" tracks = [] - -[mma.active_track] -id = "track_024370f1b453" -description = "Mock Goal 1" - -[[mma.active_track.tickets]] -id = "mock-ticket-1" -description = "Mock Ticket 1" -status = "todo" -assigned_to = "unassigned" -context_requirements = [] -depends_on = [] -step_mode = false - -[[mma.active_track.tickets]] -id = "mock-ticket-2" -description = "Mock Ticket 2" -status = "todo" -assigned_to = "unassigned" -context_requirements = [] -depends_on = [ - "mock-ticket-1", -] -step_mode = false diff --git a/tests/temp_project_history.toml b/tests/temp_project_history.toml index 5299480..5e72dde 100644 --- a/tests/temp_project_history.toml +++ b/tests/temp_project_history.toml @@ -10,7 +10,7 @@ auto_add = true [discussions.main] git_commit = "" -last_updated = "2026-02-28T22:11:24" +last_updated = "2026-02-28T22:41:40" history = [ 
"@2026-02-28T22:02:40\nSystem:\n[PERFORMANCE ALERT] CPU usage high: 83.5%. Please consider optimizing recent changes or reducing load.", "@2026-02-28T22:03:10\nSystem:\n[PERFORMANCE ALERT] CPU usage high: 103.9%. Please consider optimizing recent changes or reducing load.", diff --git a/tests/visual_sim_mma_v2.py b/tests/visual_sim_mma_v2.py index ecd7fd9..f9e856a 100644 --- a/tests/visual_sim_mma_v2.py +++ b/tests/visual_sim_mma_v2.py @@ -17,11 +17,14 @@ def test_mma_complete_lifecycle(live_gui) -> None: client = ApiHookClient() assert client.wait_for_server(timeout=10) - # 1. Set model to 'mock'. + # 1. Set up the mock CLI provider try: - client.set_value('current_model', 'mock') + client.set_value('current_provider', 'gemini_cli') + # Point the CLI adapter to our mock script + mock_cli_path = f'{sys.executable} {os.path.abspath("tests/mock_gemini_cli.py")}' + client.set_value('gcli_path', mock_cli_path) except Exception as e: - pytest.fail(f"Failed to set model to 'mock': {e}") + pytest.fail(f"Failed to set up mock provider: {e}") # 2. Enter epic and click 'Plan Epic'. client.set_value('mma_epic_input', 'Develop a new feature') @@ -136,19 +139,30 @@ def test_mma_complete_lifecycle(live_gui) -> None: # 8. Verify 'active_tier' change and output in 'mma_streams'. streams_found = False - for _ in range(30): + for _ in range(60): # Give it more time for the worker to spawn and respond status = client.get_mma_status() - streams = status.get('mma_streams', {}) - if streams and any("Tier 3" in k for k in streams.keys()): - print(f"[SIM] Found Tier 3 worker output in streams: {list(streams.keys())}") - streams_found = True - break - # Keep approving if needed + + # Handle approvals if they pop up during worker execution if status and status.get('pending_spawn') is True: + print('[SIM] Worker spawn required. Clicking btn_approve_spawn...') client.click('btn_approve_spawn') elif status and status.get('pending_approval') is True: + print('[SIM] Tool approval required. 
Clicking btn_approve_tool...') client.click('btn_approve_tool') + + streams = status.get('mma_streams', {}) + print(f"Polling streams: {list(streams.keys())}") + + if streams and any("Tier 3" in k for k in streams.keys()): + print(f"[SIM] Found Tier 3 worker output in streams: {list(streams.keys())}") + # Check for our specific mock content + tier3_key = [k for k in streams.keys() if "Tier 3" in k][0] + if "SUCCESS: Mock Tier 3 worker" in streams[tier3_key]: + print("[SIM] Verified mock worker output content.") + streams_found = True + break + time.sleep(1) - assert streams_found or 'Tier 1' in status.get('mma_streams', {}), "No output found in 'mma_streams'." + assert streams_found, "No Tier 3 mock output found in 'mma_streams'." print("MMA complete lifecycle simulation successful.")