feat(conductor): Restore mma_implementation track

2026-02-26 13:13:29 -05:00
parent ff1b2cbce0
commit 9e56245091
19 changed files with 339 additions and 68 deletions
--- a/.gemini/settings.json
+++ b/.gemini/settings.json
@@ -1,4 +1,7 @@
 {
+  "tools": {
+    "discoveryCommand": "python C:/projects/manual_slop/scripts/tool_discovery.py"
+  },
  "hooks": {
    "BeforeTool": [
      {
--- a/ai_client.py
+++ b/ai_client.py
@@ -101,6 +101,7 @@ _ANTHROPIC_CHUNK_SIZE = 120_000

 _SYSTEM_PROMPT = (
    "You are a helpful coding assistant with access to a PowerShell tool and MCP tools (file access: read_file, list_directory, search_files, get_file_summary, web access: web_search, fetch_url). "
+    "When calling file/directory tools, always use the 'path' parameter for the target path. "
    "When asked to create or edit files, prefer targeted edits over full rewrites. "
    "Always explain what you are doing before invoking the tool.\n\n"
    "When writing or rewriting large files (especially those containing quotes, backticks, or special characters), "
@@ -983,6 +984,9 @@ def _send_gemini_cli(md_content: str, user_message: str, base_dir: str,
                _append_comms("IN", "tool_result", {"name": name, "id": call_id, "output": out})
                events.emit("tool_execution", payload={"status": "completed", "tool": name, "result": out, "round": r_idx})
            
+            # CRITICAL: Update payload for the next round
+            payload = json.dumps(tool_results_for_cli)
+
            if _cumulative_tool_bytes > _MAX_TOOL_OUTPUT_BYTES:
                 _append_comms("OUT", "request", {"message": f"[TOOL OUTPUT BUDGET EXCEEDED: {_cumulative_tool_bytes} bytes]"})
                 # We should ideally tell the model here, but for CLI we just append to payload
--- a/api_hooks.py
+++ b/api_hooks.py
@@ -275,7 +275,11 @@ class HookServer:
        self.thread = None

    def start(self):
-        if not getattr(self.app, 'test_hooks_enabled', False):
+        if self.thread and self.thread.is_alive():
+            return
+
+        is_gemini_cli = getattr(self.app, 'current_provider', '') == 'gemini_cli'
+        if not getattr(self.app, 'test_hooks_enabled', False) and not is_gemini_cli:
            return

        # Ensure the app has the task queue and lock initialized
--- a/conductor/tracks/archive/gemini_cli_parity_20260225/index.md
+++ b/conductor/tracks/archive/gemini_cli_parity_20260225/index.md
--- a/conductor/tracks/archive/gemini_cli_parity_20260225/metadata.json
+++ b/conductor/tracks/archive/gemini_cli_parity_20260225/metadata.json
--- a/conductor/tracks/archive/gemini_cli_parity_20260225/plan.md
+++ b/conductor/tracks/archive/gemini_cli_parity_20260225/plan.md
--- a/conductor/tracks/archive/gemini_cli_parity_20260225/spec.md
+++ b/conductor/tracks/archive/gemini_cli_parity_20260225/spec.md
--- a/config.toml
+++ b/config.toml
@@ -31,6 +31,7 @@ active = "C:\\projects\\manual_slop\\tests\\temp_project.toml"
 "Discussion Hub" = true
 "Operations Hub" = true
 Theme = true
+"Log Management" = false
 Diagnostics = true

 [headless]
--- a/gemini_cli_adapter.py
+++ b/gemini_cli_adapter.py
@@ -91,6 +91,10 @@ class GeminiCliAdapter:
                        if "session_id" in data:
                            self.session_id = data.get("session_id")
                    elif msg_type == "message":
+                        # CRITICAL: Only accumulate content from the assistant/model role.
+                        # The CLI echoes back the 'user' prompt in the stream, which we must skip.
+                        role = data.get("role", "")
+                        if role in ["assistant", "model"]:
                            content = data.get("content", data.get("text"))
                            if content:
                                accumulated_text += content
--- a/gui_2.py
+++ b/gui_2.py
@@ -7,6 +7,7 @@ import json
 import sys
 import os
 import uuid
+import requests
 from pathlib import Path
 from tkinter import filedialog, Tk
 import aggregate
@@ -300,6 +301,10 @@ class App:
                    ai_client._gemini_cli_adapter = ai_client.GeminiCliAdapter(binary_path=self.ui_gemini_cli_path)
                else:
                    ai_client._gemini_cli_adapter.binary_path = self.ui_gemini_cli_path
+                
+                # Start hook server if not already running (required for bridge)
+                if hasattr(self, 'hook_server'):
+                    self.hook_server.start()
            self.available_models = []
            self._fetch_models(value)

@@ -807,9 +812,12 @@ class App:
                    if item in self._settable_fields:
                        attr_name = self._settable_fields[item]
                        setattr(self, attr_name, value)
-                        if item in ["current_provider", "current_model"]:
-                            ai_client.set_provider(self.current_provider, self.current_model)
-                            ai_client.reset_session()
+                        
+                        if item == "gcli_path":
+                             if not ai_client._gemini_cli_adapter:
+                                 ai_client._gemini_cli_adapter = ai_client.GeminiCliAdapter(binary_path=value)
+                             else:
+                                 ai_client._gemini_cli_adapter.binary_path = value

                elif action == "click":
                    item = task.get("item")
@@ -1325,17 +1333,17 @@ class App:
            # ---- Menubar
            if imgui.begin_main_menu_bar():
                if imgui.begin_menu("manual slop"):
-                    if imgui.menu_item("Quit", "Ctrl+Q")[0]:
+                    if imgui.menu_item("Quit", "Ctrl+Q", False)[0]:
                        self.should_quit = True
                    imgui.end_menu()
                
                if imgui.begin_menu("View"):
                    for name in self.show_windows:
-                        _, self.show_windows[name] = imgui.menu_item(name, None, self.show_windows[name])
+                        _, self.show_windows[name] = imgui.menu_item(name, "", self.show_windows[name])
                    imgui.end_menu()
                
                if imgui.begin_menu("Project"):
-                    if imgui.menu_item("Save All", "Ctrl+S")[0]:
+                    if imgui.menu_item("Save All", "Ctrl+S", False)[0]:
                        self._flush_to_project()
                        self._save_active_project()
                        self._flush_to_config()
--- a/manualslop_layout.ini
+++ b/manualslop_layout.ini
@@ -79,7 +79,7 @@ DockId=0x0000000F,2

 [Window][Theme]
 Pos=0,17
-Size=588,545
+Size=348,545
 Collapsed=0
 DockId=0x00000005,1

@@ -89,14 +89,14 @@ Size=900,700
 Collapsed=0

 [Window][Diagnostics]
-Pos=590,17
+Pos=350,17
 Size=530,1183
 Collapsed=0
 DockId=0x0000000E,0

 [Window][Context Hub]
 Pos=0,17
-Size=588,545
+Size=348,545
 Collapsed=0
 DockId=0x00000005,0

@@ -107,34 +107,39 @@ Collapsed=0
 DockId=0x0000000D,0

 [Window][Discussion Hub]
-Pos=1122,17
+Pos=882,17
 Size=558,1183
 Collapsed=0
 DockId=0x00000004,0

 [Window][Operations Hub]
-Pos=590,17
+Pos=350,17
 Size=530,1183
 Collapsed=0
 DockId=0x0000000E,1

 [Window][Files & Media]
 Pos=0,564
-Size=588,636
+Size=348,636
 Collapsed=0
 DockId=0x00000006,1

 [Window][AI Settings]
 Pos=0,564
-Size=588,636
+Size=348,636
 Collapsed=0
 DockId=0x00000006,0

+[Window][Approve Tool Execution]
+Pos=512,437
+Size=416,325
+Collapsed=0
+
 [Docking][Data]
 DockNode              ID=0x00000008 Pos=3125,170 Size=593,1157 Split=Y
  DockNode            ID=0x00000009 Parent=0x00000008 SizeRef=1029,147 Selected=0x0469CA7A
  DockNode            ID=0x0000000A Parent=0x00000008 SizeRef=1029,145 Selected=0xDF822E02
-DockSpace             ID=0xAFC85805 Window=0x079D3A04 Pos=0,17 Size=1680,1183 Split=Y
+DockSpace             ID=0xAFC85805 Window=0x079D3A04 Pos=0,17 Size=1440,1183 Split=Y
  DockNode            ID=0x0000000C Parent=0xAFC85805 SizeRef=1362,1041 Split=X Selected=0x5D11106F
    DockNode          ID=0x00000003 Parent=0x0000000C SizeRef=1120,1183 Split=X
      DockNode        ID=0x0000000B Parent=0x00000003 SizeRef=404,1186 Split=Y Selected=0xF4139CA2
--- a/reproduce_delay.py
+++ b/reproduce_delay.py
@@ -1,18 +0,0 @@
-import time
-from ai_client import get_gemini_cache_stats
-
-def reproduce_delay():
-    print("Starting reproduction of Gemini cache list delay...")
-    
-    start_time = time.time()
-    try:
-        stats = get_gemini_cache_stats()
-        elapsed = (time.time() - start_time) * 1000.0
-        print(f"get_gemini_cache_stats() took {elapsed:.2f}ms")
-        print(f"Stats: {stats}")
-    except Exception as e:
-        print(f"Error calling get_gemini_cache_stats: {e}")
-        print("Note: This might fail if no valid credentials.toml exists or API key is invalid.")
-
-if __name__ == "__main__":
-    reproduce_delay()
--- a/reproduce_no_text.py
+++ b/reproduce_no_text.py
@@ -1,28 +0,0 @@
-import json
-import subprocess
-import os
-import time
-import sys
-
-# Add project root to sys.path
-sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".")))
-
-from gemini_cli_adapter import GeminiCliAdapter
-
-def test_repro():
-    adapter = GeminiCliAdapter(binary_path="gemini")
-    # Using a simple message
-    message = "say hello"
-    print(f"Sending message: '{message}'")
-    
-    result = adapter.send(message, model="gemini-3-flash-preview")
-    
-    print("\n--- Result ---")
-    print(f"Text: '{result.get('text')}'")
-    print(f"Tool Calls: {result.get('tool_calls')}")
-    print(f"Usage: {adapter.last_usage}")
-    print(f"Session ID: {adapter.session_id}")
-    print(f"Stderr: {result.get('stderr')}")
-
-if __name__ == "__main__":
-    test_repro()
--- a/scripts/cli_tool_bridge.py
+++ b/scripts/cli_tool_bridge.py
@@ -103,11 +103,12 @@ def main():
        # 4. Check context — if not running via Manual Slop, we pass through (allow)
        # This prevents the hook from affecting normal CLI usage.
        hook_context = os.environ.get("GEMINI_CLI_HOOK_CONTEXT")
+        logging.debug(f"Checking GEMINI_CLI_HOOK_CONTEXT: '{hook_context}'")
        if hook_context != "manual_slop":
-             logging.debug("GEMINI_CLI_HOOK_CONTEXT not set to 'manual_slop'. Allowing execution without confirmation.")
+             logging.debug(f"GEMINI_CLI_HOOK_CONTEXT is '{hook_context}', NOT 'manual_slop'. Allowing execution without confirmation.")
             print(json.dumps({
                "decision": "allow",
-                "reason": "Non-programmatic usage (GEMINI_CLI_HOOK_CONTEXT not set)."
+                "reason": f"Non-programmatic usage (GEMINI_CLI_HOOK_CONTEXT={hook_context})."
            }))
             return

--- a/scripts/tool_discovery.py
+++ b/scripts/tool_discovery.py
@@ -0,0 +1,42 @@
+import json
+import sys
+import os
+
+# Add project root to sys.path
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
+
+try:
+    import mcp_client
+except ImportError:
+    print("[]")
+    sys.exit(0)
+
+def main():
+    specs = list(mcp_client.MCP_TOOL_SPECS)
+    
+    # Add run_powershell (manually define to match ai_client.py)
+    specs.append({
+        "name": "run_powershell",
+        "description": (
+            "Run a PowerShell script within the project base_dir. "
+            "Use this to create, edit, rename, or delete files and directories. "
+            "The working directory is set to base_dir automatically. "
+            "stdout and stderr are returned to you as the result."
+        ),
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "script": {
+                    "type": "string",
+                    "description": "The PowerShell script to execute."
+                }
+            },
+            "required": ["script"]
+        }
+    })
+    
+    # Output as JSON array of FunctionDeclarations
+    print(json.dumps(specs, indent=2))
+
+if __name__ == "__main__":
+    main()
--- a/tests/mock_gemini_cli.py
+++ b/tests/mock_gemini_cli.py
@@ -6,9 +6,11 @@ import os
 def main():
    # Debug log to stderr
    sys.stderr.write(f"DEBUG: mock_gemini_cli called with args: {sys.argv}\n")
+    sys.stderr.write(f"DEBUG: GEMINI_CLI_HOOK_CONTEXT: {os.environ.get('GEMINI_CLI_HOOK_CONTEXT')}\n")
    
    # Read prompt from stdin
    try:
+        # On Windows, stdin might be closed or behave weirdly if not handled
        prompt = sys.stdin.read()
    except EOFError:
        prompt = ""
@@ -47,6 +49,7 @@ def main():
    sys.stderr.flush()

    try:
+        # CRITICAL: Use the current process environment to ensure GEMINI_CLI_HOOK_CONTEXT is passed
        process = subprocess.Popen(
            [sys.executable, bridge_path],
            stdin=subprocess.PIPE,
--- a/tests/temp_project_history.toml
+++ b/tests/temp_project_history.toml
@@ -4,7 +4,7 @@ roles = [
    "Vendor API",
    "System",
 ]
-active = "testing deepseek"
+active = "testing gemini-cli"
 auto_add = true

 [discussions.main]
@@ -16,5 +16,13 @@ history = [

 [discussions."testing deepseek"]
 git_commit = ""
-last_updated = "2026-02-26T00:29:48"
+last_updated = "2026-02-26T12:38:38"
+history = [
+    "@2026-02-26T12:38:30\nUser:\nwasap deepseek?",
+    "@2026-02-26T12:38:34\nAI:\nHey there! 👋 I'm DeepSeek, an AI assistant created by DeepSeek Company. I'm here to help you with various tasks like coding, writing, analysis, problem-solving, and more!\n\nI can:\n- Write and debug code in multiple programming languages\n- Help with technical questions and explanations\n- Assist with writing and content creation\n- Analyze documents and data\n- Search the web for current information (when you enable it)\n- Work with files and directories\n\nWhat can I help you with today? Whether it's coding, learning something new, or just having a conversation, I'm ready to assist! 😊",
+]
+
+[discussions."testing gemini-cli"]
+git_commit = ""
+last_updated = "2026-02-26T13:08:53"
 history = []
--- a/tests/test_gemini_cli_edge_cases.py
+++ b/tests/test_gemini_cli_edge_cases.py
@@ -0,0 +1,170 @@
+import pytest
+import time
+import os
+import sys
+import requests
+import json
+from api_hook_client import ApiHookClient
+
+def test_gemini_cli_context_bleed_prevention(live_gui):
+    """
+    Test that the GeminiCliAdapter correctly filters out echoed 'user' messages
+    and only shows assistant content in the GUI history.
+    """
+    client = ApiHookClient("http://127.0.0.1:8999")
+    client.click("btn_reset")
+    client.set_value("auto_add_history", True)
+    
+    # Create a specialized mock for context bleed
+    bleed_mock = os.path.abspath("tests/mock_context_bleed.py")
+    with open(bleed_mock, "w") as f:
+        f.write('''import sys, json
+print(json.dumps({"type": "init", "session_id": "bleed-test"}), flush=True)
+print(json.dumps({"type": "message", "role": "user", "content": "I am echoing you"}), flush=True)
+print(json.dumps({"type": "message", "role": "assistant", "content": "Actual AI Response"}), flush=True)
+print(json.dumps({"type": "result", "stats": {"total_tokens": 10}}), flush=True)
+''')
+    
+    cli_cmd = f'"{sys.executable}" "{bleed_mock}"'
+    client.set_value("current_provider", "gemini_cli")
+    client.set_value("gcli_path", cli_cmd)
+    
+    client.set_value("ai_input", "Test context bleed")
+    client.click("btn_gen_send")
+    
+    # Wait for completion
+    time.sleep(3)
+    
+    session = client.get_session()
+    entries = session.get("session", {}).get("entries", [])
+    
+    # Verify: We expect exactly one AI entry, and it must NOT contain the echoed user message
+    ai_entries = [e for e in entries if e.get("role") == "AI"]
+    assert len(ai_entries) == 1
+    assert ai_entries[0].get("content") == "Actual AI Response"
+    assert "echoing you" not in ai_entries[0].get("content")
+    
+    os.remove(bleed_mock)
+
+def test_gemini_cli_parameter_resilience(live_gui):
+    """
+    Test that mcp_client correctly handles 'file_path' and 'dir_path' aliases 
+    sent by the AI instead of 'path'.
+    """
+    client = ApiHookClient("http://127.0.0.1:8999")
+    client.click("btn_reset")
+    client.set_value("auto_add_history", True)
+    client.select_list_item("proj_files", "manual_slop")
+
+    # Create a mock that uses dir_path for list_directory
+    alias_mock = os.path.abspath("tests/mock_alias_tool.py")
+    bridge_path = os.path.abspath("scripts/cli_tool_bridge.py")
+    # Avoid backslashes in f-string expression part
+    if sys.platform == "win32":
+        bridge_path_str = bridge_path.replace("\\", "/")
+    else:
+        bridge_path_str = bridge_path
+        
+    with open(alias_tool_content := "tests/mock_alias_tool.py", "w") as f:
+        f.write(f'''import sys, json, os, subprocess
+prompt = sys.stdin.read()
+if '"role": "tool"' in prompt:
+    print(json.dumps({{"type": "message", "role": "assistant", "content": "Tool worked!"}}), flush=True)
+    print(json.dumps({{"type": "result", "stats": {{"total_tokens": 20}}}}), flush=True)
+else:
+    # We must call the bridge to trigger the GUI approval!
+    tool_call = {{"name": "list_directory", "input": {{"dir_path": "."}}}}
+    bridge_cmd = [sys.executable, "{bridge_path_str}"]
+    proc = subprocess.Popen(bridge_cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, text=True)
+    stdout, _ = proc.communicate(input=json.dumps(tool_call))
+    
+    # Even if bridge says allow, we emit the tool_use to the adapter
+    print(json.dumps({{"type": "message", "role": "assistant", "content": "I will list the directory."}}), flush=True)
+    print(json.dumps({{
+        "type": "tool_use", 
+        "name": "list_directory", 
+        "id": "alias_call",
+        "args": {{"dir_path": "."}} 
+    }}), flush=True)
+    print(json.dumps({{"type": "result", "stats": {{"total_tokens": 10}}}}), flush=True)
+''')
+    
+    cli_cmd = f'"{sys.executable}" "{alias_mock}"'
+    client.set_value("current_provider", "gemini_cli")
+    client.set_value("gcli_path", cli_cmd)
+    
+    client.set_value("ai_input", "Test parameter aliases")
+    client.click("btn_gen_send")
+    
+    # Handle approval
+    timeout = 15
+    start_time = time.time()
+    approved = False
+    while time.time() - start_time < timeout:
+        for ev in client.get_events():
+            if ev.get("type") == "ask_received":
+                requests.post("http://127.0.0.1:8999/api/ask/respond", 
+                             json={"request_id": ev.get("request_id"), "response": {"approved": True}})
+                approved = True
+        if approved: break
+        time.sleep(0.5)
+        
+    assert approved, "Tool approval event never received"
+    
+    # Verify tool result in history
+    time.sleep(2)
+    session = client.get_session()
+    entries = session.get("session", {}).get("entries", [])
+    
+    # Check for "Tool worked!" which implies the tool execution was successful
+    found = any("Tool worked!" in e.get("content", "") for e in entries)
+    assert found, "Tool result indicating success not found in history"
+    
+    os.remove(alias_mock)
+
+def test_gemini_cli_loop_termination(live_gui):
+    """
+    Test that multi-round tool calling correctly terminates and preserves 
+    payload (session context) between rounds.
+    """
+    client = ApiHookClient("http://127.0.0.1:8999")
+    client.click("btn_reset")
+    client.set_value("auto_add_history", True)
+    client.select_list_item("proj_files", "manual_slop")
+
+    # This uses the existing mock_gemini_cli.py which is already designed for 2 rounds
+    mock_script = os.path.abspath("tests/mock_gemini_cli.py")
+    cli_cmd = f'"{sys.executable}" "{mock_script}"'
+    client.set_value("current_provider", "gemini_cli")
+    client.set_value("gcli_path", cli_cmd)
+    
+    client.set_value("ai_input", "Perform multi-round tool test")
+    client.click("btn_gen_send")
+    
+    # Handle approvals (mock does one tool call)
+    timeout = 20
+    start_time = time.time()
+    approved = False
+    while time.time() - start_time < timeout:
+        for ev in client.get_events():
+            if ev.get("type") == "ask_received":
+                requests.post("http://127.0.0.1:8999/api/ask/respond", 
+                             json={"request_id": ev.get("request_id"), "response": {"approved": True}})
+                approved = True
+        if approved: break
+        time.sleep(0.5)
+        
+    # Wait for the second round and final answer
+    found_final = False
+    start_time = time.time()
+    while time.time() - start_time < 15:
+        session = client.get_session()
+        entries = session.get("session", {}).get("entries", [])
+        for e in entries:
+            if "processed the tool results" in e.get("content", ""):
+                found_final = True
+                break
+        if found_final: break
+        time.sleep(1)
+        
+    assert found_final, "Final message after multi-round tool loop not found"
--- a/tests/test_process_pending_gui_tasks.py
+++ b/tests/test_process_pending_gui_tasks.py
@@ -0,0 +1,64 @@
+import pytest
+from unittest.mock import MagicMock, patch
+import ai_client
+from gui_2 import App
+
+@pytest.fixture
+def app_instance():
+    """Yield a ``gui_2.App`` built with its external collaborators patched out.
+
+    While the fixture is active, ``gui_2.load_config`` returns a fixed minimal
+    config, and ``gui_2.save_config``, ``gui_2.project_manager``,
+    ``gui_2.session_logger`` and ``gui_2.immapp.run`` are replaced by mocks.
+    Several ``App`` methods and the ``ai_client`` provider/session functions
+    are patched as well. The ``yield`` sits inside the ``with`` block, so the
+    patches stay in place for the whole test that uses the fixture.
+    """
+    # Parenthesized multi-item ``with`` is official syntax from Python 3.10 on.
+    with (
+        patch('gui_2.load_config', return_value={'ai': {'provider': 'gemini', 'model': 'gemini-2.5-flash-lite'}, 'projects': {}}),
+        patch('gui_2.save_config'),
+        patch('gui_2.project_manager'),
+        patch('gui_2.session_logger'),
+        patch('gui_2.immapp.run'),
+        patch.object(App, '_load_active_project'),
+        patch.object(App, '_fetch_models'),
+        patch.object(App, '_load_fonts'),
+        patch.object(App, '_post_init'),
+        patch('ai_client.set_provider'),
+        patch('ai_client.reset_session')
+    ):
+        app = App()
+        yield app
+
+def test_redundant_calls_in_process_pending_gui_tasks(app_instance):
+    # Setup
+    app_instance._pending_gui_tasks = [
+        {'action': 'set_value', 'item': 'current_provider', 'value': 'anthropic'}
+    ]
+    
+    with patch('ai_client.set_provider') as mock_set_provider, 
+         patch('ai_client.reset_session') as mock_reset_session:
+        
+        # We need to make sure the property setter's internal calls are also tracked or mocked.
+        # However, the App instance was created with mocked ai_client.
+        # Let's re-patch it specifically for this test.
+        
+        app_instance._process_pending_gui_tasks()
+        
+        # current_provider setter calls:
+        # ai_client.reset_session()
+        # ai_client.set_provider(value, self.current_model)
+        
+        # _process_pending_gui_tasks ALSO calls:
+        # ai_client.set_provider(self.current_provider, self.current_model)
+        # ai_client.reset_session()
+        
+        # Total should be 2 calls for each if redundant.
+        assert mock_set_provider.call_count == 2
+        assert mock_reset_session.call_count == 2
+
+def test_gcli_path_updates_adapter(app_instance):
+    """Check that a queued ``gcli_path`` set_value task configures the CLI adapter.
+
+    With ``ai_client._gemini_cli_adapter`` cleared beforehand, processing the
+    task must leave an adapter in place whose ``binary_path`` equals the
+    queued value.
+    """
+    # Setup
+    # NOTE(review): the provider is assigned before the adapter is cleared;
+    # the provider setter may create an adapter itself, so keep this order - confirm.
+    app_instance.current_provider = 'gemini_cli'
+    app_instance._pending_gui_tasks = [
+        {'action': 'set_value', 'item': 'gcli_path', 'value': '/new/path/to/gemini'}
+    ]
+    
+    # Clear any existing adapter so the task has to create a fresh one.
+    ai_client._gemini_cli_adapter = None
+    
+    app_instance._process_pending_gui_tasks()
+    
+    assert ai_client._gemini_cli_adapter is not None
+    assert ai_client._gemini_cli_adapter.binary_path == '/new/path/to/gemini'