feat(conductor): Restore mma_implementation track

2026-02-26 13:13:29 -05:00
parent ff1b2cbce0
commit 9e56245091
19 changed files with 339 additions and 68 deletions
--- a/.gemini/settings.json
+++ b/.gemini/settings.json
@@ -1,4 +1,7 @@
 {
  "tools": {
    "discoveryCommand": "python C:/projects/manual_slop/scripts/tool_discovery.py"
  },
  "hooks": {
    "BeforeTool": [
      {
--- a/ai_client.py
+++ b/ai_client.py
@@ -101,6 +101,7 @@ _ANTHROPIC_CHUNK_SIZE = 120_000
 _SYSTEM_PROMPT = (
    "You are a helpful coding assistant with access to a PowerShell tool and MCP tools (file access: read_file, list_directory, search_files, get_file_summary, web access: web_search, fetch_url). "
    "When calling file/directory tools, always use the 'path' parameter for the target path. "
    "When asked to create or edit files, prefer targeted edits over full rewrites. "
    "Always explain what you are doing before invoking the tool.\n\n"
    "When writing or rewriting large files (especially those containing quotes, backticks, or special characters), "
@@ -982,6 +983,9 @@ def _send_gemini_cli(md_content: str, user_message: str, base_dir: str,
                _append_comms("IN", "tool_result", {"name": name, "id": call_id, "output": out})
                events.emit("tool_execution", payload={"status": "completed", "tool": name, "result": out, "round": r_idx})
            # CRITICAL: Update payload for the next round
            payload = json.dumps(tool_results_for_cli)
            if _cumulative_tool_bytes > _MAX_TOOL_OUTPUT_BYTES:
                 _append_comms("OUT", "request", {"message": f"[TOOL OUTPUT BUDGET EXCEEDED: {_cumulative_tool_bytes} bytes]"})
--- a/api_hooks.py
+++ b/api_hooks.py
@@ -275,7 +275,11 @@ class HookServer:
        self.thread = None
    def start(self):
-        if not getattr(self.app, 'test_hooks_enabled', False):
+        if self.thread and self.thread.is_alive():
            return
        is_gemini_cli = getattr(self.app, 'current_provider', '') == 'gemini_cli'
        if not getattr(self.app, 'test_hooks_enabled', False) and not is_gemini_cli:
            return
        # Ensure the app has the task queue and lock initialized
--- a/conductor/tracks/archive/gemini_cli_parity_20260225/index.md
+++ b/conductor/tracks/archive/gemini_cli_parity_20260225/index.md
--- a/conductor/tracks/archive/gemini_cli_parity_20260225/metadata.json
+++ b/conductor/tracks/archive/gemini_cli_parity_20260225/metadata.json
--- a/conductor/tracks/archive/gemini_cli_parity_20260225/plan.md
+++ b/conductor/tracks/archive/gemini_cli_parity_20260225/plan.md
--- a/conductor/tracks/archive/gemini_cli_parity_20260225/spec.md
+++ b/conductor/tracks/archive/gemini_cli_parity_20260225/spec.md
--- a/config.toml
+++ b/config.toml
@@ -31,6 +31,7 @@ active = "C:\\projects\\manual_slop\\tests\\temp_project.toml"
 "Discussion Hub" = true
 "Operations Hub" = true
 Theme = true
 "Log Management" = false
 Diagnostics = true
 [headless]
--- a/gemini_cli_adapter.py
+++ b/gemini_cli_adapter.py
@@ -91,9 +91,13 @@ class GeminiCliAdapter:
                        if "session_id" in data:
                            self.session_id = data.get("session_id")
                    elif msg_type == "message":
-                        content = data.get("content", data.get("text"))
+                        # CRITICAL: Only accumulate content from the assistant/model role.
-                        if content:
+                        # The CLI echoes back the 'user' prompt in the stream, which we must skip.
-                            accumulated_text += content
+                        role = data.get("role", "")
                        if role in ["assistant", "model"]:
                            content = data.get("content", data.get("text"))
                            if content:
                                accumulated_text += content
                    elif msg_type == "result":
                        self.last_usage = data.get("stats") or data.get("usage")
                        if "session_id" in data:
--- a/gui_2.py
+++ b/gui_2.py
@@ -7,6 +7,7 @@ import json
 import sys
 import os
 import uuid
 import requests
 from pathlib import Path
 from tkinter import filedialog, Tk
 import aggregate
@@ -300,6 +301,10 @@ class App:
                    ai_client._gemini_cli_adapter = ai_client.GeminiCliAdapter(binary_path=self.ui_gemini_cli_path)
                else:
                    ai_client._gemini_cli_adapter.binary_path = self.ui_gemini_cli_path
                # Start hook server if not already running (required for bridge)
                if hasattr(self, 'hook_server'):
                    self.hook_server.start()
            self.available_models = []
            self._fetch_models(value)
@@ -807,9 +812,12 @@ class App:
                    if item in self._settable_fields:
                        attr_name = self._settable_fields[item]
                        setattr(self, attr_name, value)
-                        if item in ["current_provider", "current_model"]:
+                        
-                            ai_client.set_provider(self.current_provider, self.current_model)
+                        if item == "gcli_path":
-                            ai_client.reset_session()
+                             if not ai_client._gemini_cli_adapter:
                                 ai_client._gemini_cli_adapter = ai_client.GeminiCliAdapter(binary_path=value)
                             else:
                                 ai_client._gemini_cli_adapter.binary_path = value
                elif action == "click":
                    item = task.get("item")
@@ -1325,17 +1333,17 @@ class App:
            # ---- Menubar
            if imgui.begin_main_menu_bar():
                if imgui.begin_menu("manual slop"):
-                    if imgui.menu_item("Quit", "Ctrl+Q")[0]:
+                    if imgui.menu_item("Quit", "Ctrl+Q", False)[0]:
                        self.should_quit = True
                    imgui.end_menu()
                if imgui.begin_menu("View"):
                    for name in self.show_windows:
-                        _, self.show_windows[name] = imgui.menu_item(name, None, self.show_windows[name])
+                        _, self.show_windows[name] = imgui.menu_item(name, "", self.show_windows[name])
                    imgui.end_menu()
                if imgui.begin_menu("Project"):
-                    if imgui.menu_item("Save All", "Ctrl+S")[0]:
+                    if imgui.menu_item("Save All", "Ctrl+S", False)[0]:
                        self._flush_to_project()
                        self._save_active_project()
                        self._flush_to_config()
--- a/manualslop_layout.ini
+++ b/manualslop_layout.ini
@@ -79,7 +79,7 @@ DockId=0x0000000F,2
 [Window][Theme]
 Pos=0,17
-Size=588,545
+Size=348,545
 Collapsed=0
 DockId=0x00000005,1
@@ -89,14 +89,14 @@ Size=900,700
 Collapsed=0
 [Window][Diagnostics]
-Pos=590,17
+Pos=350,17
 Size=530,1183
 Collapsed=0
 DockId=0x0000000E,0
 [Window][Context Hub]
 Pos=0,17
-Size=588,545
+Size=348,545
 Collapsed=0
 DockId=0x00000005,0
@@ -107,34 +107,39 @@ Collapsed=0
 DockId=0x0000000D,0
 [Window][Discussion Hub]
-Pos=1122,17
+Pos=882,17
 Size=558,1183
 Collapsed=0
 DockId=0x00000004,0
 [Window][Operations Hub]
-Pos=590,17
+Pos=350,17
 Size=530,1183
 Collapsed=0
 DockId=0x0000000E,1
 [Window][Files & Media]
 Pos=0,564
-Size=588,636
+Size=348,636
 Collapsed=0
 DockId=0x00000006,1
 [Window][AI Settings]
 Pos=0,564
-Size=588,636
+Size=348,636
 Collapsed=0
 DockId=0x00000006,0
 [Window][Approve Tool Execution]
 Pos=512,437
 Size=416,325
 Collapsed=0
 [Docking][Data]
 DockNode              ID=0x00000008 Pos=3125,170 Size=593,1157 Split=Y
  DockNode            ID=0x00000009 Parent=0x00000008 SizeRef=1029,147 Selected=0x0469CA7A
  DockNode            ID=0x0000000A Parent=0x00000008 SizeRef=1029,145 Selected=0xDF822E02
-DockSpace             ID=0xAFC85805 Window=0x079D3A04 Pos=0,17 Size=1680,1183 Split=Y
+DockSpace             ID=0xAFC85805 Window=0x079D3A04 Pos=0,17 Size=1440,1183 Split=Y
  DockNode            ID=0x0000000C Parent=0xAFC85805 SizeRef=1362,1041 Split=X Selected=0x5D11106F
    DockNode          ID=0x00000003 Parent=0x0000000C SizeRef=1120,1183 Split=X
      DockNode        ID=0x0000000B Parent=0x00000003 SizeRef=404,1186 Split=Y Selected=0xF4139CA2
--- a/reproduce_delay.py
+++ b/reproduce_delay.py
@@ -1,18 +0,0 @@
 import time
 from ai_client import get_gemini_cache_stats
 def reproduce_delay():
    """Time one call to get_gemini_cache_stats() and print the outcome.

    The elapsed wall-clock time is reported in milliseconds together with
    the returned stats. Any exception raised inside the timed section is
    caught and printed instead of propagating.
    """
    print("Starting reproduction of Gemini cache list delay...")
    t0 = time.time()
    try:
        cache_stats = get_gemini_cache_stats()
        # time.time() yields seconds; scale to milliseconds for the report.
        elapsed_ms = (time.time() - t0) * 1000.0
        print(f"get_gemini_cache_stats() took {elapsed_ms:.2f}ms")
        print(f"Stats: {cache_stats}")
    except Exception as exc:
        print(f"Error calling get_gemini_cache_stats: {exc}")
        print("Note: This might fail if no valid credentials.toml exists or API key is invalid.")
 if __name__ == "__main__":
    reproduce_delay()
--- a/reproduce_no_text.py
+++ b/reproduce_no_text.py
@@ -1,28 +0,0 @@
 import json
 import subprocess
 import os
 import time
 import sys
 # Add project root to sys.path
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".")))
 from gemini_cli_adapter import GeminiCliAdapter
 def test_repro():
    """Send one short prompt through GeminiCliAdapter and dump what came back.

    Prints the reply text, tool calls and stderr from the returned mapping,
    plus the adapter's last_usage and session_id attributes.
    """
    cli = GeminiCliAdapter(binary_path="gemini")
    # Using a simple message
    prompt = "say hello"
    print(f"Sending message: '{prompt}'")
    reply = cli.send(prompt, model="gemini-3-flash-preview")
    print("\n--- Result ---")
    print(f"Text: '{reply.get('text')}'")
    print(f"Tool Calls: {reply.get('tool_calls')}")
    print(f"Usage: {cli.last_usage}")
    print(f"Session ID: {cli.session_id}")
    print(f"Stderr: {reply.get('stderr')}")
 if __name__ == "__main__":
    test_repro()
--- a/scripts/cli_tool_bridge.py
+++ b/scripts/cli_tool_bridge.py
@@ -103,11 +103,12 @@ def main():
        # 4. Check context — if not running via Manual Slop, we pass through (allow)
        # This prevents the hook from affecting normal CLI usage.
        hook_context = os.environ.get("GEMINI_CLI_HOOK_CONTEXT")
        logging.debug(f"Checking GEMINI_CLI_HOOK_CONTEXT: '{hook_context}'")
        if hook_context != "manual_slop":
-             logging.debug("GEMINI_CLI_HOOK_CONTEXT not set to 'manual_slop'. Allowing execution without confirmation.")
+             logging.debug(f"GEMINI_CLI_HOOK_CONTEXT is '{hook_context}', NOT 'manual_slop'. Allowing execution without confirmation.")
             print(json.dumps({
                "decision": "allow",
-                "reason": "Non-programmatic usage (GEMINI_CLI_HOOK_CONTEXT not set)."
+                "reason": f"Non-programmatic usage (GEMINI_CLI_HOOK_CONTEXT={hook_context})."
            }))
             return
--- a/scripts/tool_discovery.py
+++ b/scripts/tool_discovery.py
@@ -0,0 +1,42 @@
 import json
 import sys
 import os
 # Add project root to sys.path
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
 try:
    import mcp_client
 except ImportError:
    print("[]")
    sys.exit(0)
 def main():
    """Print the tool declarations as an indented JSON array on stdout.

    The array holds every entry of mcp_client.MCP_TOOL_SPECS followed by a
    hand-written run_powershell declaration.
    """
    # run_powershell is defined manually here (to match ai_client.py).
    powershell_description = (
        "Run a PowerShell script within the project base_dir. "
        "Use this to create, edit, rename, or delete files and directories. "
        "The working directory is set to base_dir automatically. "
        "stdout and stderr are returned to you as the result."
    )
    script_param = {
        "type": "string",
        "description": "The PowerShell script to execute."
    }
    powershell_spec = {
        "name": "run_powershell",
        "description": powershell_description,
        "parameters": {
            "type": "object",
            "properties": {"script": script_param},
            "required": ["script"]
        }
    }
    # Copy the shared specs into a fresh list so the module-level
    # collection itself is never mutated.
    declarations = [*mcp_client.MCP_TOOL_SPECS, powershell_spec]
    # Output as JSON array of FunctionDeclarations
    print(json.dumps(declarations, indent=2))
 if __name__ == "__main__":
    main()
--- a/tests/mock_gemini_cli.py
+++ b/tests/mock_gemini_cli.py
@@ -6,9 +6,11 @@ import os
 def main():
    # Debug log to stderr
    sys.stderr.write(f"DEBUG: mock_gemini_cli called with args: {sys.argv}\n")
    sys.stderr.write(f"DEBUG: GEMINI_CLI_HOOK_CONTEXT: {os.environ.get('GEMINI_CLI_HOOK_CONTEXT')}\n")
    # Read prompt from stdin
    try:
        # On Windows, stdin might be closed or behave weirdly if not handled
        prompt = sys.stdin.read()
    except EOFError:
        prompt = ""
@@ -47,6 +49,7 @@ def main():
    sys.stderr.flush()
    try:
        # CRITICAL: Use the current process environment to ensure GEMINI_CLI_HOOK_CONTEXT is passed
        process = subprocess.Popen(
            [sys.executable, bridge_path],
            stdin=subprocess.PIPE,
--- a/tests/temp_project_history.toml
+++ b/tests/temp_project_history.toml
@@ -4,7 +4,7 @@ roles = [
    "Vendor API",
    "System",
 ]
-active = "testing deepseek"
+active = "testing gemini-cli"
 auto_add = true
 [discussions.main]
@@ -16,5 +16,13 @@ history = [
 [discussions."testing deepseek"]
 git_commit = ""
-last_updated = "2026-02-26T00:29:48"
+last_updated = "2026-02-26T12:38:38"
 history = [
    "@2026-02-26T12:38:30\nUser:\nwasap deepseek?",
    "@2026-02-26T12:38:34\nAI:\nHey there! 👋 I'm DeepSeek, an AI assistant created by DeepSeek Company. I'm here to help you with various tasks like coding, writing, analysis, problem-solving, and more!\n\nI can:\n- Write and debug code in multiple programming languages\n- Help with technical questions and explanations\n- Assist with writing and content creation\n- Analyze documents and data\n- Search the web for current information (when you enable it)\n- Work with files and directories\n\nWhat can I help you with today? Whether it's coding, learning something new, or just having a conversation, I'm ready to assist! 😊",
 ]
 [discussions."testing gemini-cli"]
 git_commit = ""
 last_updated = "2026-02-26T13:08:53"
 history = []
--- a/tests/test_gemini_cli_edge_cases.py
+++ b/tests/test_gemini_cli_edge_cases.py
@@ -0,0 +1,170 @@
 import pytest
 import time
 import os
 import sys
 import requests
 import json
 from api_hook_client import ApiHookClient
 def test_gemini_cli_context_bleed_prevention(live_gui):
    """
    Test that the GeminiCliAdapter correctly filters out echoed 'user' messages
    and only shows assistant content in the GUI history.

    A throwaway mock CLI script is written to tests/mock_context_bleed.py; it
    emits an init message, an echoed 'user' message, one 'assistant' message
    and a result record. The script is removed again even when an assertion
    fails, so a red run does not leave a stray file in the tests directory.
    """
    client = ApiHookClient("http://127.0.0.1:8999")
    client.click("btn_reset")
    client.set_value("auto_add_history", True)
    # Create a specialized mock for context bleed
    bleed_mock = os.path.abspath("tests/mock_context_bleed.py")
    try:
        with open(bleed_mock, "w") as f:
            f.write('''import sys, json
 print(json.dumps({"type": "init", "session_id": "bleed-test"}), flush=True)
 print(json.dumps({"type": "message", "role": "user", "content": "I am echoing you"}), flush=True)
 print(json.dumps({"type": "message", "role": "assistant", "content": "Actual AI Response"}), flush=True)
 print(json.dumps({"type": "result", "stats": {"total_tokens": 10}}), flush=True)
 ''')
        # Both paths are quoted so the interpreter and script path form one command string.
        cli_cmd = f'"{sys.executable}" "{bleed_mock}"'
        client.set_value("current_provider", "gemini_cli")
        client.set_value("gcli_path", cli_cmd)
        client.set_value("ai_input", "Test context bleed")
        client.click("btn_gen_send")
        # Wait for completion
        time.sleep(3)
        session = client.get_session()
        entries = session.get("session", {}).get("entries", [])
        # Verify: We expect exactly one AI entry, and it must NOT contain the echoed user message
        ai_entries = [e for e in entries if e.get("role") == "AI"]
        assert len(ai_entries) == 1
        assert ai_entries[0].get("content") == "Actual AI Response"
        assert "echoing you" not in ai_entries[0].get("content")
    finally:
        # Guarded so a failure while creating the file is not masked by a
        # FileNotFoundError raised from the cleanup itself.
        if os.path.exists(bleed_mock):
            os.remove(bleed_mock)
 def test_gemini_cli_parameter_resilience(live_gui):
    """
    Test that mcp_client correctly handles 'file_path' and 'dir_path' aliases
    sent by the AI instead of 'path'.

    A throwaway mock CLI script is generated at tests/mock_alias_tool.py. On
    its first invocation it calls the tool bridge and emits a list_directory
    tool_use whose argument is named 'dir_path'; once the prompt contains a
    tool-role entry it answers "Tool worked!". The test approves the first
    pending tool request and then looks for that answer in the session
    history. The generated script is removed even when the test fails.
    """
    client = ApiHookClient("http://127.0.0.1:8999")
    client.click("btn_reset")
    client.set_value("auto_add_history", True)
    client.select_list_item("proj_files", "manual_slop")
    # Create a mock that uses dir_path for list_directory
    alias_mock = os.path.abspath("tests/mock_alias_tool.py")
    bridge_path = os.path.abspath("scripts/cli_tool_bridge.py")
    # The bridge path is pasted into a string literal of the generated script,
    # so Windows backslashes are swapped for forward slashes to keep them from
    # being read as escape sequences there.
    if sys.platform == "win32":
        bridge_path_str = bridge_path.replace("\\", "/")
    else:
        bridge_path_str = bridge_path
    try:
        # Written through alias_mock so creation, execution and cleanup all
        # refer to the same absolute path.
        with open(alias_mock, "w") as f:
            f.write(f'''import sys, json, os, subprocess
 prompt = sys.stdin.read()
 if '"role": "tool"' in prompt:
    print(json.dumps({{"type": "message", "role": "assistant", "content": "Tool worked!"}}), flush=True)
    print(json.dumps({{"type": "result", "stats": {{"total_tokens": 20}}}}), flush=True)
 else:
    # We must call the bridge to trigger the GUI approval!
    tool_call = {{"name": "list_directory", "input": {{"dir_path": "."}}}}
    bridge_cmd = [sys.executable, "{bridge_path_str}"]
    proc = subprocess.Popen(bridge_cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, text=True)
    stdout, _ = proc.communicate(input=json.dumps(tool_call))
    # Even if bridge says allow, we emit the tool_use to the adapter
    print(json.dumps({{"type": "message", "role": "assistant", "content": "I will list the directory."}}), flush=True)
    print(json.dumps({{
        "type": "tool_use", 
        "name": "list_directory", 
        "id": "alias_call",
        "args": {{"dir_path": "."}} 
    }}), flush=True)
    print(json.dumps({{"type": "result", "stats": {{"total_tokens": 10}}}}), flush=True)
 ''')
        cli_cmd = f'"{sys.executable}" "{alias_mock}"'
        client.set_value("current_provider", "gemini_cli")
        client.set_value("gcli_path", cli_cmd)
        client.set_value("ai_input", "Test parameter aliases")
        client.click("btn_gen_send")
        # Handle approval: poll the event feed and approve every pending ask
        # seen in a batch, giving up after `timeout` seconds.
        timeout = 15
        start_time = time.time()
        approved = False
        while time.time() - start_time < timeout:
            for ev in client.get_events():
                if ev.get("type") == "ask_received":
                    requests.post("http://127.0.0.1:8999/api/ask/respond",
                                  json={"request_id": ev.get("request_id"), "response": {"approved": True}})
                    approved = True
            if approved:
                break
            time.sleep(0.5)
        assert approved, "Tool approval event never received"
        # Verify tool result in history
        time.sleep(2)
        session = client.get_session()
        entries = session.get("session", {}).get("entries", [])
        # Check for "Tool worked!" which implies the tool execution was successful
        found = any("Tool worked!" in e.get("content", "") for e in entries)
        assert found, "Tool result indicating success not found in history"
    finally:
        # Guarded so a failure while creating the file is not masked by a
        # FileNotFoundError raised from the cleanup itself.
        if os.path.exists(alias_mock):
            os.remove(alias_mock)
 def test_gemini_cli_loop_termination(live_gui):
    """
    Drive a multi-round tool exchange through the mock CLI and check that it
    ends with the final assistant message.

    The test approves any pending tool request it sees, then polls the
    session history until an entry containing "processed the tool results"
    shows up or the wait expires.
    """
    hooks = ApiHookClient("http://127.0.0.1:8999")
    hooks.click("btn_reset")
    hooks.set_value("auto_add_history", True)
    hooks.select_list_item("proj_files", "manual_slop")
    # This uses the existing mock_gemini_cli.py which is already designed for 2 rounds
    mock_script = os.path.abspath("tests/mock_gemini_cli.py")
    hooks.set_value("current_provider", "gemini_cli")
    hooks.set_value("gcli_path", f'"{sys.executable}" "{mock_script}"')
    hooks.set_value("ai_input", "Perform multi-round tool test")
    hooks.click("btn_gen_send")
    # Handle approvals (mock does one tool call)
    approval_window = 20
    began = time.time()
    approved = False
    while time.time() - began < approval_window:
        for event in hooks.get_events():
            if event.get("type") != "ask_received":
                continue
            requests.post(
                "http://127.0.0.1:8999/api/ask/respond",
                json={"request_id": event.get("request_id"), "response": {"approved": True}},
            )
            approved = True
        if approved:
            break
        time.sleep(0.5)
    # Wait for the second round and final answer
    found_final = False
    began = time.time()
    while time.time() - began < 15:
        history = hooks.get_session().get("session", {}).get("entries", [])
        found_final = any(
            "processed the tool results" in entry.get("content", "")
            for entry in history
        )
        if found_final:
            break
        time.sleep(1)
    assert found_final, "Final message after multi-round tool loop not found"
--- a/tests/test_process_pending_gui_tasks.py
+++ b/tests/test_process_pending_gui_tasks.py
@@ -0,0 +1,64 @@
 import pytest
 from unittest.mock import MagicMock, patch
 import ai_client
 from gui_2 import App
@pytest.fixture
 def app_instance():
    """Yield an App constructed with its collaborators patched out.

    Patched for the lifetime of the fixture: gui_2.load_config (returns a
    minimal gemini config with no projects), gui_2.save_config,
    gui_2.project_manager, gui_2.session_logger, gui_2.immapp.run, four App
    methods (_load_active_project, _fetch_models, _load_fonts, _post_init)
    and ai_client.set_provider / ai_client.reset_session.
    """
    # The yield sits inside the with-block, so every patch stays active while
    # the consuming test runs and is undone afterwards.
    with (
        patch('gui_2.load_config', return_value={'ai': {'provider': 'gemini', 'model': 'gemini-2.5-flash-lite'}, 'projects': {}}),
        patch('gui_2.save_config'),
        patch('gui_2.project_manager'),
        patch('gui_2.session_logger'),
        patch('gui_2.immapp.run'),
        patch.object(App, '_load_active_project'),
        patch.object(App, '_fetch_models'),
        patch.object(App, '_load_fonts'),
        patch.object(App, '_post_init'),
        patch('ai_client.set_provider'),
        patch('ai_client.reset_session')
    ):
        app = App()
        yield app
 def test_redundant_calls_in_process_pending_gui_tasks(app_instance):
    """Count ai_client calls triggered by a queued current_provider change.

    One 'set_value' task for 'current_provider' is queued and processed
    while ai_client.set_provider and ai_client.reset_session are replaced by
    fresh mocks, so only calls made during processing are counted.
    """
    # Setup
    app_instance._pending_gui_tasks = [
        {'action': 'set_value', 'item': 'current_provider', 'value': 'anthropic'}
    ]
    # Parenthesized form lets the two context managers span several lines
    # without a line continuation. The fixture already patches these names;
    # they are re-patched here so the counters start from zero.
    with (
        patch('ai_client.set_provider') as mock_set_provider,
        patch('ai_client.reset_session') as mock_reset_session,
    ):
        app_instance._process_pending_gui_tasks()
        # current_provider setter calls:
        # ai_client.reset_session()
        # ai_client.set_provider(value, self.current_model)
        # _process_pending_gui_tasks ALSO calls:
        # ai_client.set_provider(self.current_provider, self.current_model)
        # ai_client.reset_session()
        # Total should be 2 calls for each if redundant.
        # NOTE(review): if the extra calls have been removed from
        # _process_pending_gui_tasks, these counts should drop to 1 —
        # confirm against gui_2.py.
        assert mock_set_provider.call_count == 2
        assert mock_reset_session.call_count == 2
 def test_gcli_path_updates_adapter(app_instance):
    """Check that a queued 'gcli_path' set_value task reaches the CLI adapter.

    Starting with no adapter on ai_client, processing the task must leave an
    adapter whose binary_path equals the queued value.
    """
    new_path = '/new/path/to/gemini'
    # Setup
    app_instance.current_provider = 'gemini_cli'
    app_instance._pending_gui_tasks = [
        {'action': 'set_value', 'item': 'gcli_path', 'value': new_path}
    ]
    # Cleared only after the provider is set, so the test always starts
    # from "no adapter" whatever the assignment above did.
    ai_client._gemini_cli_adapter = None
    app_instance._process_pending_gui_tasks()
    adapter = ai_client._gemini_cli_adapter
    assert adapter is not None
    assert adapter.binary_path == new_path