feat(conductor): Restore mma_implementation track

2026-02-26 13:13:29 -05:00
parent ff1b2cbce0
commit 9e56245091
19 changed files with 339 additions and 68 deletions
--- a/tests/mock_gemini_cli.py
+++ b/tests/mock_gemini_cli.py
@@ -6,9 +6,11 @@ import os
 def main():
    # Debug log to stderr
    sys.stderr.write(f"DEBUG: mock_gemini_cli called with args: {sys.argv}\n")
+    sys.stderr.write(f"DEBUG: GEMINI_CLI_HOOK_CONTEXT: {os.environ.get('GEMINI_CLI_HOOK_CONTEXT')}\n")
    
    # Read prompt from stdin
    try:
+        # On Windows, stdin might be closed or behave weirdly if not handled
        prompt = sys.stdin.read()
    except EOFError:
        prompt = ""
@@ -47,6 +49,7 @@ def main():
    sys.stderr.flush()

    try:
+        # CRITICAL: Use the current process environment to ensure GEMINI_CLI_HOOK_CONTEXT is passed
        process = subprocess.Popen(
            [sys.executable, bridge_path],
            stdin=subprocess.PIPE,
--- a/tests/temp_project_history.toml
+++ b/tests/temp_project_history.toml
@@ -4,7 +4,7 @@ roles = [
    "Vendor API",
    "System",
 ]
-active = "testing deepseek"
+active = "testing gemini-cli"
 auto_add = true

 [discussions.main]
@@ -16,5 +16,13 @@ history = [

 [discussions."testing deepseek"]
 git_commit = ""
-last_updated = "2026-02-26T00:29:48"
+last_updated = "2026-02-26T12:38:38"
+history = [
+    "@2026-02-26T12:38:30\nUser:\nwasap deepseek?",
+    "@2026-02-26T12:38:34\nAI:\nHey there! 👋 I'm DeepSeek, an AI assistant created by DeepSeek Company. I'm here to help you with various tasks like coding, writing, analysis, problem-solving, and more!\n\nI can:\n- Write and debug code in multiple programming languages\n- Help with technical questions and explanations\n- Assist with writing and content creation\n- Analyze documents and data\n- Search the web for current information (when you enable it)\n- Work with files and directories\n\nWhat can I help you with today? Whether it's coding, learning something new, or just having a conversation, I'm ready to assist! 😊",
+]
+
+[discussions."testing gemini-cli"]
+git_commit = ""
+last_updated = "2026-02-26T13:08:53"
 history = []
--- a/tests/test_gemini_cli_edge_cases.py
+++ b/tests/test_gemini_cli_edge_cases.py
@@ -0,0 +1,170 @@
+import pytest
+import time
+import os
+import sys
+import requests
+import json
+from api_hook_client import ApiHookClient
+
+def test_gemini_cli_context_bleed_prevention(live_gui):
+    """
+    Test that the GeminiCliAdapter correctly filters out echoed 'user' messages
+    and only shows assistant content in the GUI history.
+
+    A throwaway mock CLI script is written to tests/mock_context_bleed.py. It
+    prints an init event, an echoed user message, one assistant message and a
+    result event. The script is deleted in a ``finally`` block so that a failed
+    assertion does not leave it behind in the tests directory.
+    """
+    client = ApiHookClient("http://127.0.0.1:8999")
+    client.click("btn_reset")
+    client.set_value("auto_add_history", True)
+
+    # Create a specialized mock for context bleed
+    bleed_mock = os.path.abspath("tests/mock_context_bleed.py")
+    try:
+        with open(bleed_mock, "w") as f:
+            f.write('''import sys, json
+print(json.dumps({"type": "init", "session_id": "bleed-test"}), flush=True)
+print(json.dumps({"type": "message", "role": "user", "content": "I am echoing you"}), flush=True)
+print(json.dumps({"type": "message", "role": "assistant", "content": "Actual AI Response"}), flush=True)
+print(json.dumps({"type": "result", "stats": {"total_tokens": 10}}), flush=True)
+''')
+
+        # Point the gemini_cli provider at the generated script.
+        cli_cmd = f'"{sys.executable}" "{bleed_mock}"'
+        client.set_value("current_provider", "gemini_cli")
+        client.set_value("gcli_path", cli_cmd)
+
+        client.set_value("ai_input", "Test context bleed")
+        client.click("btn_gen_send")
+
+        # Wait for completion
+        time.sleep(3)
+
+        session = client.get_session()
+        entries = session.get("session", {}).get("entries", [])
+
+        # Verify: We expect exactly one AI entry, and it must NOT contain the echoed user message
+        ai_entries = [e for e in entries if e.get("role") == "AI"]
+        assert len(ai_entries) == 1
+        assert ai_entries[0].get("content") == "Actual AI Response"
+        assert "echoing you" not in ai_entries[0].get("content")
+    finally:
+        # Always remove the generated script, even when an assertion fails.
+        if os.path.exists(bleed_mock):
+            os.remove(bleed_mock)
+
+def test_gemini_cli_parameter_resilience(live_gui):
+    """
+    Test that mcp_client correctly handles 'file_path' and 'dir_path' aliases
+    sent by the AI instead of 'path'.
+
+    Only the 'dir_path' alias (on list_directory) is exercised here. A mock CLI
+    script is generated at tests/mock_alias_tool.py: on its first run it calls
+    the tool bridge and emits a tool_use event with 'dir_path'; when its prompt
+    contains a tool-role message it answers "Tool worked!". The test approves
+    the resulting ask and then looks for that answer in the session entries.
+
+    The generated script is deleted in a ``finally`` block so that a failed
+    assertion does not leave it behind.
+    """
+    client = ApiHookClient("http://127.0.0.1:8999")
+    client.click("btn_reset")
+    client.set_value("auto_add_history", True)
+    client.select_list_item("proj_files", "manual_slop")
+
+    # Create a mock that uses dir_path for list_directory
+    alias_mock = os.path.abspath("tests/mock_alias_tool.py")
+    bridge_path = os.path.abspath("scripts/cli_tool_bridge.py")
+    # The bridge path is pasted into a plain string literal of the generated
+    # script, where Windows backslashes would be read as escape sequences.
+    if sys.platform == "win32":
+        bridge_path_str = bridge_path.replace("\\", "/")
+    else:
+        bridge_path_str = bridge_path
+
+    try:
+        # Write to the same absolute path that is executed and removed below.
+        with open(alias_mock, "w") as f:
+            f.write(f'''import sys, json, os, subprocess
+prompt = sys.stdin.read()
+if '"role": "tool"' in prompt:
+    print(json.dumps({{"type": "message", "role": "assistant", "content": "Tool worked!"}}), flush=True)
+    print(json.dumps({{"type": "result", "stats": {{"total_tokens": 20}}}}), flush=True)
+else:
+    # We must call the bridge to trigger the GUI approval!
+    tool_call = {{"name": "list_directory", "input": {{"dir_path": "."}}}}
+    bridge_cmd = [sys.executable, "{bridge_path_str}"]
+    proc = subprocess.Popen(bridge_cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, text=True)
+    stdout, _ = proc.communicate(input=json.dumps(tool_call))
+    
+    # Even if bridge says allow, we emit the tool_use to the adapter
+    print(json.dumps({{"type": "message", "role": "assistant", "content": "I will list the directory."}}), flush=True)
+    print(json.dumps({{
+        "type": "tool_use", 
+        "name": "list_directory", 
+        "id": "alias_call",
+        "args": {{"dir_path": "."}} 
+    }}), flush=True)
+    print(json.dumps({{"type": "result", "stats": {{"total_tokens": 10}}}}), flush=True)
+''')
+
+        cli_cmd = f'"{sys.executable}" "{alias_mock}"'
+        client.set_value("current_provider", "gemini_cli")
+        client.set_value("gcli_path", cli_cmd)
+
+        client.set_value("ai_input", "Test parameter aliases")
+        client.click("btn_gen_send")
+
+        # Handle approval: poll the event queue and approve every ask seen
+        # in the first batch that contains one.
+        timeout = 15
+        start_time = time.time()
+        approved = False
+        while time.time() - start_time < timeout:
+            for ev in client.get_events():
+                if ev.get("type") == "ask_received":
+                    requests.post("http://127.0.0.1:8999/api/ask/respond",
+                                  json={"request_id": ev.get("request_id"), "response": {"approved": True}})
+                    approved = True
+            if approved:
+                break
+            time.sleep(0.5)
+
+        assert approved, "Tool approval event never received"
+
+        # Verify tool result in history
+        time.sleep(2)
+        session = client.get_session()
+        entries = session.get("session", {}).get("entries", [])
+
+        # Check for "Tool worked!" which implies the tool execution was successful
+        found = any("Tool worked!" in e.get("content", "") for e in entries)
+        assert found, "Tool result indicating success not found in history"
+    finally:
+        # Always remove the generated script, even when an assertion fails.
+        if os.path.exists(alias_mock):
+            os.remove(alias_mock)
+
+def test_gemini_cli_loop_termination(live_gui):
+    """
+    Check that a multi-round tool-calling exchange ends with a final answer.
+
+    The existing tests/mock_gemini_cli.py script is used as the CLI. The test
+    approves the tool request it triggers, then polls the session until an
+    entry containing "processed the tool results" shows up.
+    """
+    client = ApiHookClient("http://127.0.0.1:8999")
+    client.click("btn_reset")
+    client.set_value("auto_add_history", True)
+    client.select_list_item("proj_files", "manual_slop")
+
+    # Reuse the shared two-round mock as the CLI binary.
+    mock_script = os.path.abspath("tests/mock_gemini_cli.py")
+    client.set_value("current_provider", "gemini_cli")
+    client.set_value("gcli_path", f'"{sys.executable}" "{mock_script}"')
+
+    client.set_value("ai_input", "Perform multi-round tool test")
+    client.click("btn_gen_send")
+
+    # Phase 1: approve the tool request (up to 20 seconds, polling every 0.5s).
+    approval_window = 20
+    approval_began = time.time()
+    approved = False
+    while time.time() - approval_began < approval_window:
+        for event in client.get_events():
+            if event.get("type") != "ask_received":
+                continue
+            requests.post("http://127.0.0.1:8999/api/ask/respond",
+                          json={"request_id": event.get("request_id"), "response": {"approved": True}})
+            approved = True
+        if approved:
+            break
+        time.sleep(0.5)
+
+    # Phase 2: wait up to 15 seconds for the final answer of the second round.
+    found_final = False
+    final_began = time.time()
+    while time.time() - final_began < 15:
+        entries = client.get_session().get("session", {}).get("entries", [])
+        found_final = any(
+            "processed the tool results" in entry.get("content", "")
+            for entry in entries
+        )
+        if found_final:
+            break
+        time.sleep(1)
+
+    assert found_final, "Final message after multi-round tool loop not found"
--- a/tests/test_process_pending_gui_tasks.py
+++ b/tests/test_process_pending_gui_tasks.py
@@ -0,0 +1,64 @@
+import pytest
+from unittest.mock import MagicMock, patch
+import ai_client
+from gui_2 import App
+
+@pytest.fixture
+def app_instance():
+    """Yield an App built while config, project, logging, GUI-run and ai_client hooks are patched."""
+    stub_config = {
+        'ai': {'provider': 'gemini', 'model': 'gemini-2.5-flash-lite'},
+        'projects': {},
+    }
+    with (
+        patch('gui_2.load_config', return_value=stub_config),
+        patch('gui_2.save_config'),
+        patch('gui_2.project_manager'),
+        patch('gui_2.session_logger'),
+        patch('gui_2.immapp.run'),
+        patch.object(App, '_load_active_project'),
+        patch.object(App, '_fetch_models'),
+        patch.object(App, '_load_fonts'),
+        patch.object(App, '_post_init'),
+        patch('ai_client.set_provider'),
+        patch('ai_client.reset_session')
+    ):
+        # Yield inside the `with` so the patches stay active for the test body.
+        yield App()
+
+def test_redundant_calls_in_process_pending_gui_tasks(app_instance):
+    """
+    Pin how often ai_client is invoked when a 'current_provider' set_value
+    task is processed.
+
+    The test expects ai_client.set_provider and ai_client.reset_session to be
+    called twice each for a single queued task.
+    """
+    # Setup
+    app_instance._pending_gui_tasks = [
+        {'action': 'set_value', 'item': 'current_provider', 'value': 'anthropic'}
+    ]
+
+    # The App instance was created with a mocked ai_client; re-patch it here
+    # so that only the calls made while the task is processed are counted.
+    # A `with` header spanning several lines must be parenthesized (same form
+    # as the app_instance fixture); a bare trailing comma does not continue it.
+    with (
+        patch('ai_client.set_provider') as mock_set_provider,
+        patch('ai_client.reset_session') as mock_reset_session,
+    ):
+        app_instance._process_pending_gui_tasks()
+
+        # current_provider setter calls:
+        # ai_client.reset_session()
+        # ai_client.set_provider(value, self.current_model)
+
+        # _process_pending_gui_tasks ALSO calls:
+        # ai_client.set_provider(self.current_provider, self.current_model)
+        # ai_client.reset_session()
+
+        # Total should be 2 calls for each if redundant.
+        assert mock_set_provider.call_count == 2
+        assert mock_reset_session.call_count == 2
+
+def test_gcli_path_updates_adapter(app_instance):
+    """Check that a queued 'gcli_path' set_value task leaves ai_client with an adapter using that path."""
+    new_binary = '/new/path/to/gemini'
+
+    # Setup: select the CLI provider and queue the path change.
+    app_instance.current_provider = 'gemini_cli'
+    app_instance._pending_gui_tasks = [
+        {'action': 'set_value', 'item': 'gcli_path', 'value': new_binary},
+    ]
+
+    # Clear any existing adapter so the assertions below only pass if
+    # processing the task sets one.
+    ai_client._gemini_cli_adapter = None
+
+    app_instance._process_pending_gui_tasks()
+
+    adapter = ai_client._gemini_cli_adapter
+    assert adapter is not None
+    assert adapter.binary_path == new_binary