diff --git a/.gemini/settings.json b/.gemini/settings.json index 11e40f6..618442e 100644 --- a/.gemini/settings.json +++ b/.gemini/settings.json @@ -1,4 +1,7 @@ { + "tools": { + "discoveryCommand": "python C:/projects/manual_slop/scripts/tool_discovery.py" + }, "hooks": { "BeforeTool": [ { diff --git a/ai_client.py b/ai_client.py index 7574e61..72fa727 100644 --- a/ai_client.py +++ b/ai_client.py @@ -101,6 +101,7 @@ _ANTHROPIC_CHUNK_SIZE = 120_000 _SYSTEM_PROMPT = ( "You are a helpful coding assistant with access to a PowerShell tool and MCP tools (file access: read_file, list_directory, search_files, get_file_summary, web access: web_search, fetch_url). " + "When calling file/directory tools, always use the 'path' parameter for the target path. " "When asked to create or edit files, prefer targeted edits over full rewrites. " "Always explain what you are doing before invoking the tool.\n\n" "When writing or rewriting large files (especially those containing quotes, backticks, or special characters), " @@ -982,6 +983,9 @@ def _send_gemini_cli(md_content: str, user_message: str, base_dir: str, _append_comms("IN", "tool_result", {"name": name, "id": call_id, "output": out}) events.emit("tool_execution", payload={"status": "completed", "tool": name, "result": out, "round": r_idx}) + + # CRITICAL: Update payload for the next round + payload = json.dumps(tool_results_for_cli) if _cumulative_tool_bytes > _MAX_TOOL_OUTPUT_BYTES: _append_comms("OUT", "request", {"message": f"[TOOL OUTPUT BUDGET EXCEEDED: {_cumulative_tool_bytes} bytes]"}) diff --git a/api_hooks.py b/api_hooks.py index ae8fda5..d9c874a 100644 --- a/api_hooks.py +++ b/api_hooks.py @@ -275,7 +275,11 @@ class HookServer: self.thread = None def start(self): - if not getattr(self.app, 'test_hooks_enabled', False): + if self.thread and self.thread.is_alive(): + return + + is_gemini_cli = getattr(self.app, 'current_provider', '') == 'gemini_cli' + if not getattr(self.app, 'test_hooks_enabled', False) and not is_gemini_cli: return # Ensure the app has the task queue and lock initialized diff --git a/conductor/tracks/gemini_cli_parity_20260225/index.md b/conductor/tracks/archive/gemini_cli_parity_20260225/index.md similarity index 100% rename from conductor/tracks/gemini_cli_parity_20260225/index.md rename to conductor/tracks/archive/gemini_cli_parity_20260225/index.md diff --git a/conductor/tracks/gemini_cli_parity_20260225/metadata.json b/conductor/tracks/archive/gemini_cli_parity_20260225/metadata.json similarity index 100% rename from conductor/tracks/gemini_cli_parity_20260225/metadata.json rename to conductor/tracks/archive/gemini_cli_parity_20260225/metadata.json diff --git a/conductor/tracks/gemini_cli_parity_20260225/plan.md b/conductor/tracks/archive/gemini_cli_parity_20260225/plan.md similarity index 100% rename from conductor/tracks/gemini_cli_parity_20260225/plan.md rename to conductor/tracks/archive/gemini_cli_parity_20260225/plan.md diff --git a/conductor/tracks/gemini_cli_parity_20260225/spec.md b/conductor/tracks/archive/gemini_cli_parity_20260225/spec.md similarity index 100% rename from conductor/tracks/gemini_cli_parity_20260225/spec.md rename to conductor/tracks/archive/gemini_cli_parity_20260225/spec.md diff --git a/config.toml b/config.toml index 5b6b34e..7e4ed17 100644 --- a/config.toml +++ b/config.toml @@ -31,6 +31,7 @@ active = "C:\\projects\\manual_slop\\tests\\temp_project.toml" "Discussion Hub" = true "Operations Hub" = true Theme = true +"Log Management" = false Diagnostics = true [headless] diff --git a/gemini_cli_adapter.py b/gemini_cli_adapter.py index 2870656..3cdb552 100644 --- a/gemini_cli_adapter.py +++ b/gemini_cli_adapter.py @@ -91,9 +91,13 @@ class GeminiCliAdapter: if "session_id" in data: self.session_id = data.get("session_id") elif msg_type == "message": - content = data.get("content", data.get("text")) - if content: - accumulated_text += content + # CRITICAL: Only accumulate content from the assistant/model role. + # The CLI echoes back the 'user' prompt in the stream, which we must skip. + role = data.get("role", "") + if role in ["assistant", "model"]: + content = data.get("content", data.get("text")) + if content: + accumulated_text += content elif msg_type == "result": self.last_usage = data.get("stats") or data.get("usage") if "session_id" in data: diff --git a/gui_2.py b/gui_2.py index 896cea0..e2a5d7f 100644 --- a/gui_2.py +++ b/gui_2.py @@ -7,6 +7,7 @@ import json import sys import os import uuid +import requests from pathlib import Path from tkinter import filedialog, Tk import aggregate @@ -300,6 +301,10 @@ class App: ai_client._gemini_cli_adapter = ai_client.GeminiCliAdapter(binary_path=self.ui_gemini_cli_path) else: ai_client._gemini_cli_adapter.binary_path = self.ui_gemini_cli_path + + # Start hook server if not already running (required for bridge) + if hasattr(self, 'hook_server'): + self.hook_server.start() self.available_models = [] self._fetch_models(value) @@ -807,9 +812,12 @@ class App: if item in self._settable_fields: attr_name = self._settable_fields[item] setattr(self, attr_name, value) - if item in ["current_provider", "current_model"]: - ai_client.set_provider(self.current_provider, self.current_model) - ai_client.reset_session() + + if item == "gcli_path": + if not ai_client._gemini_cli_adapter: + ai_client._gemini_cli_adapter = ai_client.GeminiCliAdapter(binary_path=value) + else: + ai_client._gemini_cli_adapter.binary_path = value elif action == "click": item = task.get("item") @@ -1325,17 +1333,17 @@ class App: # ---- Menubar if imgui.begin_main_menu_bar(): if imgui.begin_menu("manual slop"): - if imgui.menu_item("Quit", "Ctrl+Q")[0]: + if imgui.menu_item("Quit", "Ctrl+Q", False)[0]: self.should_quit = True imgui.end_menu() if imgui.begin_menu("View"): for name in self.show_windows: - _, self.show_windows[name] = imgui.menu_item(name, None, self.show_windows[name]) + _, self.show_windows[name] = imgui.menu_item(name, "", self.show_windows[name]) imgui.end_menu() if imgui.begin_menu("Project"): - if imgui.menu_item("Save All", "Ctrl+S")[0]: + if imgui.menu_item("Save All", "Ctrl+S", False)[0]: self._flush_to_project() self._save_active_project() self._flush_to_config() diff --git a/manualslop_layout.ini b/manualslop_layout.ini index 051c0c5..0eeebb1 100644 --- a/manualslop_layout.ini +++ b/manualslop_layout.ini @@ -79,7 +79,7 @@ DockId=0x0000000F,2 [Window][Theme] Pos=0,17 -Size=588,545 +Size=348,545 Collapsed=0 DockId=0x00000005,1 @@ -89,14 +89,14 @@ Size=900,700 Collapsed=0 [Window][Diagnostics] -Pos=590,17 +Pos=350,17 Size=530,1183 Collapsed=0 DockId=0x0000000E,0 [Window][Context Hub] Pos=0,17 -Size=588,545 +Size=348,545 Collapsed=0 DockId=0x00000005,0 @@ -107,34 +107,39 @@ Collapsed=0 DockId=0x0000000D,0 [Window][Discussion Hub] -Pos=1122,17 +Pos=882,17 Size=558,1183 Collapsed=0 DockId=0x00000004,0 [Window][Operations Hub] -Pos=590,17 +Pos=350,17 Size=530,1183 Collapsed=0 DockId=0x0000000E,1 [Window][Files & Media] Pos=0,564 -Size=588,636 +Size=348,636 Collapsed=0 DockId=0x00000006,1 [Window][AI Settings] Pos=0,564 -Size=588,636 +Size=348,636 Collapsed=0 DockId=0x00000006,0 +[Window][Approve Tool Execution] +Pos=512,437 +Size=416,325 +Collapsed=0 + [Docking][Data] DockNode ID=0x00000008 Pos=3125,170 Size=593,1157 Split=Y DockNode ID=0x00000009 Parent=0x00000008 SizeRef=1029,147 Selected=0x0469CA7A DockNode ID=0x0000000A Parent=0x00000008 SizeRef=1029,145 Selected=0xDF822E02 -DockSpace ID=0xAFC85805 Window=0x079D3A04 Pos=0,17 Size=1680,1183 Split=Y +DockSpace ID=0xAFC85805 Window=0x079D3A04 Pos=0,17 Size=1440,1183 Split=Y DockNode ID=0x0000000C Parent=0xAFC85805 SizeRef=1362,1041 Split=X Selected=0x5D11106F DockNode ID=0x00000003 Parent=0x0000000C SizeRef=1120,1183 Split=X DockNode ID=0x0000000B Parent=0x00000003 SizeRef=404,1186 Split=Y Selected=0xF4139CA2 diff --git a/reproduce_delay.py b/reproduce_delay.py deleted file mode 100644 index 7707559..0000000 --- a/reproduce_delay.py +++ /dev/null @@ -1,18 +0,0 @@ -import time -from ai_client import get_gemini_cache_stats - -def reproduce_delay(): - print("Starting reproduction of Gemini cache list delay...") - - start_time = time.time() - try: - stats = get_gemini_cache_stats() - elapsed = (time.time() - start_time) * 1000.0 - print(f"get_gemini_cache_stats() took {elapsed:.2f}ms") - print(f"Stats: {stats}") - except Exception as e: - print(f"Error calling get_gemini_cache_stats: {e}") - print("Note: This might fail if no valid credentials.toml exists or API key is invalid.") - -if __name__ == "__main__": - reproduce_delay() diff --git a/reproduce_no_text.py b/reproduce_no_text.py deleted file mode 100644 index d07cd68..0000000 --- a/reproduce_no_text.py +++ /dev/null @@ -1,28 +0,0 @@ -import json -import subprocess -import os -import time -import sys - -# Add project root to sys.path -sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "."))) - -from gemini_cli_adapter import GeminiCliAdapter - -def test_repro(): - adapter = GeminiCliAdapter(binary_path="gemini") - # Using a simple message - message = "say hello" - print(f"Sending message: '{message}'") - - result = adapter.send(message, model="gemini-3-flash-preview") - - print("\n--- Result ---") - print(f"Text: '{result.get('text')}'") - print(f"Tool Calls: {result.get('tool_calls')}") - print(f"Usage: {adapter.last_usage}") - print(f"Session ID: {adapter.session_id}") - print(f"Stderr: {result.get('stderr')}") - -if __name__ == "__main__": - test_repro() diff --git a/scripts/cli_tool_bridge.py b/scripts/cli_tool_bridge.py index abf0148..8c00437 100644 --- a/scripts/cli_tool_bridge.py +++ b/scripts/cli_tool_bridge.py @@ -103,11 +103,12 @@ def main(): # 4. Check context — if not running via Manual Slop, we pass through (allow) # This prevents the hook from affecting normal CLI usage. hook_context = os.environ.get("GEMINI_CLI_HOOK_CONTEXT") + logging.debug(f"Checking GEMINI_CLI_HOOK_CONTEXT: '{hook_context}'") if hook_context != "manual_slop": - logging.debug("GEMINI_CLI_HOOK_CONTEXT not set to 'manual_slop'. Allowing execution without confirmation.") + logging.debug(f"GEMINI_CLI_HOOK_CONTEXT is '{hook_context}', NOT 'manual_slop'. Allowing execution without confirmation.") print(json.dumps({ "decision": "allow", - "reason": "Non-programmatic usage (GEMINI_CLI_HOOK_CONTEXT not set)." + "reason": f"Non-programmatic usage (GEMINI_CLI_HOOK_CONTEXT={hook_context})." })) return diff --git a/scripts/tool_discovery.py b/scripts/tool_discovery.py new file mode 100644 index 0000000..a5316c6 --- /dev/null +++ b/scripts/tool_discovery.py @@ -0,0 +1,42 @@ +import json +import sys +import os + +# Add project root to sys.path +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) + +try: + import mcp_client +except ImportError: + print("[]") + sys.exit(0) + +def main(): + specs = list(mcp_client.MCP_TOOL_SPECS) + + # Add run_powershell (manually define to match ai_client.py) + specs.append({ + "name": "run_powershell", + "description": ( + "Run a PowerShell script within the project base_dir. " + "Use this to create, edit, rename, or delete files and directories. " + "The working directory is set to base_dir automatically. " + "stdout and stderr are returned to you as the result." + ), + "parameters": { + "type": "object", + "properties": { + "script": { + "type": "string", + "description": "The PowerShell script to execute." + } + }, + "required": ["script"] + } + }) + + # Output as JSON array of FunctionDeclarations + print(json.dumps(specs, indent=2)) + +if __name__ == "__main__": + main() diff --git a/tests/mock_gemini_cli.py b/tests/mock_gemini_cli.py index a58a0cb..d175a55 100644 --- a/tests/mock_gemini_cli.py +++ b/tests/mock_gemini_cli.py @@ -6,9 +6,11 @@ import os def main(): # Debug log to stderr sys.stderr.write(f"DEBUG: mock_gemini_cli called with args: {sys.argv}\n") + sys.stderr.write(f"DEBUG: GEMINI_CLI_HOOK_CONTEXT: {os.environ.get('GEMINI_CLI_HOOK_CONTEXT')}\n") # Read prompt from stdin try: + # On Windows, stdin might be closed or behave weirdly if not handled prompt = sys.stdin.read() except EOFError: prompt = "" @@ -47,6 +49,7 @@ def main(): sys.stderr.flush() try: + # CRITICAL: Use the current process environment to ensure GEMINI_CLI_HOOK_CONTEXT is passed process = subprocess.Popen( [sys.executable, bridge_path], stdin=subprocess.PIPE, diff --git a/tests/temp_project_history.toml b/tests/temp_project_history.toml index fd6e190..afd2ada 100644 --- a/tests/temp_project_history.toml +++ b/tests/temp_project_history.toml @@ -4,7 +4,7 @@ roles = [ "Vendor API", "System", ] -active = "testing deepseek" +active = "testing gemini-cli" auto_add = true [discussions.main] @@ -16,5 +16,13 @@ history = [ [discussions."testing deepseek"] git_commit = "" -last_updated = "2026-02-26T00:29:48" +last_updated = "2026-02-26T12:38:38" +history = [ + "@2026-02-26T12:38:30\nUser:\nwasap deepseek?", + "@2026-02-26T12:38:34\nAI:\nHey there! 👋 I'm DeepSeek, an AI assistant created by DeepSeek Company. I'm here to help you with various tasks like coding, writing, analysis, problem-solving, and more!\n\nI can:\n- Write and debug code in multiple programming languages\n- Help with technical questions and explanations\n- Assist with writing and content creation\n- Analyze documents and data\n- Search the web for current information (when you enable it)\n- Work with files and directories\n\nWhat can I help you with today? Whether it's coding, learning something new, or just having a conversation, I'm ready to assist! 😊", +] + +[discussions."testing gemini-cli"] +git_commit = "" +last_updated = "2026-02-26T13:08:53" history = [] diff --git a/tests/test_gemini_cli_edge_cases.py b/tests/test_gemini_cli_edge_cases.py new file mode 100644 index 0000000..5b28476 --- /dev/null +++ b/tests/test_gemini_cli_edge_cases.py @@ -0,0 +1,170 @@ +import pytest +import time +import os +import sys +import requests +import json +from api_hook_client import ApiHookClient + +def test_gemini_cli_context_bleed_prevention(live_gui): + """ + Test that the GeminiCliAdapter correctly filters out echoed 'user' messages + and only shows assistant content in the GUI history. + """ + client = ApiHookClient("http://127.0.0.1:8999") + client.click("btn_reset") + client.set_value("auto_add_history", True) + + # Create a specialized mock for context bleed + bleed_mock = os.path.abspath("tests/mock_context_bleed.py") + with open(bleed_mock, "w") as f: + f.write('''import sys, json +print(json.dumps({"type": "init", "session_id": "bleed-test"}), flush=True) +print(json.dumps({"type": "message", "role": "user", "content": "I am echoing you"}), flush=True) +print(json.dumps({"type": "message", "role": "assistant", "content": "Actual AI Response"}), flush=True) +print(json.dumps({"type": "result", "stats": {"total_tokens": 10}}), flush=True) +''') + + cli_cmd = f'"{sys.executable}" "{bleed_mock}"' + client.set_value("current_provider", "gemini_cli") + client.set_value("gcli_path", cli_cmd) + + client.set_value("ai_input", "Test context bleed") + client.click("btn_gen_send") + + # Wait for completion + time.sleep(3) + + session = client.get_session() + entries = session.get("session", {}).get("entries", []) + + # Verify: We expect exactly one AI entry, and it must NOT contain the echoed user message + ai_entries = [e for e in entries if e.get("role") == "AI"] + assert len(ai_entries) == 1 + assert ai_entries[0].get("content") == "Actual AI Response" + assert "echoing you" not in ai_entries[0].get("content") + + os.remove(bleed_mock) + +def test_gemini_cli_parameter_resilience(live_gui): + """ + Test that mcp_client correctly handles 'file_path' and 'dir_path' aliases + sent by the AI instead of 'path'. + """ + client = ApiHookClient("http://127.0.0.1:8999") + client.click("btn_reset") + client.set_value("auto_add_history", True) + client.select_list_item("proj_files", "manual_slop") + + # Create a mock that uses dir_path for list_directory + alias_mock = os.path.abspath("tests/mock_alias_tool.py") + bridge_path = os.path.abspath("scripts/cli_tool_bridge.py") + # Avoid backslashes in f-string expression part + if sys.platform == "win32": + bridge_path_str = bridge_path.replace("\\", "/") + else: + bridge_path_str = bridge_path + + with open(alias_tool_content := "tests/mock_alias_tool.py", "w") as f: + f.write(f'''import sys, json, os, subprocess +prompt = sys.stdin.read() +if '"role": "tool"' in prompt: + print(json.dumps({{"type": "message", "role": "assistant", "content": "Tool worked!"}}), flush=True) + print(json.dumps({{"type": "result", "stats": {{"total_tokens": 20}}}}), flush=True) +else: + # We must call the bridge to trigger the GUI approval! + tool_call = {{"name": "list_directory", "input": {{"dir_path": "."}}}} + bridge_cmd = [sys.executable, "{bridge_path_str}"] + proc = subprocess.Popen(bridge_cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, text=True) + stdout, _ = proc.communicate(input=json.dumps(tool_call)) + + # Even if bridge says allow, we emit the tool_use to the adapter + print(json.dumps({{"type": "message", "role": "assistant", "content": "I will list the directory."}}), flush=True) + print(json.dumps({{ + "type": "tool_use", + "name": "list_directory", + "id": "alias_call", + "args": {{"dir_path": "."}} + }}), flush=True) + print(json.dumps({{"type": "result", "stats": {{"total_tokens": 10}}}}), flush=True) +''') + + cli_cmd = f'"{sys.executable}" "{alias_mock}"' + client.set_value("current_provider", "gemini_cli") + client.set_value("gcli_path", cli_cmd) + + client.set_value("ai_input", "Test parameter aliases") + client.click("btn_gen_send") + + # Handle approval + timeout = 15 + start_time = time.time() + approved = False + while time.time() - start_time < timeout: + for ev in client.get_events(): + if ev.get("type") == "ask_received": + requests.post("http://127.0.0.1:8999/api/ask/respond", + json={"request_id": ev.get("request_id"), "response": {"approved": True}}) + approved = True + if approved: break + time.sleep(0.5) + + assert approved, "Tool approval event never received" + + # Verify tool result in history + time.sleep(2) + session = client.get_session() + entries = session.get("session", {}).get("entries", []) + + # Check for "Tool worked!" which implies the tool execution was successful + found = any("Tool worked!" in e.get("content", "") for e in entries) + assert found, "Tool result indicating success not found in history" + + os.remove(alias_mock) + +def test_gemini_cli_loop_termination(live_gui): + """ + Test that multi-round tool calling correctly terminates and preserves + payload (session context) between rounds. + """ + client = ApiHookClient("http://127.0.0.1:8999") + client.click("btn_reset") + client.set_value("auto_add_history", True) + client.select_list_item("proj_files", "manual_slop") + + # This uses the existing mock_gemini_cli.py which is already designed for 2 rounds + mock_script = os.path.abspath("tests/mock_gemini_cli.py") + cli_cmd = f'"{sys.executable}" "{mock_script}"' + client.set_value("current_provider", "gemini_cli") + client.set_value("gcli_path", cli_cmd) + + client.set_value("ai_input", "Perform multi-round tool test") + client.click("btn_gen_send") + + # Handle approvals (mock does one tool call) + timeout = 20 + start_time = time.time() + approved = False + while time.time() - start_time < timeout: + for ev in client.get_events(): + if ev.get("type") == "ask_received": + requests.post("http://127.0.0.1:8999/api/ask/respond", + json={"request_id": ev.get("request_id"), "response": {"approved": True}}) + approved = True + if approved: break + time.sleep(0.5) + + # Wait for the second round and final answer + found_final = False + start_time = time.time() + while time.time() - start_time < 15: + session = client.get_session() + entries = session.get("session", {}).get("entries", []) + for e in entries: + if "processed the tool results" in e.get("content", ""): + found_final = True + break + if found_final: break + time.sleep(1) + + assert found_final, "Final message after multi-round tool loop not found" diff --git a/tests/test_process_pending_gui_tasks.py b/tests/test_process_pending_gui_tasks.py new file mode 100644 index 0000000..369d07e --- /dev/null +++ b/tests/test_process_pending_gui_tasks.py @@ -0,0 +1,64 @@ +import pytest +from unittest.mock import MagicMock, patch +import ai_client +from gui_2 import App + +@pytest.fixture +def app_instance(): + with ( + patch('gui_2.load_config', return_value={'ai': {'provider': 'gemini', 'model': 'gemini-2.5-flash-lite'}, 'projects': {}}), + patch('gui_2.save_config'), + patch('gui_2.project_manager'), + patch('gui_2.session_logger'), + patch('gui_2.immapp.run'), + patch.object(App, '_load_active_project'), + patch.object(App, '_fetch_models'), + patch.object(App, '_load_fonts'), + patch.object(App, '_post_init'), + patch('ai_client.set_provider'), + patch('ai_client.reset_session') + ): + app = App() + yield app + +def test_redundant_calls_in_process_pending_gui_tasks(app_instance): + # Setup + app_instance._pending_gui_tasks = [ + {'action': 'set_value', 'item': 'current_provider', 'value': 'anthropic'} + ] + + with patch('ai_client.set_provider') as mock_set_provider, + patch('ai_client.reset_session') as mock_reset_session: + + # We need to make sure the property setter's internal calls are also tracked or mocked. + # However, the App instance was created with mocked ai_client. + # Let's re-patch it specifically for this test. + + app_instance._process_pending_gui_tasks() + + # current_provider setter calls: + # ai_client.reset_session() + # ai_client.set_provider(value, self.current_model) + + # _process_pending_gui_tasks ALSO calls: + # ai_client.set_provider(self.current_provider, self.current_model) + # ai_client.reset_session() + + # Total should be 2 calls for each if redundant. + assert mock_set_provider.call_count == 2 + assert mock_reset_session.call_count == 2 + +def test_gcli_path_updates_adapter(app_instance): + # Setup + app_instance.current_provider = 'gemini_cli' + app_instance._pending_gui_tasks = [ + {'action': 'set_value', 'item': 'gcli_path', 'value': '/new/path/to/gemini'} + ] + + # Initialize adapter if it doesn't exist (it shouldn't in mock env) + ai_client._gemini_cli_adapter = None + + app_instance._process_pending_gui_tasks() + + assert ai_client._gemini_cli_adapter is not None + assert ai_client._gemini_cli_adapter.binary_path == '/new/path/to/gemini'