checkpoint: Working on getting gemini cli to actually have parity with gemini api.

This commit is contained in:
2026-02-26 00:31:33 -05:00
parent cbe359b1a5
commit a70680b2a2
14 changed files with 710 additions and 243 deletions

View File

@@ -4,93 +4,99 @@ import subprocess
import os
def main():
# The GUI calls: <binary> run --output-format stream-json
# The prompt is now passed via stdin.
# Debug log to stderr
sys.stderr.write(f"DEBUG: mock_gemini_cli called with args: {sys.argv}\n")
# Read prompt from stdin for debug
prompt = sys.stdin.read()
# Read prompt from stdin
try:
prompt = sys.stdin.read()
except EOFError:
prompt = ""
sys.stderr.write(f"DEBUG: Received prompt via stdin ({len(prompt)} chars)\n")
sys.stderr.flush()
if "run" not in sys.argv:
# Skip management commands
if len(sys.argv) > 1 and sys.argv[1] in ["mcp", "extensions", "skills", "hooks"]:
return
# If the prompt contains tool results (indicated by "role": "tool"),
# it means we are in the second round and should provide a final answer.
if '"role": "tool"' in prompt:
# If the prompt contains tool results, provide final answer
if '"role": "tool"' in prompt or '"tool_call_id"' in prompt:
print(json.dumps({
"type": "message",
"text": "I have processed the tool results. Everything looks good!"
"role": "assistant",
"content": "I have processed the tool results. Everything looks good!"
}), flush=True)
print(json.dumps({
"type": "result",
"usage": {"total_tokens": 100},
"status": "success",
"stats": {"total_tokens": 100, "input_tokens": 80, "output_tokens": 20},
"session_id": "mock-session-final"
}), flush=True)
return
# Simulate the 'BeforeTool' hook by calling the bridge directly.
# Default flow: simulate a tool call
bridge_path = os.path.abspath("scripts/cli_tool_bridge.py")
tool_call = {
"tool_name": "read_file",
"tool_input": {"path": "test.txt"}
# Using format that bridge understands
bridge_tool_call = {
"name": "read_file",
"input": {"path": "test.txt"}
}
sys.stderr.write(f"DEBUG: Calling bridge at {bridge_path}\n")
sys.stderr.flush()
# Bridge reads from stdin
process = subprocess.Popen(
[sys.executable, bridge_path],
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
env=os.environ # Ensure environment variables are inherited
)
stdout, stderr = process.communicate(input=json.dumps(tool_call))
sys.stderr.write(f"DEBUG: Bridge stdout: {stdout}\n")
sys.stderr.write(f"DEBUG: Bridge stderr: {stderr}\n")
sys.stderr.flush()
try:
process = subprocess.Popen(
[sys.executable, bridge_path],
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
env=os.environ
)
stdout, stderr = process.communicate(input=json.dumps(bridge_tool_call))
sys.stderr.write(f"DEBUG: Bridge stdout: {stdout}\n")
sys.stderr.write(f"DEBUG: Bridge stderr: {stderr}\n")
decision_data = json.loads(stdout.strip())
decision = decision_data.get("decision")
except Exception as e:
sys.stderr.write(f"DEBUG: Failed to parse bridge output: {e}\n")
sys.stderr.write(f"DEBUG: Bridge failed: {e}\n")
decision = "deny"
# Output JSONL to stdout
if decision == "allow":
# Simulate REAL CLI field names for adapter normalization test
print(json.dumps({
"type": "tool_use",
"name": "read_file",
"args": {"path": "test.txt"}
"tool_name": "read_file",
"tool_id": "call_123",
"parameters": {"path": "test.txt"}
}), flush=True)
print(json.dumps({
"type": "message",
"text": "I read the file. It contains: 'Hello from mock!'"
"role": "assistant",
"content": "I am reading the file now..."
}), flush=True)
print(json.dumps({
"type": "result",
"usage": {"total_tokens": 50},
"status": "success",
"stats": {"total_tokens": 50, "input_tokens": 40, "output_tokens": 10},
"session_id": "mock-session-123"
}), flush=True)
else:
print(json.dumps({
"type": "message",
"text": f"Tool execution was denied. Decision: {decision}"
"role": "assistant",
"content": f"Tool execution was denied. Decision: {decision}"
}), flush=True)
print(json.dumps({
"type": "result",
"usage": {"total_tokens": 10},
"status": "success",
"stats": {"total_tokens": 10, "input_tokens": 10, "output_tokens": 0},
"session_id": "mock-session-denied"
}), flush=True)

View File

@@ -20,7 +20,7 @@ base_dir = "."
paths = []
[gemini_cli]
binary_path = "gemini"
binary_path = "\"C:\\projects\\manual_slop\\.venv\\Scripts\\python.exe\" \"C:\\projects\\manual_slop\\tests\\mock_gemini_cli.py\""
[agent.tools]
run_powershell = true

View File

@@ -16,8 +16,5 @@ history = [
[discussions."testing deepseek"]
git_commit = ""
last_updated = "2026-02-25T23:33:45"
history = [
"@2026-02-25T23:33:21\nUser:\ntesting testing 123?",
"@2026-02-25T23:33:25\nAI:\nI see you're testing with \"testing testing 123\". I'm here and ready to help! I'm a helpful coding assistant with access to PowerShell and various tools for file operations and web access.\n\nWhat would you like me to help you with today? I can assist with:\n- Writing or editing code files\n- Searching for information\n- Reading or analyzing files\n- Running commands\n- Web searches\n- And much more!\n\nJust let me know what you need help with.",
]
last_updated = "2026-02-26T00:29:48"
history = []

View File

@@ -0,0 +1,17 @@
import pytest
from unittest.mock import patch, MagicMock
import ai_client
def test_list_models_gemini_cli():
    """
    Check the model catalogue that 'ai_client.list_models' reports for the
    'gemini_cli' provider: every expected model name must be present and
    the list must contain exactly five entries.
    """
    expected_models = (
        "gemini-3.1-pro-preview",
        "gemini-3-flash-preview",
        "gemini-2.5-pro",
        "gemini-2.5-flash",
        "gemini-2.5-flash-lite",
    )
    available = ai_client.list_models("gemini_cli")
    for model_name in expected_models:
        assert model_name in available
    # The length check catches unexpected extra entries.
    assert len(available) == 5

View File

@@ -0,0 +1,53 @@
import unittest
from unittest.mock import patch, MagicMock
import io
import json
import sys
import os
# Add project root to sys.path
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
# Import after path fix
from scripts.cli_tool_bridge import main
class TestCliToolBridgeMapping(unittest.TestCase):
    """Checks how the CLI tool bridge maps an API-format tool call onto the hook client."""

    def setUp(self):
        """Set GEMINI_CLI_HOOK_CONTEXT for the duration of a single test.

        The variable is applied through ``patch.dict`` and undone by a
        registered cleanup, so it does not leak into tests that run later in
        the same process (a bare ``os.environ[...] = ...`` would persist).
        """
        # NOTE(review): presumably the bridge only talks to the hook client when
        # this context variable is set -- confirm against scripts/cli_tool_bridge.py.
        env_patch = patch.dict(os.environ, {'GEMINI_CLI_HOOK_CONTEXT': 'manual_slop'})
        env_patch.start()
        self.addCleanup(env_patch.stop)

    @patch('sys.stdin', new_callable=io.StringIO)
    @patch('sys.stdout', new_callable=io.StringIO)
    @patch('api_hook_client.ApiHookClient.request_confirmation')
    def test_mapping_from_api_format(self, mock_request, mock_stdout, mock_stdin):
        """
        Verify that bridge correctly maps 'id', 'name', 'input' (Gemini API format)
        into tool_name and tool_input for the hook client.
        """
        api_tool_call = {
            'id': 'call123',
            'name': 'read_file',
            'input': {'path': 'test.txt'}
        }

        # 1. Mock stdin with the API format JSON
        mock_stdin.write(json.dumps(api_tool_call))
        mock_stdin.seek(0)  # rewind so main() reads the payload from the start

        # 2. Mock ApiHookClient to return approved
        mock_request.return_value = {'approved': True}

        # Run main
        main()

        # 3. Verify that request_confirmation was called with mapped values
        # If it's not mapped, it will likely be called with None or fail
        mock_request.assert_called_once_with('read_file', {'path': 'test.txt'})

        # 4. Capture stdout and assert allow
        output_str = mock_stdout.getvalue().strip()
        self.assertTrue(output_str, "Stdout should not be empty")
        output = json.loads(output_str)
        self.assertEqual(output.get('decision'), 'allow')
if __name__ == '__main__':
unittest.main()

View File

@@ -0,0 +1,175 @@
import unittest
from unittest.mock import patch, MagicMock, ANY
import json
import subprocess
import io
import sys
import os
# Ensure the project root is in sys.path to resolve imports correctly
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
if project_root not in sys.path:
    sys.path.append(project_root)

# Mock the session_logger module to prevent file operations during tests.
# The stub must be registered BEFORE gemini_cli_adapter is imported: an
# `import session_logger` executed while the adapter module loads binds
# whatever is in sys.modules at that moment, so installing the stub afterwards
# would leave the adapter holding the real module.
# NOTE(review): assumes gemini_cli_adapter imports session_logger by that
# top-level name -- confirm against the adapter source.
mock_session_logger = MagicMock()
sys.modules['session_logger'] = mock_session_logger

# Import the class to be tested (only after the stub is in place)
from gemini_cli_adapter import GeminiCliAdapter
class TestGeminiCliAdapterParity(unittest.TestCase):
    """Unit tests for GeminiCliAdapter that run against a patched subprocess.Popen."""

    def setUp(self):
        """Create a new adapter, clear its per-session fields and reset the logger mock."""
        self.adapter = GeminiCliAdapter(binary_path="gemini")
        self.adapter.session_id = None
        self.adapter.last_usage = None
        self.adapter.last_latency = 0.0
        # Drop any calls recorded on the session_logger stub by earlier tests
        mock_session_logger.reset_mock()

    def _stub_cli_process(self, mock_popen):
        """Wire mock_popen to return a process that exits 0 with a single 'result' JSONL line."""
        fake_process = MagicMock()
        stdout_payload = json.dumps({"type": "result", "usage": {}}) + "\n"
        fake_process.communicate.return_value = (stdout_payload, "")
        fake_process.returncode = 0
        mock_popen.return_value = fake_process
        return fake_process

    @patch('subprocess.Popen')
    def test_count_tokens_uses_estimation(self, mock_popen):
        """
        count_tokens must return the character-based estimate
        (length of the newline-joined contents, integer-divided by 4).
        """
        lines = ["This is the first line.", "This is the second line."]
        estimate = len("\n".join(lines)) // 4

        self.assertEqual(self.adapter.count_tokens(contents=lines), estimate)
        # Counting must not spawn a subprocess
        mock_popen.assert_not_called()

    @patch('subprocess.Popen')
    def test_send_with_safety_settings_no_flags_added(self, mock_popen):
        """
        Passing safety_settings to send must NOT put a --safety flag on the
        command, and the message must still be delivered through stdin.
        """
        fake_process = self._stub_cli_process(mock_popen)
        prompt = "User's prompt here."
        safety = [
            {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_ONLY_HIGH"},
            {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
        ]

        self.adapter.send(message=prompt, safety_settings=safety)

        popen_args, _ = mock_popen.call_args
        # No --safety flag on the launched command
        self.assertNotIn("--safety", popen_args[0])
        # Prompt goes in unchanged via stdin
        fake_process.communicate.assert_called_once_with(input=prompt)

    @patch('subprocess.Popen')
    def test_send_without_safety_settings_no_flags(self, mock_popen):
        """
        With safety_settings set to None, and then to an empty list, the
        command must not contain a --safety flag.
        """
        self._stub_cli_process(mock_popen)
        prompt = "Another prompt."

        self.adapter.send(message=prompt, safety_settings=None)
        popen_args_for_none, _ = mock_popen.call_args
        self.assertNotIn("--safety", popen_args_for_none[0])

        # Forget the first call before exercising the empty-list variant
        mock_popen.reset_mock()

        self.adapter.send(message=prompt, safety_settings=[])
        popen_args_for_empty, _ = mock_popen.call_args
        self.assertNotIn("--safety", popen_args_for_empty[0])

    @patch('subprocess.Popen')
    def test_send_with_system_instruction_prepended_to_stdin(self, mock_popen):
        """
        The system instruction must be prepended to the stdin payload
        (separated by a blank line) and no --system flag may be added.
        """
        fake_process = self._stub_cli_process(mock_popen)
        prompt = "User's prompt here."
        instruction = "Some instruction"

        self.adapter.send(message=prompt, system_instruction=instruction)

        popen_args, _ = mock_popen.call_args
        # stdin carries "<instruction>\n\n<prompt>"
        fake_process.communicate.assert_called_once_with(input=f"{instruction}\n\n{prompt}")
        # and the command line carries no --system flag
        self.assertNotIn("--system", popen_args[0])

    @patch('subprocess.Popen')
    def test_send_with_model_parameter(self, mock_popen):
        """
        When a model is given, send must add the -m <model> flag to the command.
        """
        fake_process = self._stub_cli_process(mock_popen)
        prompt = "User's prompt here."
        model_name = "gemini-1.5-flash"

        self.adapter.send(message=prompt, model=model_name)

        popen_args, _ = mock_popen.call_args
        # The quoted model flag appears in the command
        self.assertIn(f'-m "{model_name}"', popen_args[0])
        # The message still goes through stdin untouched
        fake_process.communicate.assert_called_once_with(input=prompt)

    @patch('subprocess.Popen')
    def test_send_kills_process_on_communicate_exception(self, mock_popen):
        """
        If communicate() raises, send must kill the child process and let
        the very same exception object propagate to the caller.
        """
        fake_process = MagicMock()
        mock_popen.return_value = fake_process
        failure = RuntimeError("Simulated communicate error")
        fake_process.communicate.side_effect = failure

        with self.assertRaises(RuntimeError) as caught:
            self.adapter.send(message="User message")

        # The child must have been killed exactly once
        fake_process.kill.assert_called_once()
        # and the original exception instance re-raised
        self.assertIs(caught.exception, failure)
if __name__ == '__main__':
unittest.main()

View File

@@ -8,6 +8,7 @@ from api_hook_client import ApiHookClient
def test_gemini_cli_full_integration(live_gui):
"""
Integration test for the Gemini CLI provider and tool bridge.
Handles 'ask_received' events from the bridge and any other approval requests.
"""
client = ApiHookClient("http://127.0.0.1:8999")
@@ -18,21 +19,19 @@ def test_gemini_cli_full_integration(live_gui):
client.select_list_item("proj_files", "manual_slop")
# 1. Setup paths and configure the GUI
# Use the real gemini CLI if available, otherwise use mock
# For CI/testing we prefer mock
mock_script = os.path.abspath("tests/mock_gemini_cli.py")
# Wrap in quotes for shell execution if path has spaces
cli_cmd = f'"{sys.executable}" "{mock_script}"'
# Set provider and binary path via GUI hooks
# Note: Using set_value which now triggers the property setter in gui_2.py
print(f"[TEST] Setting current_provider to gemini_cli")
client.set_value("current_provider", "gemini_cli")
print(f"[TEST] Setting gcli_path to {cli_cmd}")
client.set_value("gcli_path", cli_cmd)
# Verify settings were applied
# Verify settings
assert client.get_value("current_provider") == "gemini_cli"
assert client.get_value("gcli_path") == cli_cmd
# Clear events
client.get_events()
@@ -41,55 +40,48 @@ def test_gemini_cli_full_integration(live_gui):
client.set_value("ai_input", "Please read test.txt")
client.click("btn_gen_send")
# 3. Monitor for the 'ask_received' event
print("[TEST] Waiting for ask_received event...")
request_id = None
timeout = 30
# 3. Monitor for approval events
print("[TEST] Waiting for approval events...")
timeout = 45
start_time = time.time()
approved_count = 0
while time.time() - start_time < timeout:
events = client.get_events()
if events:
print(f"[TEST] Received {len(events)} events: {[e.get('type') for e in events]}")
for ev in events:
if ev.get("type") == "ask_received":
request_id = ev.get("request_id")
print(f"[TEST] Found request_id: {request_id}")
break
if request_id:
break
time.sleep(0.5)
assert request_id is not None, "Timed out waiting for 'ask_received' event from the bridge"
# 4. Respond to the permission request
print("[TEST] Responding to ask with approval")
resp = requests.post(
"http://127.0.0.1:8999/api/ask/respond",
json={
"request_id": request_id,
"response": {"approved": True}
}
)
assert resp.status_code == 200
# 5. Verify that the final response is displayed in the GUI
print("[TEST] Waiting for final message in history...")
final_message_received = False
start_time = time.time()
while time.time() - start_time < timeout:
for ev in events:
etype = ev.get("type")
eid = ev.get("request_id") or ev.get("action_id")
print(f"[TEST] Received event: {etype} (ID: {eid})")
if etype in ["ask_received", "glob_approval_required", "script_confirmation_required"]:
print(f"[TEST] Approving {etype} {eid}")
if etype == "script_confirmation_required":
resp = requests.post(f"http://127.0.0.1:8999/api/confirm/{eid}", json={"approved": True})
else:
resp = requests.post("http://127.0.0.1:8999/api/ask/respond",
json={"request_id": eid, "response": {"approved": True}})
assert resp.status_code == 200
approved_count += 1
# Check if we got a final response in history
session = client.get_session()
entries = session.get("session", {}).get("entries", [])
found_final = False
for entry in entries:
content = entry.get("content", "")
if "Hello from mock!" in content:
print(f"[TEST] Success! Found message: {content[:50]}...")
final_message_received = True
if "Hello from mock!" in content or "processed the tool results" in content:
print(f"[TEST] Success! Found final message in history.")
found_final = True
break
if final_message_received:
if found_final:
break
time.sleep(1.0)
assert final_message_received, "Final message from mock CLI was not found in the GUI history"
assert approved_count > 0, "No approval events were processed"
assert found_final, "Final message from mock CLI was not found in the GUI history"
def test_gemini_cli_rejection_and_history(live_gui):
"""
@@ -97,88 +89,53 @@ def test_gemini_cli_rejection_and_history(live_gui):
"""
client = ApiHookClient("http://127.0.0.1:8999")
# 0. Reset session and enable history
# 0. Reset session
client.click("btn_reset")
client.set_value("auto_add_history", True)
# Switch to manual_slop project explicitly
client.select_list_item("proj_files", "manual_slop")
# 1. Setup paths and configure the GUI
mock_script = os.path.abspath("tests/mock_gemini_cli.py")
cli_cmd = f'"{sys.executable}" "{mock_script}"'
client.set_value("current_provider", "gemini_cli")
client.set_value("gcli_path", cli_cmd)
# 2. Trigger a message that will be denied
# 2. Trigger a message
print("[TEST] Sending user message (to be denied)...")
client.set_value("ai_input", "Deny me")
client.click("btn_gen_send")
# 3. Wait for 'ask_received' and respond with rejection
request_id = None
timeout = 15
# 3. Wait for event and reject
timeout = 20
start_time = time.time()
denied = False
while time.time() - start_time < timeout:
for ev in client.get_events():
if ev.get("type") == "ask_received":
request_id = ev.get("request_id")
etype = ev.get("type")
eid = ev.get("request_id")
print(f"[TEST] Received event: {etype}")
if etype == "ask_received":
print(f"[TEST] Denying request {eid}")
requests.post("http://127.0.0.1:8999/api/ask/respond",
json={"request_id": eid, "response": {"approved": False}})
denied = True
break
if request_id: break
if denied: break
time.sleep(0.5)
assert request_id is not None
assert denied, "No ask_received event to deny"
print("[TEST] Responding to ask with REJECTION")
requests.post("http://127.0.0.1:8999/api/ask/respond",
json={"request_id": request_id, "response": {"approved": False}})
# 4. Verify rejection message in history
print("[TEST] Waiting for rejection message in history...")
# 4. Verify rejection in history
print("[TEST] Waiting for rejection in history...")
rejection_found = False
start_time = time.time()
while time.time() - start_time < timeout:
while time.time() - start_time < 20:
session = client.get_session()
entries = session.get("session", {}).get("entries", [])
for entry in entries:
if "Tool execution was denied. Decision: deny" in entry.get("content", ""):
if "Tool execution was denied" in entry.get("content", ""):
rejection_found = True
break
if rejection_found: break
time.sleep(1.0)
assert rejection_found, "Rejection message not found in history"
# 5. Send a follow-up message and verify history grows
print("[TEST] Sending follow-up message...")
client.set_value("ai_input", "What happened?")
client.click("btn_gen_send")
# Wait for mock to finish (polling history)
print("[TEST] Waiting for final history entry (max 30s)...")
final_message_received = False
start_time = time.time()
while time.time() - start_time < 30:
session = client.get_session()
entries = session.get("session", {}).get("entries", [])
if len(entries) >= 3:
final_message_received = True
break
# Print snapshot for debug
if int(time.time() - start_time) % 5 == 0:
print(f"[TEST] History length at {int(time.time() - start_time)}s: {len(entries)}")
time.sleep(1.0)
session = client.get_session()
entries = session.get("session", {}).get("entries", [])
# Should have:
# 1. User: Deny me
# 2. AI: Tool execution was denied...
# 3. User: What happened?
# 4. AI or System: ...
print(f"[TEST] Final history length: {len(entries)}")
for i, entry in enumerate(entries):
print(f" {i}: {entry.get('role')} - {entry.get('content')[:30]}...")
assert len(entries) >= 3

View File

@@ -0,0 +1,52 @@
import pytest
from unittest.mock import patch, MagicMock
import sys
import os
# Add project root to sys.path
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
import ai_client
@pytest.fixture(autouse=True)
def setup_ai_client(monkeypatch):
    """Prepare ai_client for each test in this module.

    Resets the session, selects the 'gemini_cli' provider with the
    'gemini-2.5-flash' model, and replaces the three module-level callbacks
    with inert stubs. The stubs are installed through ``monkeypatch`` so the
    previous values are restored after each test instead of leaking into
    other test modules that share the imported ``ai_client`` module.
    """
    ai_client.reset_session()
    ai_client.set_provider("gemini_cli", "gemini-2.5-flash")
    # raising=False mirrors plain assignment: it works even if the attribute
    # does not exist on ai_client yet.
    monkeypatch.setattr(
        ai_client, "confirm_and_run_callback",
        lambda script, base_dir: "Mocked execution", raising=False,
    )
    monkeypatch.setattr(ai_client, "comms_log_callback", lambda entry: None, raising=False)
    monkeypatch.setattr(ai_client, "tool_log_callback", lambda script, result: None, raising=False)
    yield
@patch('ai_client.GeminiCliAdapter')
@patch('ai_client._get_combined_system_prompt')
def test_send_invokes_adapter_send(mock_prompt, mock_adapter_class):
    """
    ai_client.send must call the adapter's send with the discussion history
    folded into the message payload, and pass the combined system prompt plus
    the <context> block as the system_instruction keyword.
    """
    adapter = mock_adapter_class.return_value
    adapter.send.return_value = {"text": "Done", "tool_calls": []}
    adapter.last_usage = {"input_tokens": 10}
    adapter.last_latency = 0.1
    adapter.session_id = None
    mock_prompt.return_value = "Mocked Prompt"

    ai_client.send("context", "message", discussion_history="hist")

    # Check the call happened before unpacking call_args (None when never called)
    assert adapter.send.called
    positional, keyword = adapter.send.call_args
    assert positional[0] == "[DISCUSSION HISTORY]\n\nhist\n\n---\n\nmessage"
    assert keyword['system_instruction'] == "Mocked Prompt\n\n<context>\ncontext\n</context>"
@patch('ai_client.GeminiCliAdapter')
def test_get_history_bleed_stats(mock_adapter_class):
    """
    After one send through the mocked adapter, get_history_bleed_stats must
    report the 'gemini_cli' provider and a 'current' value equal to the
    adapter's last reported input token count.
    """
    adapter = mock_adapter_class.return_value
    adapter.send.return_value = {"text": "txt", "tool_calls": []}
    adapter.configure_mock(
        last_usage={"input_tokens": 1500},
        last_latency=0.5,
        session_id="sess",
    )

    # A message has to be sent first so the adapter is initialised
    ai_client.send("context", "msg")

    bleed_stats = ai_client.get_history_bleed_stats()
    assert bleed_stats["provider"] == "gemini_cli"
    assert bleed_stats["current"] == 1500