checkpoint: Working on getting gemini cli to actually have parity with gemini api.
This commit is contained in:
@@ -4,93 +4,99 @@ import subprocess
|
||||
import os
|
||||
|
||||
def main():
|
||||
# The GUI calls: <binary> run --output-format stream-json
|
||||
# The prompt is now passed via stdin.
|
||||
|
||||
# Debug log to stderr
|
||||
sys.stderr.write(f"DEBUG: mock_gemini_cli called with args: {sys.argv}\n")
|
||||
|
||||
# Read prompt from stdin for debug
|
||||
prompt = sys.stdin.read()
|
||||
# Read prompt from stdin
|
||||
try:
|
||||
prompt = sys.stdin.read()
|
||||
except EOFError:
|
||||
prompt = ""
|
||||
|
||||
sys.stderr.write(f"DEBUG: Received prompt via stdin ({len(prompt)} chars)\n")
|
||||
sys.stderr.flush()
|
||||
|
||||
if "run" not in sys.argv:
|
||||
# Skip management commands
|
||||
if len(sys.argv) > 1 and sys.argv[1] in ["mcp", "extensions", "skills", "hooks"]:
|
||||
return
|
||||
|
||||
# If the prompt contains tool results (indicated by "role": "tool"),
|
||||
# it means we are in the second round and should provide a final answer.
|
||||
if '"role": "tool"' in prompt:
|
||||
# If the prompt contains tool results, provide final answer
|
||||
if '"role": "tool"' in prompt or '"tool_call_id"' in prompt:
|
||||
print(json.dumps({
|
||||
"type": "message",
|
||||
"text": "I have processed the tool results. Everything looks good!"
|
||||
"role": "assistant",
|
||||
"content": "I have processed the tool results. Everything looks good!"
|
||||
}), flush=True)
|
||||
print(json.dumps({
|
||||
"type": "result",
|
||||
"usage": {"total_tokens": 100},
|
||||
"status": "success",
|
||||
"stats": {"total_tokens": 100, "input_tokens": 80, "output_tokens": 20},
|
||||
"session_id": "mock-session-final"
|
||||
}), flush=True)
|
||||
return
|
||||
|
||||
# Simulate the 'BeforeTool' hook by calling the bridge directly.
|
||||
# Default flow: simulate a tool call
|
||||
bridge_path = os.path.abspath("scripts/cli_tool_bridge.py")
|
||||
|
||||
tool_call = {
|
||||
"tool_name": "read_file",
|
||||
"tool_input": {"path": "test.txt"}
|
||||
# Using format that bridge understands
|
||||
bridge_tool_call = {
|
||||
"name": "read_file",
|
||||
"input": {"path": "test.txt"}
|
||||
}
|
||||
|
||||
sys.stderr.write(f"DEBUG: Calling bridge at {bridge_path}\n")
|
||||
sys.stderr.flush()
|
||||
|
||||
# Bridge reads from stdin
|
||||
process = subprocess.Popen(
|
||||
[sys.executable, bridge_path],
|
||||
stdin=subprocess.PIPE,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
text=True,
|
||||
env=os.environ # Ensure environment variables are inherited
|
||||
)
|
||||
stdout, stderr = process.communicate(input=json.dumps(tool_call))
|
||||
|
||||
sys.stderr.write(f"DEBUG: Bridge stdout: {stdout}\n")
|
||||
sys.stderr.write(f"DEBUG: Bridge stderr: {stderr}\n")
|
||||
sys.stderr.flush()
|
||||
|
||||
try:
|
||||
process = subprocess.Popen(
|
||||
[sys.executable, bridge_path],
|
||||
stdin=subprocess.PIPE,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
text=True,
|
||||
env=os.environ
|
||||
)
|
||||
stdout, stderr = process.communicate(input=json.dumps(bridge_tool_call))
|
||||
|
||||
sys.stderr.write(f"DEBUG: Bridge stdout: {stdout}\n")
|
||||
sys.stderr.write(f"DEBUG: Bridge stderr: {stderr}\n")
|
||||
|
||||
decision_data = json.loads(stdout.strip())
|
||||
decision = decision_data.get("decision")
|
||||
except Exception as e:
|
||||
sys.stderr.write(f"DEBUG: Failed to parse bridge output: {e}\n")
|
||||
sys.stderr.write(f"DEBUG: Bridge failed: {e}\n")
|
||||
decision = "deny"
|
||||
|
||||
# Output JSONL to stdout
|
||||
if decision == "allow":
|
||||
# Simulate REAL CLI field names for adapter normalization test
|
||||
print(json.dumps({
|
||||
"type": "tool_use",
|
||||
"name": "read_file",
|
||||
"args": {"path": "test.txt"}
|
||||
"tool_name": "read_file",
|
||||
"tool_id": "call_123",
|
||||
"parameters": {"path": "test.txt"}
|
||||
}), flush=True)
|
||||
|
||||
print(json.dumps({
|
||||
"type": "message",
|
||||
"text": "I read the file. It contains: 'Hello from mock!'"
|
||||
"role": "assistant",
|
||||
"content": "I am reading the file now..."
|
||||
}), flush=True)
|
||||
|
||||
print(json.dumps({
|
||||
"type": "result",
|
||||
"usage": {"total_tokens": 50},
|
||||
"status": "success",
|
||||
"stats": {"total_tokens": 50, "input_tokens": 40, "output_tokens": 10},
|
||||
"session_id": "mock-session-123"
|
||||
}), flush=True)
|
||||
else:
|
||||
print(json.dumps({
|
||||
"type": "message",
|
||||
"text": f"Tool execution was denied. Decision: {decision}"
|
||||
"role": "assistant",
|
||||
"content": f"Tool execution was denied. Decision: {decision}"
|
||||
}), flush=True)
|
||||
print(json.dumps({
|
||||
"type": "result",
|
||||
"usage": {"total_tokens": 10},
|
||||
"status": "success",
|
||||
"stats": {"total_tokens": 10, "input_tokens": 10, "output_tokens": 0},
|
||||
"session_id": "mock-session-denied"
|
||||
}), flush=True)
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@ base_dir = "."
|
||||
paths = []
|
||||
|
||||
[gemini_cli]
|
||||
binary_path = "gemini"
|
||||
binary_path = "\"C:\\projects\\manual_slop\\.venv\\Scripts\\python.exe\" \"C:\\projects\\manual_slop\\tests\\mock_gemini_cli.py\""
|
||||
|
||||
[agent.tools]
|
||||
run_powershell = true
|
||||
|
||||
@@ -16,8 +16,5 @@ history = [
|
||||
|
||||
[discussions."testing deepseek"]
|
||||
git_commit = ""
|
||||
last_updated = "2026-02-25T23:33:45"
|
||||
history = [
|
||||
"@2026-02-25T23:33:21\nUser:\ntesting testing 123?",
|
||||
"@2026-02-25T23:33:25\nAI:\nI see you're testing with \"testing testing 123\". I'm here and ready to help! I'm a helpful coding assistant with access to PowerShell and various tools for file operations and web access.\n\nWhat would you like me to help you with today? I can assist with:\n- Writing or editing code files\n- Searching for information\n- Reading or analyzing files\n- Running commands\n- Web searches\n- And much more!\n\nJust let me know what you need help with.",
|
||||
]
|
||||
last_updated = "2026-02-26T00:29:48"
|
||||
history = []
|
||||
|
||||
17
tests/test_ai_client_list_models.py
Normal file
17
tests/test_ai_client_list_models.py
Normal file
@@ -0,0 +1,17 @@
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
import ai_client
|
||||
|
||||
def test_list_models_gemini_cli():
    """
    Check the model list that 'ai_client.list_models' returns for 'gemini_cli'.

    Every expected model name must be present, and the list must contain
    exactly five entries.
    """
    expected_models = (
        "gemini-3.1-pro-preview",
        "gemini-3-flash-preview",
        "gemini-2.5-pro",
        "gemini-2.5-flash",
        "gemini-2.5-flash-lite",
    )

    models = ai_client.list_models("gemini_cli")

    for model_name in expected_models:
        assert model_name in models
    assert len(models) == 5
|
||||
53
tests/test_cli_tool_bridge_mapping.py
Normal file
53
tests/test_cli_tool_bridge_mapping.py
Normal file
@@ -0,0 +1,53 @@
|
||||
import unittest
|
||||
from unittest.mock import patch, MagicMock
|
||||
import io
|
||||
import json
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Add project root to sys.path
|
||||
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
||||
|
||||
# Import after path fix
|
||||
from scripts.cli_tool_bridge import main
|
||||
|
||||
class TestCliToolBridgeMapping(unittest.TestCase):
    """Tests how the CLI tool bridge maps incoming tool-call payloads."""

    def setUp(self):
        """Set GEMINI_CLI_HOOK_CONTEXT for the test and restore it afterwards."""
        # patch.dict snapshots os.environ and puts it back when stopped, so the
        # variable no longer leaks into other tests run in the same process.
        env_patcher = patch.dict(os.environ, {'GEMINI_CLI_HOOK_CONTEXT': 'manual_slop'})
        env_patcher.start()
        self.addCleanup(env_patcher.stop)

    @patch('sys.stdin', new_callable=io.StringIO)
    @patch('sys.stdout', new_callable=io.StringIO)
    @patch('api_hook_client.ApiHookClient.request_confirmation')
    def test_mapping_from_api_format(self, mock_request, mock_stdout, mock_stdin):
        """
        Verify that bridge correctly maps 'id', 'name', 'input' (Gemini API format)
        into tool_name and tool_input for the hook client.
        """
        api_tool_call = {
            'id': 'call123',
            'name': 'read_file',
            'input': {'path': 'test.txt'}
        }

        # 1. Feed the API-format JSON to the bridge through the patched stdin.
        mock_stdin.write(json.dumps(api_tool_call))
        mock_stdin.seek(0)  # rewind so main() reads from the start

        # 2. Make the patched ApiHookClient report approval.
        mock_request.return_value = {'approved': True}

        # Run the bridge entry point.
        main()

        # 3. The hook client must receive the mapped tool name and input.
        mock_request.assert_called_once_with('read_file', {'path': 'test.txt'})

        # 4. The bridge must print a JSON decision of 'allow' on stdout.
        output_str = mock_stdout.getvalue().strip()
        self.assertTrue(output_str, "Stdout should not be empty")
        output = json.loads(output_str)
        self.assertEqual(output.get('decision'), 'allow')
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
175
tests/test_gemini_cli_adapter_parity.py
Normal file
175
tests/test_gemini_cli_adapter_parity.py
Normal file
@@ -0,0 +1,175 @@
|
||||
import unittest
|
||||
from unittest.mock import patch, MagicMock, ANY
|
||||
import json
|
||||
import subprocess
|
||||
import io
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Ensure the project root is in sys.path to resolve imports correctly
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
if project_root not in sys.path:
    sys.path.append(project_root)

# Mock the session_logger module to prevent file operations during tests.
# The stub is registered BEFORE gemini_cli_adapter is imported: an
# `import session_logger` executed while the adapter module loads resolves
# through sys.modules, so registering the stub afterwards would be too late.
# NOTE(review): presumably the adapter imports session_logger - confirm.
mock_session_logger = MagicMock()
sys.modules['session_logger'] = mock_session_logger

# Import the class to be tested
from gemini_cli_adapter import GeminiCliAdapter
|
||||
|
||||
class TestGeminiCliAdapterParity(unittest.TestCase):
    """Tests for GeminiCliAdapter with subprocess.Popen patched out."""

    @staticmethod
    def _stub_process(popen_mock):
        """Make *popen_mock* return a process mock that emits one 'result' line.

        Returns the process mock so tests can inspect its communicate() calls.
        """
        proc = MagicMock()
        stdout_text = json.dumps({"type": "result", "usage": {}}) + "\n"
        proc.communicate.return_value = (stdout_text, "")
        proc.returncode = 0
        popen_mock.return_value = proc
        return proc

    def setUp(self):
        """Create a fresh adapter with cleared session state before each test."""
        adapter = GeminiCliAdapter(binary_path="gemini")
        adapter.session_id = None
        adapter.last_usage = None
        adapter.last_latency = 0.0
        self.adapter = adapter
        # Drop any calls recorded on the session_logger stub by earlier tests.
        mock_session_logger.reset_mock()

    @patch('subprocess.Popen')
    def test_count_tokens_uses_estimation(self, mock_popen):
        """count_tokens must return len(joined text) // 4 without spawning a process."""
        lines = ["This is the first line.", "This is the second line."]
        estimate = len("\n".join(lines)) // 4

        self.assertEqual(self.adapter.count_tokens(contents=lines), estimate)

        # Counting must not launch any subprocess.
        mock_popen.assert_not_called()

    @patch('subprocess.Popen')
    def test_send_with_safety_settings_no_flags_added(self, mock_popen):
        """Passing safety_settings must not add --safety to the command.

        The message itself must still be handed to the process via stdin.
        """
        proc = self._stub_process(mock_popen)
        prompt = "User's prompt here."
        settings = [
            {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_ONLY_HIGH"},
            {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
        ]

        self.adapter.send(message=prompt, safety_settings=settings)

        launched_command = mock_popen.call_args[0][0]
        self.assertNotIn("--safety", launched_command)
        proc.communicate.assert_called_once_with(input=prompt)

    @patch('subprocess.Popen')
    def test_send_without_safety_settings_no_flags(self, mock_popen):
        """With safety_settings of None or [], no --safety flag may be added."""
        self._stub_process(mock_popen)
        prompt = "Another prompt."

        self.adapter.send(message=prompt, safety_settings=None)
        command_for_none = mock_popen.call_args[0][0]
        self.assertNotIn("--safety", command_for_none)

        # Clear recorded calls before the second scenario.
        mock_popen.reset_mock()

        self.adapter.send(message=prompt, safety_settings=[])
        command_for_empty = mock_popen.call_args[0][0]
        self.assertNotIn("--safety", command_for_empty)

    @patch('subprocess.Popen')
    def test_send_with_system_instruction_prepended_to_stdin(self, mock_popen):
        """The system instruction must be prepended to the stdin payload.

        No --system flag may appear in the command.
        """
        proc = self._stub_process(mock_popen)
        prompt = "User's prompt here."
        instruction = "Some instruction"

        self.adapter.send(message=prompt, system_instruction=instruction)

        launched_command = mock_popen.call_args[0][0]

        # stdin payload is "<instruction>\n\n<message>".
        proc.communicate.assert_called_once_with(input=f"{instruction}\n\n{prompt}")
        self.assertNotIn("--system", launched_command)

    @patch('subprocess.Popen')
    def test_send_with_model_parameter(self, mock_popen):
        """Specifying a model must add -m "<model>" to the command."""
        proc = self._stub_process(mock_popen)
        prompt = "User's prompt here."
        model_name = "gemini-1.5-flash"

        self.adapter.send(message=prompt, model=model_name)

        launched_command = mock_popen.call_args[0][0]
        self.assertIn(f'-m "{model_name}"', launched_command)
        # The message is still passed via stdin, unchanged.
        proc.communicate.assert_called_once_with(input=prompt)

    @patch('subprocess.Popen')
    def test_send_kills_process_on_communicate_exception(self, mock_popen):
        """If communicate() raises, send() must kill the process and re-raise."""
        proc = MagicMock()
        failure = RuntimeError("Simulated communicate error")
        proc.communicate.side_effect = failure
        mock_popen.return_value = proc

        with self.assertRaises(RuntimeError) as caught:
            self.adapter.send(message="User message")

        proc.kill.assert_called_once()
        # The very same exception object must propagate to the caller.
        self.assertIs(caught.exception, failure)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
@@ -8,6 +8,7 @@ from api_hook_client import ApiHookClient
|
||||
def test_gemini_cli_full_integration(live_gui):
|
||||
"""
|
||||
Integration test for the Gemini CLI provider and tool bridge.
|
||||
Handles 'ask_received' events from the bridge and any other approval requests.
|
||||
"""
|
||||
client = ApiHookClient("http://127.0.0.1:8999")
|
||||
|
||||
@@ -18,21 +19,19 @@ def test_gemini_cli_full_integration(live_gui):
|
||||
client.select_list_item("proj_files", "manual_slop")
|
||||
|
||||
# 1. Setup paths and configure the GUI
|
||||
# Use the real gemini CLI if available, otherwise use mock
|
||||
# For CI/testing we prefer mock
|
||||
mock_script = os.path.abspath("tests/mock_gemini_cli.py")
|
||||
# Wrap in quotes for shell execution if path has spaces
|
||||
cli_cmd = f'"{sys.executable}" "{mock_script}"'
|
||||
|
||||
# Set provider and binary path via GUI hooks
|
||||
# Note: Using set_value which now triggers the property setter in gui_2.py
|
||||
print(f"[TEST] Setting current_provider to gemini_cli")
|
||||
client.set_value("current_provider", "gemini_cli")
|
||||
print(f"[TEST] Setting gcli_path to {cli_cmd}")
|
||||
client.set_value("gcli_path", cli_cmd)
|
||||
|
||||
# Verify settings were applied
|
||||
# Verify settings
|
||||
assert client.get_value("current_provider") == "gemini_cli"
|
||||
assert client.get_value("gcli_path") == cli_cmd
|
||||
|
||||
|
||||
# Clear events
|
||||
client.get_events()
|
||||
|
||||
@@ -41,55 +40,48 @@ def test_gemini_cli_full_integration(live_gui):
|
||||
client.set_value("ai_input", "Please read test.txt")
|
||||
client.click("btn_gen_send")
|
||||
|
||||
# 3. Monitor for the 'ask_received' event
|
||||
print("[TEST] Waiting for ask_received event...")
|
||||
request_id = None
|
||||
timeout = 30
|
||||
# 3. Monitor for approval events
|
||||
print("[TEST] Waiting for approval events...")
|
||||
timeout = 45
|
||||
start_time = time.time()
|
||||
approved_count = 0
|
||||
|
||||
while time.time() - start_time < timeout:
|
||||
events = client.get_events()
|
||||
if events:
|
||||
print(f"[TEST] Received {len(events)} events: {[e.get('type') for e in events]}")
|
||||
for ev in events:
|
||||
if ev.get("type") == "ask_received":
|
||||
request_id = ev.get("request_id")
|
||||
print(f"[TEST] Found request_id: {request_id}")
|
||||
break
|
||||
if request_id:
|
||||
break
|
||||
time.sleep(0.5)
|
||||
|
||||
assert request_id is not None, "Timed out waiting for 'ask_received' event from the bridge"
|
||||
|
||||
# 4. Respond to the permission request
|
||||
print("[TEST] Responding to ask with approval")
|
||||
resp = requests.post(
|
||||
"http://127.0.0.1:8999/api/ask/respond",
|
||||
json={
|
||||
"request_id": request_id,
|
||||
"response": {"approved": True}
|
||||
}
|
||||
)
|
||||
assert resp.status_code == 200
|
||||
|
||||
# 5. Verify that the final response is displayed in the GUI
|
||||
print("[TEST] Waiting for final message in history...")
|
||||
final_message_received = False
|
||||
start_time = time.time()
|
||||
while time.time() - start_time < timeout:
|
||||
for ev in events:
|
||||
etype = ev.get("type")
|
||||
eid = ev.get("request_id") or ev.get("action_id")
|
||||
print(f"[TEST] Received event: {etype} (ID: {eid})")
|
||||
|
||||
if etype in ["ask_received", "glob_approval_required", "script_confirmation_required"]:
|
||||
print(f"[TEST] Approving {etype} {eid}")
|
||||
if etype == "script_confirmation_required":
|
||||
resp = requests.post(f"http://127.0.0.1:8999/api/confirm/{eid}", json={"approved": True})
|
||||
else:
|
||||
resp = requests.post("http://127.0.0.1:8999/api/ask/respond",
|
||||
json={"request_id": eid, "response": {"approved": True}})
|
||||
assert resp.status_code == 200
|
||||
approved_count += 1
|
||||
|
||||
# Check if we got a final response in history
|
||||
session = client.get_session()
|
||||
entries = session.get("session", {}).get("entries", [])
|
||||
found_final = False
|
||||
for entry in entries:
|
||||
content = entry.get("content", "")
|
||||
if "Hello from mock!" in content:
|
||||
print(f"[TEST] Success! Found message: {content[:50]}...")
|
||||
final_message_received = True
|
||||
if "Hello from mock!" in content or "processed the tool results" in content:
|
||||
print(f"[TEST] Success! Found final message in history.")
|
||||
found_final = True
|
||||
break
|
||||
if final_message_received:
|
||||
|
||||
if found_final:
|
||||
break
|
||||
|
||||
time.sleep(1.0)
|
||||
|
||||
assert final_message_received, "Final message from mock CLI was not found in the GUI history"
|
||||
assert approved_count > 0, "No approval events were processed"
|
||||
assert found_final, "Final message from mock CLI was not found in the GUI history"
|
||||
|
||||
def test_gemini_cli_rejection_and_history(live_gui):
|
||||
"""
|
||||
@@ -97,88 +89,53 @@ def test_gemini_cli_rejection_and_history(live_gui):
|
||||
"""
|
||||
client = ApiHookClient("http://127.0.0.1:8999")
|
||||
|
||||
# 0. Reset session and enable history
|
||||
# 0. Reset session
|
||||
client.click("btn_reset")
|
||||
client.set_value("auto_add_history", True)
|
||||
# Switch to manual_slop project explicitly
|
||||
client.select_list_item("proj_files", "manual_slop")
|
||||
|
||||
# 1. Setup paths and configure the GUI
|
||||
mock_script = os.path.abspath("tests/mock_gemini_cli.py")
|
||||
cli_cmd = f'"{sys.executable}" "{mock_script}"'
|
||||
|
||||
client.set_value("current_provider", "gemini_cli")
|
||||
client.set_value("gcli_path", cli_cmd)
|
||||
|
||||
# 2. Trigger a message that will be denied
|
||||
|
||||
# 2. Trigger a message
|
||||
print("[TEST] Sending user message (to be denied)...")
|
||||
client.set_value("ai_input", "Deny me")
|
||||
client.click("btn_gen_send")
|
||||
|
||||
# 3. Wait for 'ask_received' and respond with rejection
|
||||
request_id = None
|
||||
timeout = 15
|
||||
# 3. Wait for event and reject
|
||||
timeout = 20
|
||||
start_time = time.time()
|
||||
denied = False
|
||||
while time.time() - start_time < timeout:
|
||||
for ev in client.get_events():
|
||||
if ev.get("type") == "ask_received":
|
||||
request_id = ev.get("request_id")
|
||||
etype = ev.get("type")
|
||||
eid = ev.get("request_id")
|
||||
print(f"[TEST] Received event: {etype}")
|
||||
if etype == "ask_received":
|
||||
print(f"[TEST] Denying request {eid}")
|
||||
requests.post("http://127.0.0.1:8999/api/ask/respond",
|
||||
json={"request_id": eid, "response": {"approved": False}})
|
||||
denied = True
|
||||
break
|
||||
if request_id: break
|
||||
if denied: break
|
||||
time.sleep(0.5)
|
||||
|
||||
assert request_id is not None
|
||||
assert denied, "No ask_received event to deny"
|
||||
|
||||
print("[TEST] Responding to ask with REJECTION")
|
||||
requests.post("http://127.0.0.1:8999/api/ask/respond",
|
||||
json={"request_id": request_id, "response": {"approved": False}})
|
||||
|
||||
# 4. Verify rejection message in history
|
||||
print("[TEST] Waiting for rejection message in history...")
|
||||
# 4. Verify rejection in history
|
||||
print("[TEST] Waiting for rejection in history...")
|
||||
rejection_found = False
|
||||
start_time = time.time()
|
||||
while time.time() - start_time < timeout:
|
||||
while time.time() - start_time < 20:
|
||||
session = client.get_session()
|
||||
entries = session.get("session", {}).get("entries", [])
|
||||
for entry in entries:
|
||||
if "Tool execution was denied. Decision: deny" in entry.get("content", ""):
|
||||
if "Tool execution was denied" in entry.get("content", ""):
|
||||
rejection_found = True
|
||||
break
|
||||
if rejection_found: break
|
||||
time.sleep(1.0)
|
||||
|
||||
assert rejection_found, "Rejection message not found in history"
|
||||
|
||||
# 5. Send a follow-up message and verify history grows
|
||||
print("[TEST] Sending follow-up message...")
|
||||
client.set_value("ai_input", "What happened?")
|
||||
client.click("btn_gen_send")
|
||||
|
||||
# Wait for mock to finish (polling history)
|
||||
print("[TEST] Waiting for final history entry (max 30s)...")
|
||||
final_message_received = False
|
||||
start_time = time.time()
|
||||
while time.time() - start_time < 30:
|
||||
session = client.get_session()
|
||||
entries = session.get("session", {}).get("entries", [])
|
||||
if len(entries) >= 3:
|
||||
final_message_received = True
|
||||
break
|
||||
# Print snapshot for debug
|
||||
if int(time.time() - start_time) % 5 == 0:
|
||||
print(f"[TEST] History length at {int(time.time() - start_time)}s: {len(entries)}")
|
||||
time.sleep(1.0)
|
||||
|
||||
session = client.get_session()
|
||||
entries = session.get("session", {}).get("entries", [])
|
||||
# Should have:
|
||||
# 1. User: Deny me
|
||||
# 2. AI: Tool execution was denied...
|
||||
# 3. User: What happened?
|
||||
# 4. AI or System: ...
|
||||
print(f"[TEST] Final history length: {len(entries)}")
|
||||
for i, entry in enumerate(entries):
|
||||
print(f" {i}: {entry.get('role')} - {entry.get('content')[:30]}...")
|
||||
|
||||
assert len(entries) >= 3
|
||||
|
||||
|
||||
52
tests/test_gemini_cli_parity_regression.py
Normal file
52
tests/test_gemini_cli_parity_regression.py
Normal file
@@ -0,0 +1,52 @@
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Add project root to sys.path
|
||||
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
||||
|
||||
import ai_client
|
||||
|
||||
@pytest.fixture(autouse=True)
def setup_ai_client():
    """Reset ai_client and install stub callbacks before every test.

    Selects the 'gemini_cli' provider with model 'gemini-2.5-flash' and
    replaces the confirm/log callbacks with stand-ins that do no real work.
    """
    def _fake_confirm_and_run(script, base_dir):
        return "Mocked execution"

    def _discard_comms_entry(entry):
        return None

    def _discard_tool_log(script, result):
        return None

    ai_client.reset_session()
    ai_client.set_provider("gemini_cli", "gemini-2.5-flash")
    ai_client.confirm_and_run_callback = _fake_confirm_and_run
    ai_client.comms_log_callback = _discard_comms_entry
    ai_client.tool_log_callback = _discard_tool_log
    yield
|
||||
|
||||
@patch('ai_client.GeminiCliAdapter')
@patch('ai_client._get_combined_system_prompt')
def test_send_invokes_adapter_send(mock_prompt, mock_adapter_class):
    """ai_client.send must call the adapter's send with the expected payload.

    The first positional argument is the discussion history followed by the
    message; the system_instruction keyword is the patched system prompt
    followed by the context wrapped in <context> tags.
    """
    mock_prompt.return_value = "Mocked Prompt"
    adapter = mock_adapter_class.return_value
    adapter.send.return_value = {"text": "Done", "tool_calls": []}
    adapter.last_usage = {"input_tokens": 10}
    adapter.last_latency = 0.1
    adapter.session_id = None

    ai_client.send("context", "message", discussion_history="hist")

    assert adapter.send.called
    positional, keyword = adapter.send.call_args
    assert positional[0] == "[DISCUSSION HISTORY]\n\nhist\n\n---\n\nmessage"
    assert keyword['system_instruction'] == "Mocked Prompt\n\n<context>\ncontext\n</context>"
|
||||
|
||||
@patch('ai_client.GeminiCliAdapter')
def test_get_history_bleed_stats(mock_adapter_class):
    """get_history_bleed_stats must report the provider and current token count.

    After one send through the mocked adapter, 'current' is expected to equal
    the input_tokens value configured on that adapter (1500).
    """
    adapter = mock_adapter_class.return_value
    adapter.send.return_value = {"text": "txt", "tool_calls": []}
    adapter.last_usage = {"input_tokens": 1500}
    adapter.last_latency = 0.5
    adapter.session_id = "sess"

    # A send is needed first so the client is initialised with the adapter.
    ai_client.send("context", "msg")

    bleed_stats = ai_client.get_history_bleed_stats()

    assert bleed_stats["provider"] == "gemini_cli"
    assert bleed_stats["current"] == 1500
|
||||
Reference in New Issue
Block a user