checkpoint: Working on getting the Gemini CLI to actually have parity with the Gemini API.

This commit is contained in:
2026-02-26 00:31:33 -05:00
parent cbe359b1a5
commit a70680b2a2
14 changed files with 710 additions and 243 deletions

View File

@@ -8,6 +8,7 @@ from api_hook_client import ApiHookClient
def test_gemini_cli_full_integration(live_gui):
"""
Integration test for the Gemini CLI provider and tool bridge.
Handles 'ask_received' events from the bridge and any other approval requests.
"""
client = ApiHookClient("http://127.0.0.1:8999")
@@ -18,21 +19,19 @@ def test_gemini_cli_full_integration(live_gui):
client.select_list_item("proj_files", "manual_slop")
# 1. Setup paths and configure the GUI
# Use the real gemini CLI if available, otherwise use mock
# For CI/testing we prefer mock
mock_script = os.path.abspath("tests/mock_gemini_cli.py")
# Wrap in quotes for shell execution if path has spaces
cli_cmd = f'"{sys.executable}" "{mock_script}"'
# Set provider and binary path via GUI hooks
# Note: Using set_value which now triggers the property setter in gui_2.py
print(f"[TEST] Setting current_provider to gemini_cli")
client.set_value("current_provider", "gemini_cli")
print(f"[TEST] Setting gcli_path to {cli_cmd}")
client.set_value("gcli_path", cli_cmd)
# Verify settings were applied
# Verify settings
assert client.get_value("current_provider") == "gemini_cli"
assert client.get_value("gcli_path") == cli_cmd
# Clear events
client.get_events()
@@ -41,55 +40,48 @@ def test_gemini_cli_full_integration(live_gui):
client.set_value("ai_input", "Please read test.txt")
client.click("btn_gen_send")
# 3. Monitor for the 'ask_received' event
print("[TEST] Waiting for ask_received event...")
request_id = None
timeout = 30
# 3. Monitor for approval events
print("[TEST] Waiting for approval events...")
timeout = 45
start_time = time.time()
approved_count = 0
while time.time() - start_time < timeout:
events = client.get_events()
if events:
print(f"[TEST] Received {len(events)} events: {[e.get('type') for e in events]}")
for ev in events:
if ev.get("type") == "ask_received":
request_id = ev.get("request_id")
print(f"[TEST] Found request_id: {request_id}")
break
if request_id:
break
time.sleep(0.5)
assert request_id is not None, "Timed out waiting for 'ask_received' event from the bridge"
# 4. Respond to the permission request
print("[TEST] Responding to ask with approval")
resp = requests.post(
"http://127.0.0.1:8999/api/ask/respond",
json={
"request_id": request_id,
"response": {"approved": True}
}
)
assert resp.status_code == 200
# 5. Verify that the final response is displayed in the GUI
print("[TEST] Waiting for final message in history...")
final_message_received = False
start_time = time.time()
while time.time() - start_time < timeout:
for ev in events:
etype = ev.get("type")
eid = ev.get("request_id") or ev.get("action_id")
print(f"[TEST] Received event: {etype} (ID: {eid})")
if etype in ["ask_received", "glob_approval_required", "script_confirmation_required"]:
print(f"[TEST] Approving {etype} {eid}")
if etype == "script_confirmation_required":
resp = requests.post(f"http://127.0.0.1:8999/api/confirm/{eid}", json={"approved": True})
else:
resp = requests.post("http://127.0.0.1:8999/api/ask/respond",
json={"request_id": eid, "response": {"approved": True}})
assert resp.status_code == 200
approved_count += 1
# Check if we got a final response in history
session = client.get_session()
entries = session.get("session", {}).get("entries", [])
found_final = False
for entry in entries:
content = entry.get("content", "")
if "Hello from mock!" in content:
print(f"[TEST] Success! Found message: {content[:50]}...")
final_message_received = True
if "Hello from mock!" in content or "processed the tool results" in content:
print(f"[TEST] Success! Found final message in history.")
found_final = True
break
if final_message_received:
if found_final:
break
time.sleep(1.0)
assert final_message_received, "Final message from mock CLI was not found in the GUI history"
assert approved_count > 0, "No approval events were processed"
assert found_final, "Final message from mock CLI was not found in the GUI history"
def test_gemini_cli_rejection_and_history(live_gui):
"""
@@ -97,88 +89,53 @@ def test_gemini_cli_rejection_and_history(live_gui):
"""
client = ApiHookClient("http://127.0.0.1:8999")
# 0. Reset session and enable history
# 0. Reset session
client.click("btn_reset")
client.set_value("auto_add_history", True)
# Switch to manual_slop project explicitly
client.select_list_item("proj_files", "manual_slop")
# 1. Setup paths and configure the GUI
mock_script = os.path.abspath("tests/mock_gemini_cli.py")
cli_cmd = f'"{sys.executable}" "{mock_script}"'
client.set_value("current_provider", "gemini_cli")
client.set_value("gcli_path", cli_cmd)
# 2. Trigger a message that will be denied
# 2. Trigger a message
print("[TEST] Sending user message (to be denied)...")
client.set_value("ai_input", "Deny me")
client.click("btn_gen_send")
# 3. Wait for 'ask_received' and respond with rejection
request_id = None
timeout = 15
# 3. Wait for event and reject
timeout = 20
start_time = time.time()
denied = False
while time.time() - start_time < timeout:
for ev in client.get_events():
if ev.get("type") == "ask_received":
request_id = ev.get("request_id")
etype = ev.get("type")
eid = ev.get("request_id")
print(f"[TEST] Received event: {etype}")
if etype == "ask_received":
print(f"[TEST] Denying request {eid}")
requests.post("http://127.0.0.1:8999/api/ask/respond",
json={"request_id": eid, "response": {"approved": False}})
denied = True
break
if request_id: break
if denied: break
time.sleep(0.5)
assert request_id is not None
assert denied, "No ask_received event to deny"
print("[TEST] Responding to ask with REJECTION")
requests.post("http://127.0.0.1:8999/api/ask/respond",
json={"request_id": request_id, "response": {"approved": False}})
# 4. Verify rejection message in history
print("[TEST] Waiting for rejection message in history...")
# 4. Verify rejection in history
print("[TEST] Waiting for rejection in history...")
rejection_found = False
start_time = time.time()
while time.time() - start_time < timeout:
while time.time() - start_time < 20:
session = client.get_session()
entries = session.get("session", {}).get("entries", [])
for entry in entries:
if "Tool execution was denied. Decision: deny" in entry.get("content", ""):
if "Tool execution was denied" in entry.get("content", ""):
rejection_found = True
break
if rejection_found: break
time.sleep(1.0)
assert rejection_found, "Rejection message not found in history"
# 5. Send a follow-up message and verify history grows
print("[TEST] Sending follow-up message...")
client.set_value("ai_input", "What happened?")
client.click("btn_gen_send")
# Wait for mock to finish (polling history)
print("[TEST] Waiting for final history entry (max 30s)...")
final_message_received = False
start_time = time.time()
while time.time() - start_time < 30:
session = client.get_session()
entries = session.get("session", {}).get("entries", [])
if len(entries) >= 3:
final_message_received = True
break
# Print snapshot for debug
if int(time.time() - start_time) % 5 == 0:
print(f"[TEST] History length at {int(time.time() - start_time)}s: {len(entries)}")
time.sleep(1.0)
session = client.get_session()
entries = session.get("session", {}).get("entries", [])
# Should have:
# 1. User: Deny me
# 2. AI: Tool execution was denied...
# 3. User: What happened?
# 4. AI or System: ...
print(f"[TEST] Final history length: {len(entries)}")
for i, entry in enumerate(entries):
print(f" {i}: {entry.get('role')} - {entry.get('content')[:30]}...")
assert len(entries) >= 3