# Source: manual_slop/src/gemini_cli_adapter.py
# Snapshot: 2026-03-08 03:11:11 -0400 — 192 lines, 6.5 KiB, Python
"""
Gemini CLI Adapter - Subprocess wrapper for the `gemini` CLI tool.
This module provides an adapter for running the Google Gemini CLI as a subprocess,
parsing its streaming JSON output, and handling session management.
Key Features:
- Streaming JSON output parsing (init, message, chunk, tool_use, result)
- Session persistence via --resume flag
- Non-blocking line-by-line reading with stream_callback
- Token estimation via character count heuristic (4 chars/token)
- CLI call logging via session_logger
Integration:
- Used by ai_client.py as the 'gemini_cli' provider
- Enables synchronous HITL bridge via GEMINI_CLI_HOOK_CONTEXT env var
Thread Safety:
- Each GeminiCliAdapter instance maintains its own session_id
- Not thread-safe. Use separate instances per thread.
Configuration:
- binary_path: Path to the `gemini` CLI (from project config [gemini_cli].binary_path)
Output Protocol:
The CLI emits JSON-L lines:
{"type": "init", "session_id": "..."}
{"type": "message", "content": "...", "role": "assistant"}
{"type": "tool_use", "name": "...", "parameters": {...}}
{"type": "result", "status": "success", "stats": {"total_tokens": N}}
See Also:
- docs/guide_architecture.md for CLI adapter integration
- src/ai_client.py for provider dispatch
"""
import subprocess
import json
import os
import time
import sys
from src import session_logger
from typing import Optional, Callable, Any
class GeminiCliAdapter:
"""
Adapter for the Gemini CLI that parses streaming JSON output.
"""
def __init__(self, binary_path: str = "gemini"):
self.binary_path = binary_path
self.session_id: Optional[str] = None
self.last_usage: Optional[dict[str, Any]] = None
self.last_latency: float = 0.0
def send(self, message: str, safety_settings: list[Any] | None = None, system_instruction: str | None = None,
model: str | None = None, stream_callback: Optional[Callable[[str], None]] = None) -> dict[str, Any]:
"""
Sends a message to the Gemini CLI and processes the streaming JSON output.
Uses non-blocking line-by-line reading to allow stream_callback.
"""
start_time = time.time()
command_parts = [self.binary_path]
if model:
command_parts.extend(['-m', f'"{model}"'])
command_parts.extend(['--prompt', '""'])
if self.session_id:
command_parts.extend(['--resume', self.session_id])
command_parts.extend(['--output-format', 'stream-json'])
command = " ".join(command_parts)
prompt_text = message
if system_instruction:
prompt_text = f"{system_instruction}\n\n{message}"
accumulated_text = ""
tool_calls = []
stdout_content = []
env = os.environ.copy()
env["GEMINI_CLI_HOOK_CONTEXT"] = "manual_slop"
import shlex
# shlex.split handles quotes correctly even on Windows if we are careful.
# We want to split the entire binary_path into its components.
if os.name == 'nt':
# On Windows, shlex.split with default posix=True might swallow backslashes.
# Using posix=False is better for Windows paths.
cmd_list = shlex.split(self.binary_path, posix=False)
else:
cmd_list = shlex.split(self.binary_path)
if model:
cmd_list.extend(['-m', model])
cmd_list.extend(['--prompt', '""'])
if self.session_id:
cmd_list.extend(['--resume', self.session_id])
cmd_list.extend(['--output-format', 'stream-json'])
# Filter out empty strings and strip quotes (Popen doesn't want them in cmd_list elements)
cmd_list = [c.strip('"') for c in cmd_list if c]
sys.stderr.write(f"[DEBUG] GeminiCliAdapter cmd_list: {cmd_list}\n")
sys.stderr.flush()
process = subprocess.Popen(
cmd_list,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
encoding="utf-8",
shell=False,
env=env
)
# Use communicate to avoid pipe deadlocks with large input/output.
# This blocks until the process exits, so we lose real-time streaming,
# but it's much more robust. We then simulate streaming by processing the output.
try:
stdout_final, stderr_final = process.communicate(input=prompt_text, timeout=60.0)
except subprocess.TimeoutExpired:
process.kill()
stdout_final, stderr_final = process.communicate()
stderr_final += "\n\n[ERROR] Gemini CLI subprocess timed out after 60 seconds."
# Mock a JSON error result to bubble up
stdout_final += '\n{"type": "result", "status": "error", "error": "subprocess timeout"}\n'
last_decode_error = None
for line in stdout_final.splitlines():
line = line.strip()
if not line: continue
stdout_content.append(line)
try:
data = json.loads(line)
msg_type = data.get("type")
if msg_type == "init":
if "session_id" in data:
self.session_id = data.get("session_id")
elif msg_type == "message" or msg_type == "chunk":
role = data.get("role", "")
if role in ["assistant", "model"] or not role:
content = data.get("content", data.get("text"))
if content:
accumulated_text += content
if stream_callback:
stream_callback(content)
elif msg_type == "result":
self.last_usage = data.get("stats") or data.get("usage")
if data.get("status") == "error":
raise Exception(data.get("error", "Unknown CLI error"))
if "session_id" in data:
self.session_id = data.get("session_id")
elif msg_type == "tool_use":
tc = {
"name": data.get("tool_name", data.get("name")),
"args": data.get("parameters", data.get("args", {})),
"id": data.get("tool_id", data.get("id"))
}
if tc["name"]:
tool_calls.append(tc)
except json.JSONDecodeError as e:
last_decode_error = e
continue
current_latency = time.time() - start_time
if process.returncode != 0 and not accumulated_text and not tool_calls:
if last_decode_error:
raise Exception(f"Gemini CLI failed (exit {process.returncode}) with JSONDecodeError: {last_decode_error}\nOutput: {stdout_final}")
raise Exception(f"Gemini CLI failed with exit {process.returncode}\nStderr: {stderr_final}")
session_logger.open_session()
session_logger.log_cli_call(
command=command,
stdin_content=prompt_text,
stdout_content="\n".join(stdout_content),
stderr_content=stderr_final,
latency=current_latency
)
self.last_latency = current_latency
return {
"text": accumulated_text,
"tool_calls": tool_calls,
"stderr": stderr_final
}
def count_tokens(self, contents: list[str]) -> int:
"""
Provides a character-based token estimation for the Gemini CLI.
Uses 4 chars/token as a conservative average.
"""
total_chars = len("\n".join(contents))
return total_chars // 4