# ai_client.py
"""
Note(Gemini):
Acts as the unified interface for multiple LLM providers (Anthropic, Gemini).
Abstracts away the differences in how they handle tool schemas, history, and caching.

For Anthropic: aggressively manages the ~200k token limit by manually culling
stale [FILES UPDATED] entries and dropping the oldest message pairs.

For Gemini: injects the initial context directly into system_instruction
during chat creation to avoid massive history bloat.
"""
import tomllib
import json
import sys
import time
import datetime
import hashlib
import difflib
import threading
import requests
from pathlib import Path
from typing import Optional, Callable, Any
import os
import project_manager
import file_cache
import mcp_client
import anthropic
from gemini_cli_adapter import GeminiCliAdapter
from google import genai
from google.genai import types
from events import EventEmitter

# --- Generation settings -------------------------------------------------
# Initial values for the provider/model selection and sampling parameters.
# The last three are overwritten as a group by set_model_params().
_provider: str = "gemini"
_model: str = "gemini-2.5-flash-lite"
_temperature: float = 0.0
_max_tokens: int = 8192
_history_trunc_limit: int = 8000

# Global event emitter for API lifecycle events
events: EventEmitter = EventEmitter()


def set_model_params(temp: float, max_tok: int, trunc_limit: int = 8000) -> None:
    """Replace the module-level sampling and history settings in one call.

    Args:
        temp: New value stored in ``_temperature``.
        max_tok: New value stored in ``_max_tokens``.
        trunc_limit: New value stored in ``_history_trunc_limit``; defaults
            to 8000, the same number the module starts with.
    """
    global _temperature, _max_tokens, _history_trunc_limit
    _temperature, _max_tokens, _history_trunc_limit = temp, max_tok, trunc_limit


def get_history_trunc_limit() -> int:
    """Return the current module-level history truncation limit."""
    return _history_trunc_limit


def set_history_trunc_limit(val: int) -> None:
    """Store ``val`` as the module-level history truncation limit."""
    global _history_trunc_limit
    _history_trunc_limit = val


# --- Gemini session state ------------------------------------------------
# Every slot starts out unset (None).
# NOTE(review): presumably filled in by the Gemini send path later in this
# module; that code is not part of this section -- confirm.
_gemini_client: genai.Client | None = None
_gemini_chat: Any = None
_gemini_cache: Any = None
_gemini_cache_md_hash: int | None = None
_gemini_cache_created_at: float | None = None

# Gemini cache TTL in seconds. Caches are created with this TTL and
# proactively rebuilt at 90% of this value to avoid stale-reference errors.
_GEMINI_CACHE_TTL: int = 3600 _anthropic_client: anthropic.Anthropic | None = None _anthropic_history: list[dict] = [] _anthropic_history_lock: threading.Lock = threading.Lock() _deepseek_client: Any = None _deepseek_history: list[dict] = [] _deepseek_history_lock: threading.Lock = threading.Lock() _send_lock: threading.Lock = threading.Lock() _gemini_cli_adapter: GeminiCliAdapter | None = None # Injected by gui.py - called when AI wants to run a command. # Signature: (script: str, base_dir: str) -> str | None confirm_and_run_callback: Callable[[str, str], str | None] | None = None # Injected by gui.py - called whenever a comms entry is appended. # Signature: (entry: dict) -> None comms_log_callback: Callable[[dict[str, Any]], None] | None = None # Injected by gui.py - called whenever a tool call completes. # Signature: (script: str, result: str) -> None tool_log_callback: Callable[[str, str], None] | None = None # Increased to allow thorough code exploration before forcing a summary MAX_TOOL_ROUNDS: int = 10 # Maximum cumulative bytes of tool output allowed per send() call. # Prevents unbounded memory growth during long tool-calling loops. _MAX_TOOL_OUTPUT_BYTES: int = 500_000 # Maximum characters per text chunk sent to Anthropic. # Kept well under the ~200k token API limit. _ANTHROPIC_CHUNK_SIZE: int = 120_000 _SYSTEM_PROMPT: str = ( "You are a helpful coding assistant with access to a PowerShell tool and MCP tools (file access: read_file, list_directory, search_files, get_file_summary, web access: web_search, fetch_url). " "When calling file/directory tools, always use the 'path' parameter for the target path. " "When asked to create or edit files, prefer targeted edits over full rewrites. " "Always explain what you are doing before invoking the tool.\n\n" "When writing or rewriting large files (especially those containing quotes, backticks, or special characters), " "avoid python -c with inline strings. 
Instead: (1) write a .py helper script to disk using a PS here-string " "(@'...'@ for literal content), (2) run it with `python