# ai_client.py
"""
Note(Gemini):
Acts as the unified interface for multiple LLM providers (Anthropic, Gemini).
Abstracts away the differences in how they handle tool schemas, history, and caching.

For Anthropic: aggressively manages the ~200k token limit by manually culling
stale [FILES UPDATED] entries and dropping the oldest message pairs.

For Gemini: injects the initial context directly into system_instruction
during chat creation to avoid massive history bloat.
"""
# The docstring above must precede the future import to be picked up as the
# module's __doc__; a future statement is still legal directly after it.
from __future__ import annotations

import tomllib
import json
import sys
import time
import datetime
import hashlib
import difflib
import threading
import requests  # type: ignore[import-untyped]
from typing import Optional, Callable, Any, List, Union, cast, Iterable
import os
import project_manager
import file_cache
import mcp_client
import anthropic
from gemini_cli_adapter import GeminiCliAdapter as GeminiCliAdapter
from google import genai
from google.genai import types
from events import EventEmitter

# Module-level provider/model selection and generation parameters.
_provider: str = "gemini"
_model: str = "gemini-2.5-flash-lite"
_temperature: float = 0.0
_max_tokens: int = 8192
_history_trunc_limit: int = 8000

# Global event emitter for API lifecycle events
events: EventEmitter = EventEmitter()


def set_model_params(temp: float, max_tok: int, trunc_limit: int = 8000) -> None:
    """Overwrite the module-level generation parameters.

    Args:
        temp: New value stored in ``_temperature``.
        max_tok: New value stored in ``_max_tokens``.
        trunc_limit: New value stored in ``_history_trunc_limit``.
            Note that omitting it resets the limit to 8000 rather than
            keeping the current value.
    """
    global _temperature, _max_tokens, _history_trunc_limit
    _temperature = temp
    _max_tokens = max_tok
    _history_trunc_limit = trunc_limit


def get_history_trunc_limit() -> int:
    """Return the current module-level history truncation limit."""
    return _history_trunc_limit


def set_history_trunc_limit(val: int) -> None:
    """Replace the module-level history truncation limit with ``val``."""
    global _history_trunc_limit
    _history_trunc_limit = val


# Gemini session state; every slot starts unset (None) at import time.
_gemini_client: Optional[genai.Client] = None
_gemini_chat: Any = None
_gemini_cache: Any = None
_gemini_cache_md_hash: Optional[str] = None
_gemini_cache_created_at: Optional[float] = None

# Gemini cache TTL in seconds. Caches are created with this TTL and
# proactively rebuilt at 90% of this value to avoid stale-reference errors.
_GEMINI_CACHE_TTL: int = 3600 _anthropic_client: Optional[anthropic.Anthropic] = None _anthropic_history: list[dict[str, Any]] = [] _anthropic_history_lock: threading.Lock = threading.Lock() _deepseek_client: Any = None _deepseek_history: list[dict[str, Any]] = [] _deepseek_history_lock: threading.Lock = threading.Lock() _send_lock: threading.Lock = threading.Lock() _gemini_cli_adapter: Optional[GeminiCliAdapter] = None # Injected by gui.py - called when AI wants to run a command. confirm_and_run_callback: Optional[Callable[[str, str, Optional[Callable[[str], str]]], Optional[str]]] = None # Injected by gui.py - called whenever a comms entry is appended. comms_log_callback: Optional[Callable[[dict[str, Any]], None]] = None # Injected by gui.py - called whenever a tool call completes. tool_log_callback: Optional[Callable[[str, str], None]] = None # Set by caller tiers before ai_client.send(); cleared in finally. current_tier: Optional[str] = None # Increased to allow thorough code exploration before forcing a summary MAX_TOOL_ROUNDS: int = 10 # Maximum cumulative bytes of tool output allowed per send() call. _MAX_TOOL_OUTPUT_BYTES: int = 500_000 # Maximum characters per text chunk sent to Anthropic. _ANTHROPIC_CHUNK_SIZE: int = 120_000 _SYSTEM_PROMPT: str = ( "You are a helpful coding assistant with access to a PowerShell tool and MCP tools (file access: read_file, list_directory, search_files, get_file_summary, web access: web_search, fetch_url). " "When calling file/directory tools, always use the 'path' parameter for the target path. " "When asked to create or edit files, prefer targeted edits over full rewrites. " "Always explain what you are doing before invoking the tool.\n\n" "When writing or rewriting large files (especially those containing quotes, backticks, or special characters), " "avoid python -c with inline strings. Instead: (1) write a .py helper script to disk using a PS here-string " "(@'...'@ for literal content), (2) run it with `python