# ai_client.py
"""
Note(Gemini): Acts as the unified interface for multiple LLM providers (Anthropic, Gemini).
Abstracts away the differences in how they handle tool schemas, history, and caching.
For Anthropic: aggressively manages the ~200k token limit by manually culling stale
[FILES UPDATED] entries and dropping the oldest message pairs.
For Gemini: injects the initial context directly into system_instruction during chat
creation to avoid massive history bloat.
"""
# NOTE: the docstring above must stay before this import so Python binds it to
# __doc__; `from __future__` imports may only be preceded by the docstring and
# comments.
from __future__ import annotations

# Standard library
import datetime
import difflib
import hashlib
import json
import os
import sys
import threading
import time
import tomllib
from typing import Any, Callable, Optional

# Third-party
import anthropic
import requests
from google import genai
from google.genai import types

# Local
import file_cache
import mcp_client
import project_manager
from events import EventEmitter
from gemini_cli_adapter import GeminiCliAdapter

# Default provider/model selection.
_provider: str = "gemini"
_model: str = "gemini-2.5-flash-lite"

# Generation parameters; mutated at runtime via set_model_params().
_temperature: float = 0.0
_max_tokens: int = 8192
_history_trunc_limit: int = 8000

# Global event emitter for API lifecycle events
events: EventEmitter = EventEmitter()


def set_model_params(temp: float, max_tok: int, trunc_limit: int = 8000) -> None:
    """Set the generation temperature, max output tokens, and history
    truncation limit used for subsequent requests.

    Args:
        temp: Sampling temperature.
        max_tok: Maximum output tokens per response.
        trunc_limit: History truncation limit (defaults to 8000, matching
            the module default).
    """
    global _temperature, _max_tokens, _history_trunc_limit
    _temperature = temp
    _max_tokens = max_tok
    _history_trunc_limit = trunc_limit


def get_history_trunc_limit() -> int:
    """Return the current history truncation limit."""
    return _history_trunc_limit


def set_history_trunc_limit(val: int) -> None:
    """Set only the history truncation limit, leaving temperature and
    max-token settings untouched."""
    global _history_trunc_limit
    _history_trunc_limit = val


# Lazily-initialised Gemini state: API client, active chat session, context
# cache handle, hash of the cached markdown, and cache creation timestamp.
_gemini_client: genai.Client | None = None
_gemini_chat: Any = None
_gemini_cache: Any = None
_gemini_cache_md_hash: int | None = None
_gemini_cache_created_at: float | None = None

# Gemini cache TTL in seconds. Caches are created with this TTL and
# proactively rebuilt at 90% of this value to avoid stale-reference errors.
_GEMINI_CACHE_TTL: int = 3600

# Anthropic client, conversation history, and the lock guarding that history.
_anthropic_client: anthropic.Anthropic | None = None
_anthropic_history: list[dict] = []
_anthropic_history_lock: threading.Lock = threading.Lock()

# DeepSeek client, conversation history, and the lock guarding that history.
_deepseek_client: Any = None
_deepseek_history: list[dict] = []
_deepseek_history_lock: threading.Lock = threading.Lock()

# Serialises send() calls across providers.
_send_lock: threading.Lock = threading.Lock()

# Lazily-created adapter for driving the Gemini CLI.
_gemini_cli_adapter: GeminiCliAdapter | None = None

# Injected by gui.py - called when AI wants to run a command.
# Signature: (script: str, base_dir: str) -> str | None
confirm_and_run_callback: Callable[[str, str], str | None] | None = None

# Injected by gui.py - called whenever a comms entry is appended.
# Signature: (entry: dict) -> None
comms_log_callback: Callable[[dict[str, Any]], None] | None = None

# Injected by gui.py - called whenever a tool call completes.
# Signature: (script: str, result: str) -> None
tool_log_callback: Callable[[str, str], None] | None = None

# Increased to allow thorough code exploration before forcing a summary
MAX_TOOL_ROUNDS: int = 10

# Maximum cumulative bytes of tool output allowed per send() call.
# Prevents unbounded memory growth during long tool-calling loops.
_MAX_TOOL_OUTPUT_BYTES: int = 500_000

# Maximum characters per text chunk sent to Anthropic.
# Kept well under the ~200k token API limit.
_ANTHROPIC_CHUNK_SIZE: int = 120_000

# Shared system prompt: describes the PowerShell tool, the MCP file/web
# tools, and the preferred file-editing workflow.
_SYSTEM_PROMPT: str = (
    "You are a helpful coding assistant with access to a PowerShell tool and MCP tools (file access: read_file, list_directory, search_files, get_file_summary, web access: web_search, fetch_url). "
    "When calling file/directory tools, always use the 'path' parameter for the target path. "
    "When asked to create or edit files, prefer targeted edits over full rewrites. "
    "Always explain what you are doing before invoking the tool.\n\n"
    "When writing or rewriting large files (especially those containing quotes, backticks, or special characters), "
    "avoid python -c with inline strings. Instead: (1) write a .py helper script to disk using a PS here-string "
    "(@'...'@ for literal content), (2) run it with `python