From 0258a41c47d1683f0c9e7449fc5fdc8fba599cd4 Mon Sep 17 00:00:00 2001
From: Ed_
Date: Sat, 21 Feb 2026 15:53:56 -0500
Subject: [PATCH] Revert client for now, bugged

---
 ai_client.py | 129 +++++++--------------------------------------------
 1 file changed, 18 insertions(+), 111 deletions(-)

diff --git a/ai_client.py b/ai_client.py
index 7f05eed..8d93e28 100644
--- a/ai_client.py
+++ b/ai_client.py
@@ -14,7 +14,7 @@ _anthropic_client = None
 _anthropic_history: list[dict] = []
 
 # Injected by gui.py - called when AI wants to run a command.
-# Signature: (script: str, base_dir: str) -> str | None
+# Signature: (script: str) -> str | None
 # Returns the output string if approved, None if rejected.
 confirm_and_run_callback = None
 
@@ -22,20 +22,8 @@ confirm_and_run_callback = None
 # Signature: (entry: dict) -> None
 comms_log_callback = None
 
-# Injected by gui.py - called whenever a tool call completes.
-# Signature: (script: str, result: str, script_path: str | None) -> None
-tool_log_callback = None
-
 MAX_TOOL_ROUNDS = 5
 
-# Anthropic system prompt - cached as the first turn so it counts toward
-# the prompt-cache prefix on every subsequent request.
-_ANTHROPIC_SYSTEM = (
-    "You are a helpful coding assistant with access to a PowerShell tool. "
-    "When asked to create or edit files, prefer targeted edits over full rewrites. "
-    "Always explain what you are doing before invoking the tool."
-)
-
 # ------------------------------------------------------------------ comms log
 
 _comms_log: list[dict] = []
@@ -154,6 +142,8 @@ def _classify_anthropic_error(exc: Exception) -> ProviderError:
 def _classify_gemini_error(exc: Exception) -> ProviderError:
     """Map a google-genai SDK exception to a ProviderError."""
     body = str(exc).lower()
+    # google-genai surfaces HTTP errors as google.api_core exceptions or
+    # google.genai exceptions; inspect the message text as a reliable fallback.
     try:
         from google.api_core import exceptions as gac
         if isinstance(exc, gac.ResourceExhausted):
@@ -166,6 +156,7 @@ def _classify_gemini_error(exc: Exception) -> ProviderError:
             return ProviderError("network", "gemini", exc)
     except ImportError:
         pass
+    # Fallback: parse status code / message string
     if "429" in body or "quota" in body or "resource exhausted" in body:
         return ProviderError("quota", "gemini", exc)
     if "rate" in body and "limit" in body:
@@ -288,20 +279,12 @@ def _run_script(script: str, base_dir: str) -> str:
     """
     Delegate to the GUI confirmation callback.
     Returns result string (stdout/stderr) or a rejection message.
-    Also fires tool_log_callback if registered.
     """
     if confirm_and_run_callback is None:
         return "ERROR: no confirmation handler registered"
-    # confirm_and_run_callback returns (result, script_path) or None
-    outcome = confirm_and_run_callback(script, base_dir)
-    if outcome is None:
-        result = "USER REJECTED: command was not executed"
-        if tool_log_callback is not None:
-            tool_log_callback(script, result, None)
-        return result
-    result, script_path = outcome
-    if tool_log_callback is not None:
-        tool_log_callback(script, result, script_path)
+    result = confirm_and_run_callback(script, base_dir)
+    if result is None:
+        return "USER REJECTED: command was not executed"
     return result
 
 # ------------------------------------------------------------------ gemini
@@ -338,6 +321,7 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str) -> str:
         response = _gemini_chat.send_message(full_message)
 
         for round_idx in range(MAX_TOOL_ROUNDS):
+            # Log the raw response candidates as text summary
             text_parts_raw = [
                 part.text
                 for candidate in response.candidates
@@ -399,35 +383,6 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str) -> str:
         raise _classify_gemini_error(exc) from exc
 
 # ------------------------------------------------------------------ anthropic
-#
-# Caching strategy (Anthropic prompt caching):
-#
-# The Anthropic API caches a prefix of the input tokens. To maximise hits:
-#
-# 1. A persistent system prompt is sent on every request with
-#    cache_control={"type":"ephemeral"} so it is cached after the first call
-#    and reused on subsequent calls within the 5-minute TTL window.
-#
-# 2. The context block (aggregated markdown) is placed as the FIRST user
-#    message in the history and also marked with cache_control. Because the
-#    system prompt and the context are stable across tool-use rounds within a
-#    single send() call, the cache hit rate is very high after round 0.
-#
-# 3. Tool definitions are passed with cache_control on the last tool so the
-#    entire tools array is also cached.
-#
-# Token accounting: the response payload contains cache_creation_input_tokens
-# and cache_read_input_tokens in addition to the regular input_tokens field.
-# These are included in the comms log under "usage".
-
-def _anthropic_tools_with_cache() -> list[dict]:
-    """Return the tools list with cache_control on the last entry."""
-    import copy
-    tools = copy.deepcopy(_ANTHROPIC_TOOLS)
-    # Mark the last tool so the entire prefix (system + tools) gets cached
-    tools[-1]["cache_control"] = {"type": "ephemeral"}
-    return tools
-
 
 def _ensure_anthropic_client():
     global _anthropic_client
@@ -436,7 +391,6 @@ def _ensure_anthropic_client():
     creds = _load_credentials()
     _anthropic_client = anthropic.Anthropic(api_key=creds["anthropic"]["api_key"])
 
-
 def _send_anthropic(md_content: str, user_message: str, base_dir: str) -> str:
     global _anthropic_history
     import anthropic
@@ -444,54 +398,19 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str) -> str:
     try:
         _ensure_anthropic_client()
 
-        # ----------------------------------------------------------------
-        # Build the user turn.
-        #
-        # Structure the content as two blocks so the large context portion
-        # can be cached independently of the user question:
-        #
-        #   [0] context block    <- cache_control applied here
-        #   [1] user question    <- not cached (changes every turn)
-        #
-        # The Anthropic cache anchors at the LAST cache_control marker in
-        # the prefix, so everything up to and including the context block
-        # will be served from cache on subsequent rounds.
-        # ----------------------------------------------------------------
-        user_content = [
-            {
-                "type": "text",
-                "text": f"\n{md_content}\n",
-                "cache_control": {"type": "ephemeral"},
-            },
-            {
-                "type": "text",
-                "text": user_message,
-            },
-        ]
-
-        _anthropic_history.append({"role": "user", "content": user_content})
+        full_message = f"\n{md_content}\n\n\n{user_message}"
+        _anthropic_history.append({"role": "user", "content": full_message})
 
         _append_comms("OUT", "request", {
-            "message": f"\n{md_content}\n\n\n{user_message}",
+            "message": full_message,
        })
 
         for round_idx in range(MAX_TOOL_ROUNDS):
             response = _anthropic_client.messages.create(
                 model=_model,
                 max_tokens=8096,
-                system=[
-                    {
-                        "type": "text",
-                        "text": _ANTHROPIC_SYSTEM,
-                        "cache_control": {"type": "ephemeral"},
-                    }
-                ],
-                tools=_anthropic_tools_with_cache(),
-                messages=_anthropic_history,
-                # Ask the API to return cache token counts
-                # betas=["prompt-caching-2024-07-31"],
-                # TODO(Claude): betas is not a valid field:
-                # ERROR: Messages.create() got an unexpected keyword argument 'betas'
+                tools=_ANTHROPIC_TOOLS,
+                messages=_anthropic_history
             )
 
             _anthropic_history.append({
@@ -499,34 +418,22 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str) -> str:
                 "role": "assistant",
                 "content": response.content
             })
 
+            # Summarise the response content for the log
             text_blocks = [b.text for b in response.content if hasattr(b, "text") and b.text]
             tool_use_blocks = [
                 {"id": b.id, "name": b.name, "input": b.input}
                 for b in response.content if b.type == "tool_use"
             ]
-
-            # Extended usage includes cache fields when the beta header is set
-            usage_dict: dict = {}
-            if response.usage:
-                usage_dict = {
-                    "input_tokens": response.usage.input_tokens,
-                    "output_tokens": response.usage.output_tokens,
-                }
-                # cache fields are present when the beta is active
-                cache_creation = getattr(response.usage, "cache_creation_input_tokens", None)
-                cache_read = getattr(response.usage, "cache_read_input_tokens", None)
-                if cache_creation is not None:
-                    usage_dict["cache_creation_input_tokens"] = cache_creation
-                if cache_read is not None:
-                    usage_dict["cache_read_input_tokens"] = cache_read
-
             _append_comms("IN", "response", {
                 "round": round_idx,
                 "stop_reason": response.stop_reason,
                 "text": "\n".join(text_blocks),
                 "tool_calls": tool_use_blocks,
-                "usage": usage_dict,
+                "usage": {
+                    "input_tokens": response.usage.input_tokens,
+                    "output_tokens": response.usage.output_tokens,
+                } if response.usage else {},
             })
 
             if response.stop_reason != "tool_use":