diff --git a/ai_client.py b/ai_client.py
index 7f05eed..8d93e28 100644
--- a/ai_client.py
+++ b/ai_client.py
@@ -14,7 +14,7 @@ _anthropic_client = None
_anthropic_history: list[dict] = []
# Injected by gui.py - called when AI wants to run a command.
-# Signature: (script: str, base_dir: str) -> str | None
+# Signature: (script: str, base_dir: str) -> str | None
# Returns the output string if approved, None if rejected.
confirm_and_run_callback = None
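+# Example wiring (illustrative; the actual gui.py handler name may differ):
+#     ai_client.confirm_and_run_callback = confirm_and_run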
@@ -22,20 +22,8 @@ confirm_and_run_callback = None
# Signature: (entry: dict) -> None
comms_log_callback = None
-# Injected by gui.py - called whenever a tool call completes.
-# Signature: (script: str, result: str, script_path: str | None) -> None
-tool_log_callback = None
-
MAX_TOOL_ROUNDS = 5
-# Anthropic system prompt - cached as the first turn so it counts toward
-# the prompt-cache prefix on every subsequent request.
-_ANTHROPIC_SYSTEM = (
- "You are a helpful coding assistant with access to a PowerShell tool. "
- "When asked to create or edit files, prefer targeted edits over full rewrites. "
- "Always explain what you are doing before invoking the tool."
-)
-
# ------------------------------------------------------------------ comms log
_comms_log: list[dict] = []
@@ -154,6 +142,8 @@ def _classify_anthropic_error(exc: Exception) -> ProviderError:
def _classify_gemini_error(exc: Exception) -> ProviderError:
"""Map a google-genai SDK exception to a ProviderError."""
body = str(exc).lower()
+    # google-genai surfaces HTTP errors as google.api_core exceptions or
+    # google.genai exceptions; typed isinstance checks are tried first, with
+    # the message text inspected as a fallback.
try:
from google.api_core import exceptions as gac
if isinstance(exc, gac.ResourceExhausted):
@@ -166,6 +156,7 @@ def _classify_gemini_error(exc: Exception) -> ProviderError:
return ProviderError("network", "gemini", exc)
except ImportError:
pass
+ # Fallback: parse status code / message string
if "429" in body or "quota" in body or "resource exhausted" in body:
return ProviderError("quota", "gemini", exc)
if "rate" in body and "limit" in body:
@@ -288,20 +279,12 @@ def _run_script(script: str, base_dir: str) -> str:
"""
Delegate to the GUI confirmation callback.
Returns result string (stdout/stderr) or a rejection message.
- Also fires tool_log_callback if registered.
"""
if confirm_and_run_callback is None:
return "ERROR: no confirmation handler registered"
- # confirm_and_run_callback returns (result, script_path) or None
- outcome = confirm_and_run_callback(script, base_dir)
- if outcome is None:
- result = "USER REJECTED: command was not executed"
- if tool_log_callback is not None:
- tool_log_callback(script, result, None)
- return result
- result, script_path = outcome
- if tool_log_callback is not None:
- tool_log_callback(script, result, script_path)
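+    # The callback shows the confirmation UI and, if approved, runs the script;
+    # it returns the captured output, or None when the user rejects it.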
+ result = confirm_and_run_callback(script, base_dir)
+ if result is None:
+ return "USER REJECTED: command was not executed"
return result
# ------------------------------------------------------------------ gemini
@@ -338,6 +321,7 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str) -> str:
response = _gemini_chat.send_message(full_message)
for round_idx in range(MAX_TOOL_ROUNDS):
+        # Collect text from all candidates to summarise the raw response for the log
text_parts_raw = [
part.text
for candidate in response.candidates
@@ -399,35 +383,6 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str) -> str:
raise _classify_gemini_error(exc) from exc
# ------------------------------------------------------------------ anthropic
-#
-# Caching strategy (Anthropic prompt caching):
-#
-# The Anthropic API caches a prefix of the input tokens. To maximise hits:
-#
-# 1. A persistent system prompt is sent on every request with
-# cache_control={"type":"ephemeral"} so it is cached after the first call
-# and reused on subsequent calls within the 5-minute TTL window.
-#
-# 2. The context block (aggregated markdown) is placed as the FIRST user
-# message in the history and also marked with cache_control. Because the
-# system prompt and the context are stable across tool-use rounds within a
-# single send() call, the cache hit rate is very high after round 0.
-#
-# 3. Tool definitions are passed with cache_control on the last tool so the
-# entire tools array is also cached.
-#
-# Token accounting: the response payload contains cache_creation_input_tokens
-# and cache_read_input_tokens in addition to the regular input_tokens field.
-# These are included in the comms log under "usage".
-
-def _anthropic_tools_with_cache() -> list[dict]:
- """Return the tools list with cache_control on the last entry."""
- import copy
- tools = copy.deepcopy(_ANTHROPIC_TOOLS)
- # Mark the last tool so the entire prefix (system + tools) gets cached
- tools[-1]["cache_control"] = {"type": "ephemeral"}
- return tools
-
def _ensure_anthropic_client():
global _anthropic_client
@@ -436,7 +391,6 @@ def _ensure_anthropic_client():
creds = _load_credentials()
_anthropic_client = anthropic.Anthropic(api_key=creds["anthropic"]["api_key"])
-
def _send_anthropic(md_content: str, user_message: str, base_dir: str) -> str:
global _anthropic_history
import anthropic
@@ -444,54 +398,19 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str) -> str:
try:
_ensure_anthropic_client()
- # ----------------------------------------------------------------
- # Build the user turn.
- #
- # Structure the content as two blocks so the large context portion
- # can be cached independently of the user question:
- #
- # [0] context block <- cache_control applied here
- # [1] user question <- not cached (changes every turn)
- #
- # The Anthropic cache anchors at the LAST cache_control marker in
- # the prefix, so everything up to and including the context block
- # will be served from cache on subsequent rounds.
- # ----------------------------------------------------------------
- user_content = [
- {
- "type": "text",
- "text": f"\n{md_content}\n",
- "cache_control": {"type": "ephemeral"},
- },
- {
- "type": "text",
- "text": user_message,
- },
- ]
-
- _anthropic_history.append({"role": "user", "content": user_content})
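+        # Prepend the aggregated markdown context to the user's question as a
+        # single plain-text turn now that the cache-marked content blocks are gone.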
+ full_message = f"\n{md_content}\n\n\n{user_message}"
+ _anthropic_history.append({"role": "user", "content": full_message})
_append_comms("OUT", "request", {
- "message": f"\n{md_content}\n\n\n{user_message}",
+ "message": full_message,
})
for round_idx in range(MAX_TOOL_ROUNDS):
response = _anthropic_client.messages.create(
model=_model,
max_tokens=8096,
- system=[
- {
- "type": "text",
- "text": _ANTHROPIC_SYSTEM,
- "cache_control": {"type": "ephemeral"},
- }
- ],
- tools=_anthropic_tools_with_cache(),
- messages=_anthropic_history,
- # Ask the API to return cache token counts
- # betas=["prompt-caching-2024-07-31"],
- # TODO(Claude): betas is not a valid field:
- # ERROR: Messages.create() got an unexpected keyword argument 'betas'
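+                # The plain tools list and raw history are sent each round;
+                # no system prompt or cache_control markers any more.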
+ tools=_ANTHROPIC_TOOLS,
+ messages=_anthropic_history
)
_anthropic_history.append({
@@ -499,34 +418,22 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str) -> str:
"content": response.content
})
+ # Summarise the response content for the log
text_blocks = [b.text for b in response.content if hasattr(b, "text") and b.text]
tool_use_blocks = [
{"id": b.id, "name": b.name, "input": b.input}
for b in response.content
if b.type == "tool_use"
]
-
- # Extended usage includes cache fields when the beta header is set
- usage_dict: dict = {}
- if response.usage:
- usage_dict = {
- "input_tokens": response.usage.input_tokens,
- "output_tokens": response.usage.output_tokens,
- }
- # cache fields are present when the beta is active
- cache_creation = getattr(response.usage, "cache_creation_input_tokens", None)
- cache_read = getattr(response.usage, "cache_read_input_tokens", None)
- if cache_creation is not None:
- usage_dict["cache_creation_input_tokens"] = cache_creation
- if cache_read is not None:
- usage_dict["cache_read_input_tokens"] = cache_read
-
_append_comms("IN", "response", {
"round": round_idx,
"stop_reason": response.stop_reason,
"text": "\n".join(text_blocks),
"tool_calls": tool_use_blocks,
- "usage": usage_dict,
+ "usage": {
+ "input_tokens": response.usage.input_tokens,
+ "output_tokens": response.usage.output_tokens,
+ } if response.usage else {},
})
if response.stop_reason != "tool_use":