This commit is contained in:
2026-02-21 16:07:26 -05:00
parent 0258a41c47
commit d2568cd616
4 changed files with 154 additions and 66 deletions

View File

@@ -14,7 +14,7 @@ _anthropic_client = None
_anthropic_history: list[dict] = []
# Injected by gui.py - called when AI wants to run a command.
# Signature: (script: str) -> str | None
# Signature: (script: str, base_dir: str) -> str | None
# Returns the output string if approved, None if rejected.
confirm_and_run_callback = None
@@ -22,24 +22,26 @@ confirm_and_run_callback = None
# Signature: (entry: dict) -> None
comms_log_callback = None
# Injected by gui.py - called whenever a tool call completes (after run).
# Signature: (script: str, result: str) -> None
tool_log_callback = None
# Upper bound on request / tool-result rounds within a single send; the
# provider send loops iterate range(MAX_TOOL_ROUNDS).
MAX_TOOL_ROUNDS = 5
# Anthropic system prompt - sent with cache_control so it is cached after the
# first request and reused on every subsequent call within the TTL window.
# It is passed to messages.create() as a single text block in array form.
_ANTHROPIC_SYSTEM = (
"You are a helpful coding assistant with access to a PowerShell tool. "
"When asked to create or edit files, prefer targeted edits over full rewrites. "
"Always explain what you are doing before invoking the tool."
)
# ------------------------------------------------------------------ comms log
_comms_log: list[dict] = []
MAX_FIELD_CHARS = 400  # beyond this we show a truncated preview in the UI


def _clamp(value, max_chars: int = MAX_FIELD_CHARS) -> tuple[str, bool]:
    """
    Render *value* as display text limited to *max_chars* characters.

    dicts and lists are pretty-printed as JSON (indent=2, non-ASCII kept
    as-is); every other value goes through str().

    Returns (display_str, was_truncated). was_truncated is True only when
    the rendered text was longer than max_chars and had to be cut.
    """
    if isinstance(value, (dict, list)):
        try:
            text = json.dumps(value, ensure_ascii=False, indent=2)
        except (TypeError, ValueError):
            # json.dumps raises TypeError for values/keys it cannot encode
            # and ValueError for circular containers. This helper only
            # builds a preview, so fall back to str() instead of raising.
            text = str(value)
    else:
        text = str(value)

    if len(text) > max_chars:
        return text[:max_chars], True
    return text, False
def _append_comms(direction: str, kind: str, payload: dict):
"""
@@ -78,15 +80,6 @@ class ProviderError(Exception):
"""
Raised when the upstream API returns a hard error we want to surface
distinctly in the UI (quota, rate-limit, auth, balance, etc.).
Attributes
----------
kind : str
One of: "quota", "rate_limit", "auth", "balance", "network", "unknown"
provider : str
"gemini" or "anthropic"
original : Exception
The underlying SDK exception.
"""
def __init__(self, kind: str, provider: str, original: Exception):
self.kind = kind
@@ -94,7 +87,6 @@ class ProviderError(Exception):
self.original = original
super().__init__(str(original))
# Human-readable banner shown in the Response panel
def ui_message(self) -> str:
labels = {
"quota": "QUOTA EXHAUSTED",
@@ -109,7 +101,6 @@ class ProviderError(Exception):
def _classify_anthropic_error(exc: Exception) -> ProviderError:
"""Map an anthropic SDK exception to a ProviderError."""
try:
import anthropic
if isinstance(exc, anthropic.RateLimitError):
@@ -129,7 +120,6 @@ def _classify_anthropic_error(exc: Exception) -> ProviderError:
return ProviderError("auth", "anthropic", exc)
if status == 402:
return ProviderError("balance", "anthropic", exc)
# Anthropic puts credit-balance errors in the body at 400
if "credit" in body or "balance" in body or "billing" in body:
return ProviderError("balance", "anthropic", exc)
if "quota" in body or "limit" in body or "exceeded" in body:
@@ -140,10 +130,7 @@ def _classify_anthropic_error(exc: Exception) -> ProviderError:
def _classify_gemini_error(exc: Exception) -> ProviderError:
"""Map a google-genai SDK exception to a ProviderError."""
body = str(exc).lower()
# google-genai surfaces HTTP errors as google.api_core exceptions or
# google.genai exceptions; inspect the message text as a reliable fallback.
try:
from google.api_core import exceptions as gac
if isinstance(exc, gac.ResourceExhausted):
@@ -156,7 +143,6 @@ def _classify_gemini_error(exc: Exception) -> ProviderError:
return ProviderError("network", "gemini", exc)
except ImportError:
pass
# Fallback: parse status code / message string
if "429" in body or "quota" in body or "resource exhausted" in body:
return ProviderError("quota", "gemini", exc)
if "rate" in body and "limit" in body:
@@ -226,6 +212,9 @@ def _list_anthropic_models() -> list[str]:
TOOL_NAME = "run_powershell"
# The tool list for Anthropic. cache_control is placed on the last (only) tool
# so that the system-prompt + tools prefix is cached together after the first
# request and served from cache on every subsequent round.
_ANTHROPIC_TOOLS = [
{
"name": TOOL_NAME,
@@ -245,7 +234,8 @@ _ANTHROPIC_TOOLS = [
}
},
"required": ["script"]
}
},
"cache_control": {"type": "ephemeral"},
}
]
@@ -279,13 +269,18 @@ def _run_script(script: str, base_dir: str) -> str:
"""
Delegate to the GUI confirmation callback.
Returns result string (stdout/stderr) or a rejection message.
Also fires tool_log_callback if registered.
"""
if confirm_and_run_callback is None:
return "ERROR: no confirmation handler registered"
result = confirm_and_run_callback(script, base_dir)
if result is None:
return "USER REJECTED: command was not executed"
return result
output = "USER REJECTED: command was not executed"
else:
output = result
if tool_log_callback is not None:
tool_log_callback(script, output)
return output
# ------------------------------------------------------------------ gemini
@@ -321,7 +316,6 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str) -> str:
response = _gemini_chat.send_message(full_message)
for round_idx in range(MAX_TOOL_ROUNDS):
# Log the raw response candidates as text summary
text_parts_raw = [
part.text
for candidate in response.candidates
@@ -383,6 +377,32 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str) -> str:
raise _classify_gemini_error(exc) from exc
# ------------------------------------------------------------------ anthropic
#
# Caching strategy (Anthropic prompt caching):
#
# The Anthropic API caches a contiguous prefix of the input. To maximise
# cache hits we structure every request as follows:
#
# system (array form):
# [0] _ANTHROPIC_SYSTEM text <- cache_control: ephemeral
# Stable across the whole session; cached after the first request.
#
# tools:
# Last tool has cache_control: ephemeral.
# Stable across the whole session; cached together with the system prompt.
#
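# messages (one user turn is appended per _send_anthropic call):
# content[0]: <context> block <- cache_control: ephemeral
# The aggregated markdown. Changes only when the user regenerates.
# A new cache entry is created when it changes; otherwise it's a hit.
# content[1]: user question <- no cache_control (varies every turn)
#
# Assistant replies and tool results are appended to _anthropic_history
# without extra cache markers. A follow-up question, however, goes through
# _send_anthropic again and therefore adds another cache-marked <context>
# block to the history.
# NOTE(review): the API accepts at most 4 cache_control breakpoints per
# request and system + tools already use two - confirm the history is reset
# or older markers are stripped before a third send in the same session.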
#
# Token cost of cache creation is ~25 % more than a normal input token, but
# cache reads cost ~10 % of a normal input token, so steady-state (many
# rounds / sends per session) is much cheaper.
def _ensure_anthropic_client():
global _anthropic_client
@@ -391,6 +411,7 @@ def _ensure_anthropic_client():
creds = _load_credentials()
_anthropic_client = anthropic.Anthropic(api_key=creds["anthropic"]["api_key"])
def _send_anthropic(md_content: str, user_message: str, base_dir: str) -> str:
global _anthropic_history
import anthropic
@@ -398,19 +419,40 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str) -> str:
try:
_ensure_anthropic_client()
full_message = f"<context>\n{md_content}\n</context>\n\n{user_message}"
_anthropic_history.append({"role": "user", "content": full_message})
# Build the user content: context block (cached) + question (not cached).
# The cache anchor is placed on the context block so the entire prefix
# (system + tools + context) is eligible for caching.
user_content = [
{
"type": "text",
"text": f"<context>\n{md_content}\n</context>",
"cache_control": {"type": "ephemeral"},
},
{
"type": "text",
"text": user_message,
},
]
_anthropic_history.append({"role": "user", "content": user_content})
_append_comms("OUT", "request", {
"message": full_message,
"message": f"<context>\n{md_content}\n</context>\n\n{user_message}",
})
for round_idx in range(MAX_TOOL_ROUNDS):
response = _anthropic_client.messages.create(
model=_model,
max_tokens=8096,
system=[
{
"type": "text",
"text": _ANTHROPIC_SYSTEM,
"cache_control": {"type": "ephemeral"},
}
],
tools=_ANTHROPIC_TOOLS,
messages=_anthropic_history
messages=_anthropic_history,
)
_anthropic_history.append({
@@ -418,22 +460,31 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str) -> str:
"content": response.content
})
# Summarise the response content for the log
text_blocks = [b.text for b in response.content if hasattr(b, "text") and b.text]
tool_use_blocks = [
{"id": b.id, "name": b.name, "input": b.input}
for b in response.content
if b.type == "tool_use"
]
# Collect usage; cache fields are present when caching is active
usage_dict: dict = {}
if response.usage:
usage_dict["input_tokens"] = response.usage.input_tokens
usage_dict["output_tokens"] = response.usage.output_tokens
cache_creation = getattr(response.usage, "cache_creation_input_tokens", None)
cache_read = getattr(response.usage, "cache_read_input_tokens", None)
if cache_creation is not None:
usage_dict["cache_creation_input_tokens"] = cache_creation
if cache_read is not None:
usage_dict["cache_read_input_tokens"] = cache_read
_append_comms("IN", "response", {
"round": round_idx,
"stop_reason": response.stop_reason,
"text": "\n".join(text_blocks),
"tool_calls": tool_use_blocks,
"usage": {
"input_tokens": response.usage.input_tokens,
"output_tokens": response.usage.output_tokens,
} if response.usage else {},
"usage": usage_dict,
})
if response.stop_reason != "tool_use":
@@ -455,21 +506,24 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str) -> str:
"output": output,
})
tool_results.append({
"type": "tool_result",
"type": "tool_result",
"tool_use_id": block.id,
"content": output
"content": output,
})
if not tool_results:
break
_anthropic_history.append({
"role": "user",
"content": tool_results
"role": "user",
"content": tool_results,
})
_append_comms("OUT", "tool_result_send", {
"results": [{"tool_use_id": r["tool_use_id"], "content": r["content"]} for r in tool_results],
"results": [
{"tool_use_id": r["tool_use_id"], "content": r["content"]}
for r in tool_results
],
})
text_parts = [