progress

2026-02-21 16:07:26 -05:00
parent 0258a41c47
commit d2568cd616
4 changed files with 154 additions and 66 deletions
--- a/ai_client.py
+++ b/ai_client.py
@@ -14,7 +14,7 @@ _anthropic_client = None
 _anthropic_history: list[dict] = []

 # Injected by gui.py - called when AI wants to run a command.
-# Signature: (script: str) -> str | None
+# Signature: (script: str, base_dir: str) -> str | None
 # Returns the output string if approved, None if rejected.
 confirm_and_run_callback = None

@@ -22,24 +22,26 @@ confirm_and_run_callback = None
 # Signature: (entry: dict) -> None
 comms_log_callback = None

+# Injected by gui.py - called after each tool call is run or rejected by the user.
+# Signature: (script: str, result: str) -> None
+tool_log_callback = None
+
 MAX_TOOL_ROUNDS = 5

+# Anthropic system prompt - sent with cache_control so it is cached after the
+# first request and reused on every subsequent call within the TTL window.
+_ANTHROPIC_SYSTEM = (
+    "You are a helpful coding assistant with access to a PowerShell tool. "
+    "When asked to create or edit files, prefer targeted edits over full rewrites. "
+    "Always explain what you are doing before invoking the tool."
+)
+
 # ------------------------------------------------------------------ comms log

 _comms_log: list[dict] = []

 MAX_FIELD_CHARS = 400   # beyond this we show a truncated preview in the UI

-def _clamp(value, max_chars: int = MAX_FIELD_CHARS) -> tuple[str, bool]:
-    """Return (display_str, was_truncated)."""
-    if isinstance(value, (dict, list)):
-        s = json.dumps(value, ensure_ascii=False, indent=2)
-    else:
-        s = str(value)
-    if len(s) > max_chars:
-        return s[:max_chars], True
-    return s, False
-

 def _append_comms(direction: str, kind: str, payload: dict):
    """
@@ -78,15 +80,6 @@ class ProviderError(Exception):
    """
    Raised when the upstream API returns a hard error we want to surface
    distinctly in the UI (quota, rate-limit, auth, balance, etc.).
-
-    Attributes
-    ----------
-    kind : str
-        One of: "quota", "rate_limit", "auth", "balance", "network", "unknown"
-    provider : str
-        "gemini" or "anthropic"
-    original : Exception
-        The underlying SDK exception.
    """
    def __init__(self, kind: str, provider: str, original: Exception):
        self.kind = kind
@@ -94,7 +87,6 @@ class ProviderError(Exception):
        self.original = original
        super().__init__(str(original))

-    # Human-readable banner shown in the Response panel
    def ui_message(self) -> str:
        labels = {
            "quota":      "QUOTA EXHAUSTED",
@@ -109,7 +101,6 @@ class ProviderError(Exception):


 def _classify_anthropic_error(exc: Exception) -> ProviderError:
-    """Map an anthropic SDK exception to a ProviderError."""
    try:
        import anthropic
        if isinstance(exc, anthropic.RateLimitError):
@@ -129,7 +120,6 @@ def _classify_anthropic_error(exc: Exception) -> ProviderError:
                return ProviderError("auth", "anthropic", exc)
            if status == 402:
                return ProviderError("balance", "anthropic", exc)
-            # Anthropic puts credit-balance errors in the body at 400
            if "credit" in body or "balance" in body or "billing" in body:
                return ProviderError("balance", "anthropic", exc)
            if "quota" in body or "limit" in body or "exceeded" in body:
@@ -140,10 +130,7 @@ def _classify_anthropic_error(exc: Exception) -> ProviderError:


 def _classify_gemini_error(exc: Exception) -> ProviderError:
-    """Map a google-genai SDK exception to a ProviderError."""
    body = str(exc).lower()
-    # google-genai surfaces HTTP errors as google.api_core exceptions or
-    # google.genai exceptions; inspect the message text as a reliable fallback.
    try:
        from google.api_core import exceptions as gac
        if isinstance(exc, gac.ResourceExhausted):
@@ -156,7 +143,6 @@ def _classify_gemini_error(exc: Exception) -> ProviderError:
            return ProviderError("network", "gemini", exc)
    except ImportError:
        pass
-    # Fallback: parse status code / message string
    if "429" in body or "quota" in body or "resource exhausted" in body:
        return ProviderError("quota", "gemini", exc)
    if "rate" in body and "limit" in body:
@@ -226,6 +212,9 @@ def _list_anthropic_models() -> list[str]:

 TOOL_NAME = "run_powershell"

+# The tool list for Anthropic.  cache_control is placed on the last (only) tool
+# so that the system-prompt + tools prefix is cached together after the first
+# request and served from cache on every subsequent round.
 _ANTHROPIC_TOOLS = [
    {
        "name": TOOL_NAME,
@@ -245,7 +234,8 @@ _ANTHROPIC_TOOLS = [
                }
            },
            "required": ["script"]
-        }
+        },
+        "cache_control": {"type": "ephemeral"},
    }
 ]

@@ -279,13 +269,18 @@ def _run_script(script: str, base_dir: str) -> str:
    """
    Delegate to the GUI confirmation callback.
    Returns result string (stdout/stderr) or a rejection message.
+    Also fires tool_log_callback if registered.
    """
    if confirm_and_run_callback is None:
        return "ERROR: no confirmation handler registered"
    result = confirm_and_run_callback(script, base_dir)
    if result is None:
-        return "USER REJECTED: command was not executed"
-    return result
+        output = "USER REJECTED: command was not executed"
+    else:
+        output = result
+    if tool_log_callback is not None:
+        tool_log_callback(script, output)
+    return output

 # ------------------------------------------------------------------ gemini

@@ -321,7 +316,6 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str) -> str:
        response = _gemini_chat.send_message(full_message)

        for round_idx in range(MAX_TOOL_ROUNDS):
-            # Log the raw response candidates as text summary
            text_parts_raw = [
                part.text
                for candidate in response.candidates
@@ -383,6 +377,32 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str) -> str:
        raise _classify_gemini_error(exc) from exc

 # ------------------------------------------------------------------ anthropic
+#
+# Caching strategy (Anthropic prompt caching):
+#
+#   The Anthropic API caches a contiguous prefix of the input.  To maximise
+#   cache hits we structure every request as follows:
+#
+#   system (array form):
+#       [0] _ANTHROPIC_SYSTEM text   <- cache_control: ephemeral
+#           Stable across the whole session; cached after the first request.
+#
+#   tools:
+#       Last tool has cache_control: ephemeral.
+#       Stable across the whole session; cached together with the system prompt.
+#
+#   user turn appended by each _send_anthropic call (messages[0] on first send):
+#       content[0]: <context> block  <- cache_control: ephemeral
+#           The aggregated markdown.  Changes only when the user regenerates.
+#           A new cache entry is created when it changes; otherwise it's a hit.
+#       content[1]: user question    <- no cache_control (varies every turn)
+#
+#   Tool results are appended to _anthropic_history without cache markers, but
+#   every _send_anthropic call adds another cache_control'd <context> block.
+#
+#   Token cost of cache creation is ~25 % more than a normal input token, but
+#   cache reads cost ~10 % of a normal input token, so steady-state (many
+#   rounds / sends per session) is much cheaper.

 def _ensure_anthropic_client():
    global _anthropic_client
@@ -391,6 +411,7 @@ def _ensure_anthropic_client():
        creds = _load_credentials()
        _anthropic_client = anthropic.Anthropic(api_key=creds["anthropic"]["api_key"])

+
 def _send_anthropic(md_content: str, user_message: str, base_dir: str) -> str:
    global _anthropic_history
    import anthropic
@@ -398,19 +419,40 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str) -> str:
    try:
        _ensure_anthropic_client()

-        full_message = f"<context>\n{md_content}\n</context>\n\n{user_message}"
-        _anthropic_history.append({"role": "user", "content": full_message})
+        # Build the user content: context block (cached) + question (not cached).
+        # The cache anchor is placed on the context block so the entire prefix
+        # (system + tools + context) is eligible for caching.
+        user_content = [
+            {
+                "type": "text",
+                "text": f"<context>\n{md_content}\n</context>",
+                "cache_control": {"type": "ephemeral"},
+            },
+            {
+                "type": "text",
+                "text": user_message,
+            },
+        ]
+
+        _anthropic_history.append({"role": "user", "content": user_content})

        _append_comms("OUT", "request", {
-            "message": full_message,
+            "message": f"<context>\n{md_content}\n</context>\n\n{user_message}",
        })

        for round_idx in range(MAX_TOOL_ROUNDS):
            response = _anthropic_client.messages.create(
                model=_model,
                max_tokens=8096,
+                system=[
+                    {
+                        "type": "text",
+                        "text": _ANTHROPIC_SYSTEM,
+                        "cache_control": {"type": "ephemeral"},
+                    }
+                ],
                tools=_ANTHROPIC_TOOLS,
-                messages=_anthropic_history
+                messages=_anthropic_history,
            )

            _anthropic_history.append({
@@ -418,22 +460,31 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str) -> str:
                "content": response.content
            })

-            # Summarise the response content for the log
            text_blocks = [b.text for b in response.content if hasattr(b, "text") and b.text]
            tool_use_blocks = [
                {"id": b.id, "name": b.name, "input": b.input}
                for b in response.content
                if b.type == "tool_use"
            ]
+
+            # Collect usage; cache fields are present when caching is active
+            usage_dict: dict = {}
+            if response.usage:
+                usage_dict["input_tokens"]  = response.usage.input_tokens
+                usage_dict["output_tokens"] = response.usage.output_tokens
+                cache_creation = getattr(response.usage, "cache_creation_input_tokens", None)
+                cache_read     = getattr(response.usage, "cache_read_input_tokens",     None)
+                if cache_creation is not None:
+                    usage_dict["cache_creation_input_tokens"] = cache_creation
+                if cache_read is not None:
+                    usage_dict["cache_read_input_tokens"] = cache_read
+
            _append_comms("IN", "response", {
                "round":       round_idx,
                "stop_reason": response.stop_reason,
                "text":        "\n".join(text_blocks),
                "tool_calls":  tool_use_blocks,
-                "usage":       {
-                    "input_tokens":  response.usage.input_tokens,
-                    "output_tokens": response.usage.output_tokens,
-                } if response.usage else {},
+                "usage":       usage_dict,
            })

            if response.stop_reason != "tool_use":
@@ -455,21 +506,24 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str) -> str:
                        "output": output,
                    })
                    tool_results.append({
-                        "type": "tool_result",
+                        "type":        "tool_result",
                        "tool_use_id": block.id,
-                        "content": output
+                        "content":     output,
                    })

            if not tool_results:
                break

            _anthropic_history.append({
-                "role": "user",
-                "content": tool_results
+                "role":    "user",
+                "content": tool_results,
            })

            _append_comms("OUT", "tool_result_send", {
-                "results": [{"tool_use_id": r["tool_use_id"], "content": r["content"]} for r in tool_results],
+                "results": [
+                    {"tool_use_id": r["tool_use_id"], "content": r["content"]}
+                    for r in tool_results
+                ],
            })

        text_parts = [