revert client for now (bugged)

2026-02-21 15:53:56 -05:00
parent ee95007a67
commit 0258a41c47
1 changed files with 18 additions and 111 deletions
--- a/ai_client.py
+++ b/ai_client.py
@@ -14,7 +14,7 @@ _anthropic_client = None
 _anthropic_history: list[dict] = []

 # Injected by gui.py - called when AI wants to run a command.
-# Signature: (script: str, base_dir: str) -> str | None
+# Signature: (script: str) -> str | None
 # Returns the output string if approved, None if rejected.
 confirm_and_run_callback = None

@@ -22,20 +22,8 @@ confirm_and_run_callback = None
 # Signature: (entry: dict) -> None
 comms_log_callback = None

-# Injected by gui.py - called whenever a tool call completes.
-# Signature: (script: str, result: str, script_path: str | None) -> None
-tool_log_callback = None
-
 MAX_TOOL_ROUNDS = 5

-# Anthropic system prompt - cached as the first turn so it counts toward
-# the prompt-cache prefix on every subsequent request.
-_ANTHROPIC_SYSTEM = (
-    "You are a helpful coding assistant with access to a PowerShell tool. "
-    "When asked to create or edit files, prefer targeted edits over full rewrites. "
-    "Always explain what you are doing before invoking the tool."
-)
-
 # ------------------------------------------------------------------ comms log

 _comms_log: list[dict] = []
@@ -154,6 +142,8 @@ def _classify_anthropic_error(exc: Exception) -> ProviderError:
 def _classify_gemini_error(exc: Exception) -> ProviderError:
    """Map a google-genai SDK exception to a ProviderError."""
    body = str(exc).lower()
+    # google-genai surfaces HTTP errors as google.api_core exceptions or
+    # google.genai exceptions; inspect the message text as a reliable fallback.
    try:
        from google.api_core import exceptions as gac
        if isinstance(exc, gac.ResourceExhausted):
@@ -166,6 +156,7 @@ def _classify_gemini_error(exc: Exception) -> ProviderError:
            return ProviderError("network", "gemini", exc)
    except ImportError:
        pass
+    # Fallback: parse status code / message string
    if "429" in body or "quota" in body or "resource exhausted" in body:
        return ProviderError("quota", "gemini", exc)
    if "rate" in body and "limit" in body:
@@ -288,20 +279,12 @@ def _run_script(script: str, base_dir: str) -> str:
    """
    Delegate to the GUI confirmation callback.
    Returns result string (stdout/stderr) or a rejection message.
-    Also fires tool_log_callback if registered.
    """
    if confirm_and_run_callback is None:
        return "ERROR: no confirmation handler registered"
-    # confirm_and_run_callback returns (result, script_path) or None
-    outcome = confirm_and_run_callback(script, base_dir)
-    if outcome is None:
-        result = "USER REJECTED: command was not executed"
-        if tool_log_callback is not None:
-            tool_log_callback(script, result, None)
-        return result
-    result, script_path = outcome
-    if tool_log_callback is not None:
-        tool_log_callback(script, result, script_path)
+    result = confirm_and_run_callback(script, base_dir)
+    if result is None:
+        return "USER REJECTED: command was not executed"
    return result

 # ------------------------------------------------------------------ gemini
@@ -338,6 +321,7 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str) -> str:
        response = _gemini_chat.send_message(full_message)

        for round_idx in range(MAX_TOOL_ROUNDS):
+            # Log the raw response candidates as a text summary
            text_parts_raw = [
                part.text
                for candidate in response.candidates
@@ -399,35 +383,6 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str) -> str:
        raise _classify_gemini_error(exc) from exc

 # ------------------------------------------------------------------ anthropic
-#
-# Caching strategy (Anthropic prompt caching):
-#
-#   The Anthropic API caches a prefix of the input tokens.  To maximise hits:
-#
-#   1.  A persistent system prompt is sent on every request with
-#       cache_control={"type":"ephemeral"} so it is cached after the first call
-#       and reused on subsequent calls within the 5-minute TTL window.
-#
-#   2.  The context block (aggregated markdown) is placed as the FIRST user
-#       message in the history and also marked with cache_control.  Because the
-#       system prompt and the context are stable across tool-use rounds within a
-#       single send() call, the cache hit rate is very high after round 0.
-#
-#   3.  Tool definitions are passed with cache_control on the last tool so the
-#       entire tools array is also cached.
-#
-#   Token accounting: the response payload contains cache_creation_input_tokens
-#   and cache_read_input_tokens in addition to the regular input_tokens field.
-#   These are included in the comms log under "usage".
-
-def _anthropic_tools_with_cache() -> list[dict]:
-    """Return the tools list with cache_control on the last entry."""
-    import copy
-    tools = copy.deepcopy(_ANTHROPIC_TOOLS)
-    # Mark the last tool so the entire prefix (system + tools) gets cached
-    tools[-1]["cache_control"] = {"type": "ephemeral"}
-    return tools
-

 def _ensure_anthropic_client():
    global _anthropic_client
@@ -436,7 +391,6 @@ def _ensure_anthropic_client():
        creds = _load_credentials()
        _anthropic_client = anthropic.Anthropic(api_key=creds["anthropic"]["api_key"])

-
 def _send_anthropic(md_content: str, user_message: str, base_dir: str) -> str:
    global _anthropic_history
    import anthropic
@@ -444,54 +398,19 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str) -> str:
    try:
        _ensure_anthropic_client()

-        # ----------------------------------------------------------------
-        # Build the user turn.
-        #
-        # Structure the content as two blocks so the large context portion
-        # can be cached independently of the user question:
-        #
-        #   [0] context block  <- cache_control applied here
-        #   [1] user question  <- not cached (changes every turn)
-        #
-        # The Anthropic cache anchors at the LAST cache_control marker in
-        # the prefix, so everything up to and including the context block
-        # will be served from cache on subsequent rounds.
-        # ----------------------------------------------------------------
-        user_content = [
-            {
-                "type": "text",
-                "text": f"<context>\n{md_content}\n</context>",
-                "cache_control": {"type": "ephemeral"},
-            },
-            {
-                "type": "text",
-                "text": user_message,
-            },
-        ]
-
-        _anthropic_history.append({"role": "user", "content": user_content})
+        full_message = f"<context>\n{md_content}\n</context>\n\n{user_message}"
+        _anthropic_history.append({"role": "user", "content": full_message})

        _append_comms("OUT", "request", {
-            "message": f"<context>\n{md_content}\n</context>\n\n{user_message}",
+            "message": full_message,
        })

        for round_idx in range(MAX_TOOL_ROUNDS):
            response = _anthropic_client.messages.create(
                model=_model,
                max_tokens=8096,
-                system=[
-                    {
-                        "type": "text",
-                        "text": _ANTHROPIC_SYSTEM,
-                        "cache_control": {"type": "ephemeral"},
-                    }
-                ],
-                tools=_anthropic_tools_with_cache(),
-                messages=_anthropic_history,
-                # Ask the API to return cache token counts
-                # betas=["prompt-caching-2024-07-31"],
-                # TODO(Claude): betas is not a valid field:
-                # ERROR: Messages.create() got an unexpected keyword argument 'betas'
+                tools=_ANTHROPIC_TOOLS,
+                messages=_anthropic_history
            )

            _anthropic_history.append({
@@ -499,34 +418,22 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str) -> str:
                "content": response.content
            })

+            # Summarise the response content for the log
            text_blocks = [b.text for b in response.content if hasattr(b, "text") and b.text]
            tool_use_blocks = [
                {"id": b.id, "name": b.name, "input": b.input}
                for b in response.content
                if b.type == "tool_use"
            ]
-
-            # Extended usage includes cache fields when the beta header is set
-            usage_dict: dict = {}
-            if response.usage:
-                usage_dict = {
-                    "input_tokens":              response.usage.input_tokens,
-                    "output_tokens":             response.usage.output_tokens,
-                }
-                # cache fields are present when the beta is active
-                cache_creation = getattr(response.usage, "cache_creation_input_tokens", None)
-                cache_read     = getattr(response.usage, "cache_read_input_tokens", None)
-                if cache_creation is not None:
-                    usage_dict["cache_creation_input_tokens"] = cache_creation
-                if cache_read is not None:
-                    usage_dict["cache_read_input_tokens"] = cache_read
-
            _append_comms("IN", "response", {
                "round":       round_idx,
                "stop_reason": response.stop_reason,
                "text":        "\n".join(text_blocks),
                "tool_calls":  tool_use_blocks,
-                "usage":       usage_dict,
+                "usage":       {
+                    "input_tokens":  response.usage.input_tokens,
+                    "output_tokens": response.usage.output_tokens,
+                } if response.usage else {},
            })

            if response.stop_reason != "tool_use":