Merge origin/cache

2026-02-23 22:03:06 -05:00
parent 1d8626bc6b bd8551d282
commit 2ab3f101d6
4 changed files with 113 additions and 28 deletions
--- a/ai_client.py
+++ b/ai_client.py
@@ -17,7 +17,9 @@ import time
 import datetime
 import hashlib
 import difflib
+import threading
 from pathlib import Path
+import os
 import file_cache
 import mcp_client
 import anthropic
@@ -53,6 +55,8 @@ _GEMINI_CACHE_TTL = 3600

 _anthropic_client = None
 _anthropic_history: list[dict] = []
+_anthropic_history_lock = threading.Lock()
+_send_lock = threading.Lock()

 # Injected by gui.py - called when AI wants to run a command.
 # Signature: (script: str, base_dir: str) -> str | None
@@ -69,6 +73,10 @@ tool_log_callback = None
 # Increased to allow thorough code exploration before forcing a summary
 MAX_TOOL_ROUNDS = 10

+# Maximum cumulative size of (truncated) tool output allowed per send() call, measured
+# with len() on the text (characters, not encoded bytes). Bounds growth in long tool loops.
+_MAX_TOOL_OUTPUT_BYTES = 500_000
+
 # Maximum characters per text chunk sent to Anthropic.
 # Kept well under the ~200k token API limit.
 _ANTHROPIC_CHUNK_SIZE = 120_000
@@ -130,8 +138,18 @@ def clear_comms_log():


 def _load_credentials() -> dict:
-    with open("credentials.toml", "rb") as f:
-        return tomllib.load(f)
+    cred_path = os.environ.get("SLOP_CREDENTIALS", "credentials.toml")
+    try:
+        with open(cred_path, "rb") as f:
+            return tomllib.load(f)
+    except FileNotFoundError:
+        raise FileNotFoundError(
+            f"Credentials file not found: {cred_path}\n"
+            f"Create a credentials.toml with:\n"
+            f"  [gemini]\n  api_key = \"your-key\"\n"
+            f"  [anthropic]\n  api_key = \"your-key\"\n"
+            f"Or set SLOP_CREDENTIALS env var to a custom path."
+        )


 # ------------------------------------------------------------------ provider errors
@@ -246,7 +264,8 @@ def reset_session():
    _gemini_cache_md_hash = None
    _gemini_cache_created_at = None
    _anthropic_client = None
-    _anthropic_history = []
+    with _anthropic_history_lock:
+        _anthropic_history = []
    _CACHED_ANTHROPIC_TOOLS = None
    file_cache.reset_client()

@@ -652,6 +671,7 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str,

        _append_comms("OUT", "request", {"message": f"[ctx {len(md_content)} + msg {len(user_message)}]"})
        payload, all_text = user_message, []
+        _cumulative_tool_bytes = 0

        # Strip stale file refreshes and truncate old tool outputs ONCE before
        # entering the tool loop (not per-round — history entries don't change).
@@ -701,11 +721,11 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str,
                        if not hist: break
                        for p in hist[0].parts:
                            if hasattr(p, "text") and p.text:
-                                saved += len(p.text) // 4
+                                saved += int(len(p.text) / _CHARS_PER_TOKEN)
                            elif hasattr(p, "function_response") and p.function_response:
                                r = getattr(p.function_response, "response", {})
                                if isinstance(r, dict):
-                                    saved += len(str(r.get("output", ""))) // 4
+                                    saved += int(len(str(r.get("output", ""))) / _CHARS_PER_TOKEN)
                        hist.pop(0)
                        dropped += 1
                    total_in -= max(saved, 200)
@@ -736,10 +756,17 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str,
                    if r_idx == MAX_TOOL_ROUNDS: out += "\n\n[SYSTEM: MAX ROUNDS. PROVIDE FINAL ANSWER.]"

                out = _truncate_tool_output(out)
+                _cumulative_tool_bytes += len(out)
                f_resps.append(types.Part.from_function_response(name=name, response={"output": out}))
                log.append({"tool_use_id": name, "content": out})
                events.emit("tool_execution", payload={"status": "completed", "tool": name, "result": out, "round": r_idx})
-            
+
+            if _cumulative_tool_bytes > _MAX_TOOL_OUTPUT_BYTES:
+                f_resps.append(types.Part.from_text(
+                    f"SYSTEM WARNING: Cumulative tool output exceeded {_MAX_TOOL_OUTPUT_BYTES // 1000}KB budget. Provide your final answer now."
+                ))
+                _append_comms("OUT", "request", {"message": f"[TOOL OUTPUT BUDGET EXCEEDED: {_cumulative_tool_bytes} bytes]"})
+
            _append_comms("OUT", "tool_result_send", {"results": log})
            payload = f_resps
            
@@ -1046,6 +1073,7 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item
        })

        all_text_parts = []
+        _cumulative_tool_bytes = 0

        # We allow MAX_TOOL_ROUNDS, plus 1 final loop to get the text synthesis
        for round_idx in range(MAX_TOOL_ROUNDS + 2):
@@ -1132,10 +1160,12 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item
                    _append_comms("OUT", "tool_call", {"name": b_name, "id": b_id, "args": b_input})
                    output = mcp_client.dispatch(b_name, b_input)
                    _append_comms("IN", "tool_result", {"name": b_name, "id": b_id, "output": output})
+                    truncated = _truncate_tool_output(output)
+                    _cumulative_tool_bytes += len(truncated)
                    tool_results.append({
                        "type":        "tool_result",
                        "tool_use_id": b_id,
-                        "content":     _truncate_tool_output(output),
+                        "content":     truncated,
                    })
                    events.emit("tool_execution", payload={"status": "completed", "tool": b_name, "result": output, "round": round_idx})
                elif b_name == TOOL_NAME:
@@ -1151,13 +1181,22 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item
                        "id":     b_id,
                        "output": output,
                    })
+                    truncated = _truncate_tool_output(output)
+                    _cumulative_tool_bytes += len(truncated)
                    tool_results.append({
                        "type":        "tool_result",
                        "tool_use_id": b_id,
-                        "content":     _truncate_tool_output(output),
+                        "content":     truncated,
                    })
                    events.emit("tool_execution", payload={"status": "completed", "tool": b_name, "result": output, "round": round_idx})

+            if _cumulative_tool_bytes > _MAX_TOOL_OUTPUT_BYTES:
+                tool_results.append({
+                    "type": "text",
+                    "text": f"SYSTEM WARNING: Cumulative tool output exceeded {_MAX_TOOL_OUTPUT_BYTES // 1000}KB budget. Provide your final answer now."
+                })
+                _append_comms("OUT", "request", {"message": f"[TOOL OUTPUT BUDGET EXCEEDED: {_cumulative_tool_bytes} bytes]"})
+
            # Refresh file context after tool calls — only inject CHANGED files
            if file_items:
                file_items, changed = _reread_file_items(file_items)
@@ -1220,11 +1259,12 @@ def send(
    discussion_history : discussion history text (used by Gemini to inject as
                         conversation message instead of caching it)
    """
-    if _provider == "gemini":
-        return _send_gemini(md_content, user_message, base_dir, file_items, discussion_history)
-    elif _provider == "anthropic":
-        return _send_anthropic(md_content, user_message, base_dir, file_items, discussion_history)
-    raise ValueError(f"unknown provider: {_provider}")
+    with _send_lock:
+        if _provider == "gemini":
+            return _send_gemini(md_content, user_message, base_dir, file_items, discussion_history)
+        elif _provider == "anthropic":
+            return _send_anthropic(md_content, user_message, base_dir, file_items, discussion_history)
+        raise ValueError(f"unknown provider: {_provider}")

 def get_history_bleed_stats() -> dict:
    """
@@ -1232,7 +1272,9 @@ def get_history_bleed_stats() -> dict:
    """
    if _provider == "anthropic":
        # For Anthropic, we have a robust estimator
-        current_tokens = _estimate_prompt_tokens([], _anthropic_history)
+        with _anthropic_history_lock:
+            history_snapshot = list(_anthropic_history)
+        current_tokens = _estimate_prompt_tokens([], history_snapshot)
        limit_tokens = _ANTHROPIC_MAX_PROMPT_TOKENS
        percentage = (current_tokens / limit_tokens) * 100 if limit_tokens > 0 else 0
        return {