Merge origin/cache
This commit is contained in:
70
ai_client.py
70
ai_client.py
@@ -17,7 +17,9 @@ import time
|
||||
import datetime
|
||||
import hashlib
|
||||
import difflib
|
||||
import threading
|
||||
from pathlib import Path
|
||||
import os
|
||||
import file_cache
|
||||
import mcp_client
|
||||
import anthropic
|
||||
@@ -53,6 +55,8 @@ _GEMINI_CACHE_TTL = 3600
|
||||
|
||||
_anthropic_client = None
|
||||
_anthropic_history: list[dict] = []
|
||||
_anthropic_history_lock = threading.Lock()
|
||||
_send_lock = threading.Lock()
|
||||
|
||||
# Injected by gui.py - called when AI wants to run a command.
|
||||
# Signature: (script: str, base_dir: str) -> str | None
|
||||
@@ -69,6 +73,10 @@ tool_log_callback = None
|
||||
# Increased to allow thorough code exploration before forcing a summary
|
||||
MAX_TOOL_ROUNDS = 10
|
||||
|
||||
# Maximum cumulative bytes of tool output allowed per send() call.
|
||||
# Prevents unbounded memory growth during long tool-calling loops.
|
||||
_MAX_TOOL_OUTPUT_BYTES = 500_000
|
||||
|
||||
# Maximum characters per text chunk sent to Anthropic.
|
||||
# Kept well under the ~200k token API limit.
|
||||
_ANTHROPIC_CHUNK_SIZE = 120_000
|
||||
@@ -130,8 +138,18 @@ def clear_comms_log():
|
||||
|
||||
|
||||
def _load_credentials() -> dict:
|
||||
with open("credentials.toml", "rb") as f:
|
||||
return tomllib.load(f)
|
||||
cred_path = os.environ.get("SLOP_CREDENTIALS", "credentials.toml")
|
||||
try:
|
||||
with open(cred_path, "rb") as f:
|
||||
return tomllib.load(f)
|
||||
except FileNotFoundError:
|
||||
raise FileNotFoundError(
|
||||
f"Credentials file not found: {cred_path}\n"
|
||||
f"Create a credentials.toml with:\n"
|
||||
f" [gemini]\n api_key = \"your-key\"\n"
|
||||
f" [anthropic]\n api_key = \"your-key\"\n"
|
||||
f"Or set SLOP_CREDENTIALS env var to a custom path."
|
||||
)
|
||||
|
||||
|
||||
# ------------------------------------------------------------------ provider errors
|
||||
@@ -246,7 +264,8 @@ def reset_session():
|
||||
_gemini_cache_md_hash = None
|
||||
_gemini_cache_created_at = None
|
||||
_anthropic_client = None
|
||||
_anthropic_history = []
|
||||
with _anthropic_history_lock:
|
||||
_anthropic_history = []
|
||||
_CACHED_ANTHROPIC_TOOLS = None
|
||||
file_cache.reset_client()
|
||||
|
||||
@@ -652,6 +671,7 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str,
|
||||
|
||||
_append_comms("OUT", "request", {"message": f"[ctx {len(md_content)} + msg {len(user_message)}]"})
|
||||
payload, all_text = user_message, []
|
||||
_cumulative_tool_bytes = 0
|
||||
|
||||
# Strip stale file refreshes and truncate old tool outputs ONCE before
|
||||
# entering the tool loop (not per-round — history entries don't change).
|
||||
@@ -701,11 +721,11 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str,
|
||||
if not hist: break
|
||||
for p in hist[0].parts:
|
||||
if hasattr(p, "text") and p.text:
|
||||
saved += len(p.text) // 4
|
||||
saved += int(len(p.text) / _CHARS_PER_TOKEN)
|
||||
elif hasattr(p, "function_response") and p.function_response:
|
||||
r = getattr(p.function_response, "response", {})
|
||||
if isinstance(r, dict):
|
||||
saved += len(str(r.get("output", ""))) // 4
|
||||
saved += int(len(str(r.get("output", ""))) / _CHARS_PER_TOKEN)
|
||||
hist.pop(0)
|
||||
dropped += 1
|
||||
total_in -= max(saved, 200)
|
||||
@@ -736,10 +756,17 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str,
|
||||
if r_idx == MAX_TOOL_ROUNDS: out += "\n\n[SYSTEM: MAX ROUNDS. PROVIDE FINAL ANSWER.]"
|
||||
|
||||
out = _truncate_tool_output(out)
|
||||
_cumulative_tool_bytes += len(out)
|
||||
f_resps.append(types.Part.from_function_response(name=name, response={"output": out}))
|
||||
log.append({"tool_use_id": name, "content": out})
|
||||
events.emit("tool_execution", payload={"status": "completed", "tool": name, "result": out, "round": r_idx})
|
||||
|
||||
|
||||
if _cumulative_tool_bytes > _MAX_TOOL_OUTPUT_BYTES:
|
||||
f_resps.append(types.Part.from_text(
|
||||
f"SYSTEM WARNING: Cumulative tool output exceeded {_MAX_TOOL_OUTPUT_BYTES // 1000}KB budget. Provide your final answer now."
|
||||
))
|
||||
_append_comms("OUT", "request", {"message": f"[TOOL OUTPUT BUDGET EXCEEDED: {_cumulative_tool_bytes} bytes]"})
|
||||
|
||||
_append_comms("OUT", "tool_result_send", {"results": log})
|
||||
payload = f_resps
|
||||
|
||||
@@ -1046,6 +1073,7 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item
|
||||
})
|
||||
|
||||
all_text_parts = []
|
||||
_cumulative_tool_bytes = 0
|
||||
|
||||
# We allow MAX_TOOL_ROUNDS, plus 1 final loop to get the text synthesis
|
||||
for round_idx in range(MAX_TOOL_ROUNDS + 2):
|
||||
@@ -1132,10 +1160,12 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item
|
||||
_append_comms("OUT", "tool_call", {"name": b_name, "id": b_id, "args": b_input})
|
||||
output = mcp_client.dispatch(b_name, b_input)
|
||||
_append_comms("IN", "tool_result", {"name": b_name, "id": b_id, "output": output})
|
||||
truncated = _truncate_tool_output(output)
|
||||
_cumulative_tool_bytes += len(truncated)
|
||||
tool_results.append({
|
||||
"type": "tool_result",
|
||||
"tool_use_id": b_id,
|
||||
"content": _truncate_tool_output(output),
|
||||
"content": truncated,
|
||||
})
|
||||
events.emit("tool_execution", payload={"status": "completed", "tool": b_name, "result": output, "round": round_idx})
|
||||
elif b_name == TOOL_NAME:
|
||||
@@ -1151,13 +1181,22 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item
|
||||
"id": b_id,
|
||||
"output": output,
|
||||
})
|
||||
truncated = _truncate_tool_output(output)
|
||||
_cumulative_tool_bytes += len(truncated)
|
||||
tool_results.append({
|
||||
"type": "tool_result",
|
||||
"tool_use_id": b_id,
|
||||
"content": _truncate_tool_output(output),
|
||||
"content": truncated,
|
||||
})
|
||||
events.emit("tool_execution", payload={"status": "completed", "tool": b_name, "result": output, "round": round_idx})
|
||||
|
||||
if _cumulative_tool_bytes > _MAX_TOOL_OUTPUT_BYTES:
|
||||
tool_results.append({
|
||||
"type": "text",
|
||||
"text": f"SYSTEM WARNING: Cumulative tool output exceeded {_MAX_TOOL_OUTPUT_BYTES // 1000}KB budget. Provide your final answer now."
|
||||
})
|
||||
_append_comms("OUT", "request", {"message": f"[TOOL OUTPUT BUDGET EXCEEDED: {_cumulative_tool_bytes} bytes]"})
|
||||
|
||||
# Refresh file context after tool calls — only inject CHANGED files
|
||||
if file_items:
|
||||
file_items, changed = _reread_file_items(file_items)
|
||||
@@ -1220,11 +1259,12 @@ def send(
|
||||
discussion_history : discussion history text (used by Gemini to inject as
|
||||
conversation message instead of caching it)
|
||||
"""
|
||||
if _provider == "gemini":
|
||||
return _send_gemini(md_content, user_message, base_dir, file_items, discussion_history)
|
||||
elif _provider == "anthropic":
|
||||
return _send_anthropic(md_content, user_message, base_dir, file_items, discussion_history)
|
||||
raise ValueError(f"unknown provider: {_provider}")
|
||||
with _send_lock:
|
||||
if _provider == "gemini":
|
||||
return _send_gemini(md_content, user_message, base_dir, file_items, discussion_history)
|
||||
elif _provider == "anthropic":
|
||||
return _send_anthropic(md_content, user_message, base_dir, file_items, discussion_history)
|
||||
raise ValueError(f"unknown provider: {_provider}")
|
||||
|
||||
def get_history_bleed_stats() -> dict:
|
||||
"""
|
||||
@@ -1232,7 +1272,9 @@ def get_history_bleed_stats() -> dict:
|
||||
"""
|
||||
if _provider == "anthropic":
|
||||
# For Anthropic, we have a robust estimator
|
||||
current_tokens = _estimate_prompt_tokens([], _anthropic_history)
|
||||
with _anthropic_history_lock:
|
||||
history_snapshot = list(_anthropic_history)
|
||||
current_tokens = _estimate_prompt_tokens([], history_snapshot)
|
||||
limit_tokens = _ANTHROPIC_MAX_PROMPT_TOKENS
|
||||
percentage = (current_tokens / limit_tokens) * 100 if limit_tokens > 0 else 0
|
||||
return {
|
||||
|
||||
Reference in New Issue
Block a user