Add history truncation to cap oversized tool outputs retained in chat history
This commit is contained in:
42
ai_client.py
42
ai_client.py
@@ -23,10 +23,13 @@ _model: str = "gemini-2.5-flash"
|
||||
_temperature: float = 0.0
|
||||
_max_tokens: int = 8192
|
||||
|
||||
def set_model_params(temp: float, max_tok: int):
|
||||
global _temperature, _max_tokens
|
||||
# Character cap applied to stored tool outputs when compressing chat
# history; a value <= 0 disables truncation entirely.
_history_trunc_limit: int = 8000


def set_model_params(temp: float, max_tok: int, trunc_limit: int = 8000):
    """Configure sampling temperature, max output tokens, and the
    history-truncation character limit.

    trunc_limit defaults to 8000, so existing two-argument callers
    keep their previous behavior unchanged.
    """
    global _temperature, _max_tokens, _history_trunc_limit
    _temperature, _max_tokens, _history_trunc_limit = temp, max_tok, trunc_limit
|
||||
|
||||
_gemini_client = None
|
||||
_gemini_chat = None
|
||||
@@ -201,6 +204,16 @@ def set_provider(provider: str, model: str):
|
||||
_model = model
|
||||
|
||||
|
||||
|
||||
def cleanup():
    """Called on application exit to prevent orphaned caches from billing."""
    global _gemini_client, _gemini_cache
    # Guard clause: nothing to release unless both a client and a cache exist.
    if not (_gemini_client and _gemini_cache):
        return
    try:
        _gemini_client.caches.delete(name=_gemini_cache.name)
    except Exception:
        # Best-effort teardown: a failed cache delete must not block shutdown.
        pass
|
||||
|
||||
def reset_session():
|
||||
global _gemini_client, _gemini_chat, _gemini_cache
|
||||
global _anthropic_client, _anthropic_history
|
||||
@@ -487,6 +500,22 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items:
|
||||
_gemini_chat = _gemini_client.chats.create(**kwargs)
|
||||
_gemini_chat._last_md_hash = current_md_hash
|
||||
|
||||
# COMPRESS HISTORY: Truncate massive tool outputs from previous turns to stop token leaks
|
||||
if _gemini_chat and getattr(_gemini_chat, "history", None):
|
||||
for msg in _gemini_chat.history:
|
||||
if msg.role == "user" and hasattr(msg, "parts"):
|
||||
for p in msg.parts:
|
||||
if hasattr(p, "function_response") and p.function_response and hasattr(p.function_response, "response"):
|
||||
r = p.function_response.response
|
||||
if isinstance(r, dict) and "output" in r:
|
||||
val = r["output"]
|
||||
if isinstance(val, str):
|
||||
if "[SYSTEM: FILES UPDATED]" in val:
|
||||
val = val.split("[SYSTEM: FILES UPDATED]")[0].strip()
|
||||
if _history_trunc_limit > 0 and len(val) > _history_trunc_limit:
|
||||
val = val[:_history_trunc_limit] + "\n\n... [TRUNCATED BY SYSTEM TO SAVE TOKENS. Original output was too large.]"
|
||||
r["output"] = val
|
||||
|
||||
_append_comms("OUT", "request", {"message": f"[ctx {len(md_content)} + msg {len(user_message)}]"})
|
||||
payload, all_text = user_message, []
|
||||
|
||||
@@ -760,6 +789,15 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item
|
||||
|
||||
user_content = [{"type": "text", "text": user_message}]
|
||||
|
||||
# COMPRESS HISTORY: Truncate massive tool outputs from previous turns
|
||||
for msg in _anthropic_history:
|
||||
if msg.get("role") == "user" and isinstance(msg.get("content"), list):
|
||||
for block in msg["content"]:
|
||||
if isinstance(block, dict) and block.get("type") == "tool_result":
|
||||
t_content = block.get("content", "")
|
||||
if _history_trunc_limit > 0 and isinstance(t_content, str) and len(t_content) > _history_trunc_limit:
|
||||
block["content"] = t_content[:_history_trunc_limit] + "\n\n... [TRUNCATED BY SYSTEM TO SAVE TOKENS. Original output was too large.]"
|
||||
|
||||
_strip_cache_controls(_anthropic_history)
|
||||
_repair_anthropic_history(_anthropic_history)
|
||||
_anthropic_history.append({"role": "user", "content": user_content})
|
||||
|
||||
Reference in New Issue
Block a user