add history truncation

This commit is contained in:
2026-02-22 10:34:53 -05:00
parent bf2d09f3fd
commit da8df7a393
4 changed files with 52 additions and 9 deletions

View File

@@ -23,10 +23,13 @@ _model: str = "gemini-2.5-flash"
_temperature: float = 0.0
_max_tokens: int = 8192
def set_model_params(temp: float, max_tok: int):
global _temperature, _max_tokens
_history_trunc_limit: int = 8000
def set_model_params(temp: float, max_tok: int, trunc_limit: int = 8000):
    """Update the module-wide generation settings.

    Args:
        temp: Sampling temperature forwarded to the model.
        max_tok: Maximum number of output tokens per response.
        trunc_limit: Character cap applied to stored tool outputs in the
            chat history; a value <= 0 disables truncation entirely.
    """
    global _temperature, _max_tokens, _history_trunc_limit
    # The three settings are independent; assignment order is arbitrary.
    _history_trunc_limit = trunc_limit
    _max_tokens = max_tok
    _temperature = temp
_gemini_client = None
_gemini_chat = None
@@ -201,6 +204,16 @@ def set_provider(provider: str, model: str):
_model = model
def cleanup():
    """Best-effort teardown on application exit.

    Deletes the server-side Gemini cache (if one was created) so an
    orphaned cache does not keep accruing billing charges. Any failure
    during deletion is deliberately ignored — the process is exiting.
    """
    global _gemini_client, _gemini_cache
    if not (_gemini_client and _gemini_cache):
        return
    try:
        _gemini_client.caches.delete(name=_gemini_cache.name)
    except Exception:
        # Best-effort: never let cache cleanup block shutdown.
        pass
def reset_session():
global _gemini_client, _gemini_chat, _gemini_cache
global _anthropic_client, _anthropic_history
@@ -487,6 +500,22 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items:
_gemini_chat = _gemini_client.chats.create(**kwargs)
_gemini_chat._last_md_hash = current_md_hash
# COMPRESS HISTORY: Truncate massive tool outputs from previous turns to stop token leaks
if _gemini_chat and getattr(_gemini_chat, "history", None):
for msg in _gemini_chat.history:
if msg.role == "user" and hasattr(msg, "parts"):
for p in msg.parts:
if hasattr(p, "function_response") and p.function_response and hasattr(p.function_response, "response"):
r = p.function_response.response
if isinstance(r, dict) and "output" in r:
val = r["output"]
if isinstance(val, str):
if "[SYSTEM: FILES UPDATED]" in val:
val = val.split("[SYSTEM: FILES UPDATED]")[0].strip()
if _history_trunc_limit > 0 and len(val) > _history_trunc_limit:
val = val[:_history_trunc_limit] + "\n\n... [TRUNCATED BY SYSTEM TO SAVE TOKENS. Original output was too large.]"
r["output"] = val
_append_comms("OUT", "request", {"message": f"[ctx {len(md_content)} + msg {len(user_message)}]"})
payload, all_text = user_message, []
@@ -760,6 +789,15 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item
user_content = [{"type": "text", "text": user_message}]
# COMPRESS HISTORY: Truncate massive tool outputs from previous turns
for msg in _anthropic_history:
if msg.get("role") == "user" and isinstance(msg.get("content"), list):
for block in msg["content"]:
if isinstance(block, dict) and block.get("type") == "tool_result":
t_content = block.get("content", "")
if _history_trunc_limit > 0 and isinstance(t_content, str) and len(t_content) > _history_trunc_limit:
block["content"] = t_content[:_history_trunc_limit] + "\n\n... [TRUNCATED BY SYSTEM TO SAVE TOKENS. Original output was too large.]"
_strip_cache_controls(_anthropic_history)
_repair_anthropic_history(_anthropic_history)
_anthropic_history.append({"role": "user", "content": user_content})