From da8df7a39329f36ff94ae2d21973469fe2d5c333 Mon Sep 17 00:00:00 2001
From: Ed_ <edwardgz@gmail.com>
Date: Sun, 22 Feb 2026 10:34:53 -0500
Subject: [PATCH] add history truncation and Gemini cache cleanup on exit

---
 ai_client.py     | 42 ++++++++++++++++++++++++++++++++++++++++--
 config.toml      |  1 +
 gui.py           | 14 +++++++++-----
 manual_slop.toml |  4 ++--
 4 files changed, 52 insertions(+), 9 deletions(-)

diff --git a/ai_client.py b/ai_client.py
index 509ee7c..74570ab 100644
--- a/ai_client.py
+++ b/ai_client.py
@@ -23,10 +23,13 @@ _model: str = "gemini-2.5-flash"
 _temperature: float = 0.0
 _max_tokens: int = 8192
 
-def set_model_params(temp: float, max_tok: int):
-    global _temperature, _max_tokens
+_history_trunc_limit: int = 8000  # max characters kept per historical tool output; values <= 0 disable truncation
+
+def set_model_params(temp: float, max_tok: int, trunc_limit: int = 8000):
+    global _temperature, _max_tokens, _history_trunc_limit
     _temperature = temp
     _max_tokens = max_tok
+    _history_trunc_limit = trunc_limit  # read by the history-compression passes in _send_gemini / _send_anthropic
 
 _gemini_client = None
 _gemini_chat = None
@@ -201,6 +204,16 @@ def set_provider(provider: str, model: str):
     _model = model
 
 
+
+def cleanup():
+    """Best-effort delete of the active Gemini cache on application exit so an orphaned cache does not keep billing."""
+    if not (_gemini_client and _gemini_cache):
+        return  # no client or no cache: nothing to delete
+    try:
+        _gemini_client.caches.delete(name=_gemini_cache.name)
+    except Exception as exc:  # never block shutdown on an API failure, but do not hide it either
+        print(f"[ai_client] failed to delete Gemini cache on exit: {exc}")
+
 def reset_session():
     global _gemini_client, _gemini_chat, _gemini_cache
     global _anthropic_client, _anthropic_history
@@ -487,6 +500,22 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items:
             _gemini_chat = _gemini_client.chats.create(**kwargs)
             _gemini_chat._last_md_hash = current_md_hash
         
+        # COMPRESS HISTORY: Truncate massive tool outputs from previous turns to stop token leaks
+        if _gemini_chat and getattr(_gemini_chat, "history", None):
+            for msg in _gemini_chat.history:
+                if msg.role == "user" and hasattr(msg, "parts"):
+                    for p in msg.parts:
+                        if hasattr(p, "function_response") and p.function_response and hasattr(p.function_response, "response"):
+                            r = p.function_response.response
+                            if isinstance(r, dict) and "output" in r:
+                                val = r["output"]
+                                if isinstance(val, str):
+                                    if "[SYSTEM: FILES UPDATED]" in val:
+                                        val = val.split("[SYSTEM: FILES UPDATED]")[0].strip()
+                                    if _history_trunc_limit > 0 and len(val) > _history_trunc_limit:
+                                        val = val[:_history_trunc_limit] + "\n\n... [TRUNCATED BY SYSTEM TO SAVE TOKENS. Original output was too large.]"
+                                    r["output"] = val
+
         _append_comms("OUT", "request", {"message": f"[ctx {len(md_content)} + msg {len(user_message)}]"})
         payload, all_text = user_message, []
         
@@ -760,6 +789,15 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item
 
         user_content = [{"type": "text", "text": user_message}]
 
+        # COMPRESS HISTORY: Truncate massive tool outputs from previous turns
+        for msg in _anthropic_history:
+            if msg.get("role") == "user" and isinstance(msg.get("content"), list):
+                for block in msg["content"]:
+                    if isinstance(block, dict) and block.get("type") == "tool_result":
+                        t_content = block.get("content", "")
+                        if _history_trunc_limit > 0 and isinstance(t_content, str) and len(t_content) > _history_trunc_limit:
+                            block["content"] = t_content[:_history_trunc_limit] + "\n\n... [TRUNCATED BY SYSTEM TO SAVE TOKENS. Original output was too large.]"
+
         _strip_cache_controls(_anthropic_history)
         _repair_anthropic_history(_anthropic_history)
         _anthropic_history.append({"role": "user", "content": user_content})
diff --git a/config.toml b/config.toml
index 37c1d7c..d1ee746 100644
--- a/config.toml
+++ b/config.toml
@@ -3,6 +3,7 @@ provider = "gemini"
 model = "gemini-3.1-pro-preview"
 temperature = 0.6000000238418579
 max_tokens = 12000
+history_trunc_limit = 8000
 system_prompt = "DO NOT EVER make a shell script unless told to. DO NOT EVER make a readme or a file describing your changes unless your are told to. If you have commands I should be entering into the command line or if you have something to explain to me, please just use code blocks or normal text output. DO NOT DO ANYTHING OTHER THAN WHAT YOU WERE TOLD TODO. DO NOT EVER, EVER DO ANYTHING OTHER THAN WHAT YOU WERE TOLD TO DO. IF YOU WANT TO DO OTHER THINGS, SIMPLY SUGGEST THEM, AND THEN I WILL REVIEW YOUR CHANGES, AND MAKE THE DECISION ON HOW TO PROCEED. WHEN WRITING SCRIPTS USE A 120-160 character limit per line. I don't want to see scrunched code.\n"
 
 [theme]
diff --git a/gui.py b/gui.py
index 4da3671..6f17c85 100644
--- a/gui.py
+++ b/gui.py
@@ -3,10 +3,9 @@
 Note(Gemini):
 The main DearPyGui interface orchestrator.
 This is not a simple UI wrapper; it's a complex state machine that:
-1. Handles multi-viewport docking (allowing panels to act as OS windows).
-2. Manages background daemon threads for AI requests so the UI doesn't block.
-3. Implements lock-protected comms queues for safe main-thread rendering.
-4. Pauses AI execution to prompt the human for destructive PowerShell script approval.
+1. Manages background daemon threads for AI requests so the UI doesn't block.
+2. Implements lock-protected comms queues for safe main-thread rendering.
+3. Pauses AI execution to prompt the human for destructive PowerShell script approval.
 """
 # gui.py
 import dearpygui.dearpygui as dpg
@@ -377,6 +376,7 @@ class App:
         self.current_model: str = ai_cfg.get("model", "gemini-2.5-flash")
         self.temperature: float = ai_cfg.get("temperature", 0.0)
         self.max_tokens: int = ai_cfg.get("max_tokens", 8192)
+        self.history_trunc_limit: int = ai_cfg.get("history_trunc_limit", 8000)
         self.available_models: list[str] = []
 
         # ---- project management ----
@@ -845,6 +845,7 @@ class App:
             "model":    self.current_model,
             "temperature": dpg.get_value("ai_temperature") if dpg.does_item_exist("ai_temperature") else self.temperature,
             "max_tokens": dpg.get_value("ai_max_tokens") if dpg.does_item_exist("ai_max_tokens") else self.max_tokens,
+            "history_trunc_limit": dpg.get_value("ai_history_trunc") if dpg.does_item_exist("ai_history_trunc") else self.history_trunc_limit,
         }
         if dpg.does_item_exist("global_system_prompt"):
             self.config["ai"]["system_prompt"] = dpg.get_value("global_system_prompt")
@@ -1153,7 +1154,8 @@ class App:
         ai_client.set_custom_system_prompt("\n\n".join(combined_sp))
         temp = dpg.get_value("ai_temperature") if dpg.does_item_exist("ai_temperature") else 0.0
         max_tok = dpg.get_value("ai_max_tokens") if dpg.does_item_exist("ai_max_tokens") else 8192
-        ai_client.set_model_params(temp, max_tok)
+        trunc = dpg.get_value("ai_history_trunc") if dpg.does_item_exist("ai_history_trunc") else 8000
+        ai_client.set_model_params(temp, max_tok, trunc)
 
         def do_send():
             auto_add = dpg.get_value("auto_add_history") if dpg.does_item_exist("auto_add_history") else False
@@ -1785,6 +1787,7 @@ class App:
             dpg.add_text("Parameters")
             dpg.add_input_float(tag="ai_temperature", label="Temperature", default_value=self.temperature, min_value=0.0, max_value=2.0)
             dpg.add_input_int(tag="ai_max_tokens", label="Max Tokens (Output)", default_value=self.max_tokens, step=1024)
+            dpg.add_input_int(tag="ai_history_trunc", label="History Truncation Limit", default_value=self.history_trunc_limit, step=1024)
 
         # ---- Message panel ----
         with dpg.window(
@@ -2106,6 +2109,7 @@ class App:
 
         dpg.save_init_file("dpg_layout.ini")
         session_logger.close_session()
+        ai_client.cleanup()  # Destroy active API caches to stop billing
         dpg.destroy_context()
 
 
diff --git a/manual_slop.toml b/manual_slop.toml
index 87f8746..3036cee 100644
--- a/manual_slop.toml
+++ b/manual_slop.toml
@@ -146,8 +146,8 @@ history = [
 ]
 
 [discussion.discussions."docs writeup"]
-git_commit = ""
-last_updated = "2026-02-22T10:16:30"
+git_commit = "bf2d09f3fd817d64fbf6b4aa667e2b635b6fbc0e"
+last_updated = "2026-02-22T10:34:24"
 history = [
     "@2026-02-22T08:56:39\nUser:\nLets write extensive documentation in the same style that I used for my VEFontCache-Oodin project.\nI added it's directories to your context.",
     "@2026-02-22T08:56:58\nAI:\n(No text returned)",