add history truncation

2026-02-22 10:34:53 -05:00
parent bf2d09f3fd
commit da8df7a393
4 changed files with 52 additions and 9 deletions
--- a/ai_client.py
+++ b/ai_client.py
@@ -23,10 +23,13 @@ _model: str = "gemini-2.5-flash"
 _temperature: float = 0.0
 _max_tokens: int = 8192

-def set_model_params(temp: float, max_tok: int):
-    global _temperature, _max_tokens
+# Character cap applied to each tool output kept in chat history; values <= 0 disable length truncation.
+_history_trunc_limit: int = 8000
+
+def set_model_params(temp: float, max_tok: int, trunc_limit: int = 8000):
+    """Store the temperature, max output tokens and history truncation limit in the module-level settings."""
+    global _temperature, _max_tokens, _history_trunc_limit
    _temperature = temp
    _max_tokens = max_tok
+    _history_trunc_limit = trunc_limit

 _gemini_client = None
 _gemini_chat = None
@@ -201,6 +204,16 @@ def set_provider(provider: str, model: str):
    _model = model


+
+def cleanup():
+    """Delete the active Gemini cache, if any, so it is not left orphaned (and billed) after exit.
+
+    Best-effort: a failed delete is ignored because this runs during application
+    shutdown. The module-level cache handle is cleared either way, so a repeated
+    call is a no-op instead of a second delete request for the same cache.
+    """
+    global _gemini_cache
+    if _gemini_client and _gemini_cache:
+        try:
+            _gemini_client.caches.delete(name=_gemini_cache.name)
+        except Exception:
+            pass  # deliberate: nothing useful can be done about a failed delete at exit
+        finally:
+            _gemini_cache = None
+
 def reset_session():
    global _gemini_client, _gemini_chat, _gemini_cache
    global _anthropic_client, _anthropic_history
@@ -487,6 +500,22 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items:
            _gemini_chat = _gemini_client.chats.create(**kwargs)
            _gemini_chat._last_md_hash = current_md_hash
        
+        # COMPRESS HISTORY: Truncate oversized tool outputs from previous turns, in place, so they stop inflating token usage
+        if _gemini_chat and getattr(_gemini_chat, "history", None):
+            for msg in _gemini_chat.history:
+                if msg.role == "user" and hasattr(msg, "parts"):
+                    for p in msg.parts:
+                        if hasattr(p, "function_response") and p.function_response and hasattr(p.function_response, "response"):
+                            r = p.function_response.response
+                            if isinstance(r, dict) and "output" in r:
+                                val = r["output"]
+                                if isinstance(val, str):
+                                    if "[SYSTEM: FILES UPDATED]" in val:
+                                        val = val.split("[SYSTEM: FILES UPDATED]")[0].strip()
+                                    if _history_trunc_limit > 0 and len(val) > _history_trunc_limit:
+                                        val = val[:_history_trunc_limit] + "\n\n... [TRUNCATED BY SYSTEM TO SAVE TOKENS. Original output was too large.]"
+                                    r["output"] = val
+
        _append_comms("OUT", "request", {"message": f"[ctx {len(md_content)} + msg {len(user_message)}]"})
        payload, all_text = user_message, []
        
@@ -760,6 +789,15 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item

        user_content = [{"type": "text", "text": user_message}]

+        # COMPRESS HISTORY: Truncate massive tool outputs from previous turns
+        for msg in _anthropic_history:
+            if msg.get("role") == "user" and isinstance(msg.get("content"), list):
+                for block in msg["content"]:
+                    if isinstance(block, dict) and block.get("type") == "tool_result":
+                        t_content = block.get("content", "")
+                        if _history_trunc_limit > 0 and isinstance(t_content, str) and len(t_content) > _history_trunc_limit:
+                            block["content"] = t_content[:_history_trunc_limit] + "\n\n... [TRUNCATED BY SYSTEM TO SAVE TOKENS. Original output was too large.]"
+
        _strip_cache_controls(_anthropic_history)
        _repair_anthropic_history(_anthropic_history)
        _anthropic_history.append({"role": "user", "content": user_content})
--- a/config.toml
+++ b/config.toml
@@ -3,6 +3,7 @@ provider = "gemini"
 model = "gemini-3.1-pro-preview"
 temperature = 0.6000000238418579
 max_tokens = 12000
+history_trunc_limit = 8000
 system_prompt = "DO NOT EVER make a shell script unless told to. DO NOT EVER make a readme or a file describing your changes unless your are told to. If you have commands I should be entering into the command line or if you have something to explain to me, please just use code blocks or normal text output. DO NOT DO ANYTHING OTHER THAN WHAT YOU WERE TOLD TODO. DO NOT EVER, EVER DO ANYTHING OTHER THAN WHAT YOU WERE TOLD TO DO. IF YOU WANT TO DO OTHER THINGS, SIMPLY SUGGEST THEM, AND THEN I WILL REVIEW YOUR CHANGES, AND MAKE THE DECISION ON HOW TO PROCEED. WHEN WRITING SCRIPTS USE A 120-160 character limit per line. I don't want to see scrunched code.\n"

 [theme]
--- a/gui.py
+++ b/gui.py
@@ -3,10 +3,9 @@
 Note(Gemini):
 The main DearPyGui interface orchestrator.
 This is not a simple UI wrapper; it's a complex state machine that:
-1. Handles multi-viewport docking (allowing panels to act as OS windows).
-2. Manages background daemon threads for AI requests so the UI doesn't block.
-3. Implements lock-protected comms queues for safe main-thread rendering.
-4. Pauses AI execution to prompt the human for destructive PowerShell script approval.
+1. Manages background daemon threads for AI requests so the UI doesn't block.
+2. Implements lock-protected comms queues for safe main-thread rendering.
+3. Pauses AI execution to prompt the human for destructive PowerShell script approval.
 """
 # gui.py
 import dearpygui.dearpygui as dpg
@@ -377,6 +376,7 @@ class App:
        self.current_model: str = ai_cfg.get("model", "gemini-2.5-flash")
        self.temperature: float = ai_cfg.get("temperature", 0.0)
        self.max_tokens: int = ai_cfg.get("max_tokens", 8192)
+        self.history_trunc_limit: int = ai_cfg.get("history_trunc_limit", 8000)
        self.available_models: list[str] = []

        # ---- project management ----
@@ -845,6 +845,7 @@ class App:
            "model":    self.current_model,
            "temperature": dpg.get_value("ai_temperature") if dpg.does_item_exist("ai_temperature") else self.temperature,
            "max_tokens": dpg.get_value("ai_max_tokens") if dpg.does_item_exist("ai_max_tokens") else self.max_tokens,
+            "history_trunc_limit": dpg.get_value("ai_history_trunc") if dpg.does_item_exist("ai_history_trunc") else self.history_trunc_limit,
        }
        if dpg.does_item_exist("global_system_prompt"):
            self.config["ai"]["system_prompt"] = dpg.get_value("global_system_prompt")
@@ -1153,7 +1154,8 @@ class App:
        ai_client.set_custom_system_prompt("\n\n".join(combined_sp))
        temp = dpg.get_value("ai_temperature") if dpg.does_item_exist("ai_temperature") else 0.0
        max_tok = dpg.get_value("ai_max_tokens") if dpg.does_item_exist("ai_max_tokens") else 8192
-        ai_client.set_model_params(temp, max_tok)
+        trunc = dpg.get_value("ai_history_trunc") if dpg.does_item_exist("ai_history_trunc") else 8000
+        ai_client.set_model_params(temp, max_tok, trunc)

        def do_send():
            auto_add = dpg.get_value("auto_add_history") if dpg.does_item_exist("auto_add_history") else False
@@ -1785,6 +1787,7 @@ class App:
            dpg.add_text("Parameters")
            dpg.add_input_float(tag="ai_temperature", label="Temperature", default_value=self.temperature, min_value=0.0, max_value=2.0)
            dpg.add_input_int(tag="ai_max_tokens", label="Max Tokens (Output)", default_value=self.max_tokens, step=1024)
+            dpg.add_input_int(tag="ai_history_trunc", label="History Truncation Limit", default_value=self.history_trunc_limit, step=1024)

        # ---- Message panel ----
        with dpg.window(
@@ -2106,6 +2109,7 @@ class App:

        dpg.save_init_file("dpg_layout.ini")
        session_logger.close_session()
+        ai_client.cleanup()  # Destroy active API caches to stop billing
        dpg.destroy_context()


--- a/manual_slop.toml
+++ b/manual_slop.toml
@@ -146,8 +146,8 @@ history = [
 ]

 [discussion.discussions."docs writeup"]
-git_commit = ""
-last_updated = "2026-02-22T10:16:30"
+git_commit = "bf2d09f3fd817d64fbf6b4aa667e2b635b6fbc0e"
+last_updated = "2026-02-22T10:34:24"
 history = [
    "@2026-02-22T08:56:39\nUser:\nLets write extensive documentation in the same style that I used for my VEFontCache-Oodin project.\nI added it's directories to your context.",
    "@2026-02-22T08:56:58\nAI:\n(No text returned)",