From da8df7a39329f36ff94ae2d21973469fe2d5c333 Mon Sep 17 00:00:00 2001 From: Ed_ Date: Sun, 22 Feb 2026 10:34:53 -0500 Subject: [PATCH] add history truncation --- ai_client.py | 42 ++++++++++++++++++++++++++++++++++++++++-- config.toml | 1 + gui.py | 14 +++++++++----- manual_slop.toml | 4 ++-- 4 files changed, 52 insertions(+), 9 deletions(-) diff --git a/ai_client.py b/ai_client.py index 509ee7c..74570ab 100644 --- a/ai_client.py +++ b/ai_client.py @@ -23,10 +23,13 @@ _model: str = "gemini-2.5-flash" _temperature: float = 0.0 _max_tokens: int = 8192 -def set_model_params(temp: float, max_tok: int): - global _temperature, _max_tokens +_history_trunc_limit: int = 8000 + +def set_model_params(temp: float, max_tok: int, trunc_limit: int = 8000): + global _temperature, _max_tokens, _history_trunc_limit _temperature = temp _max_tokens = max_tok + _history_trunc_limit = trunc_limit _gemini_client = None _gemini_chat = None @@ -201,6 +204,16 @@ def set_provider(provider: str, model: str): _model = model + +def cleanup(): + """Called on application exit to prevent orphaned caches from billing.""" + global _gemini_client, _gemini_cache + if _gemini_client and _gemini_cache: + try: + _gemini_client.caches.delete(name=_gemini_cache.name) + except Exception: + pass + def reset_session(): global _gemini_client, _gemini_chat, _gemini_cache global _anthropic_client, _anthropic_history @@ -487,6 +500,22 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items: _gemini_chat = _gemini_client.chats.create(**kwargs) _gemini_chat._last_md_hash = current_md_hash + # COMPRESS HISTORY: Truncate massive tool outputs from previous turns to stop token leaks + if _gemini_chat and getattr(_gemini_chat, "history", None): + for msg in _gemini_chat.history: + if msg.role == "user" and hasattr(msg, "parts"): + for p in msg.parts: + if hasattr(p, "function_response") and p.function_response and hasattr(p.function_response, "response"): + r = p.function_response.response + if isinstance(r, dict) and "output" in r: + val = r["output"] + if isinstance(val, str): + if "[SYSTEM: FILES UPDATED]" in val: + val = val.split("[SYSTEM: FILES UPDATED]")[0].strip() + if _history_trunc_limit > 0 and len(val) > _history_trunc_limit: + val = val[:_history_trunc_limit] + "\n\n... [TRUNCATED BY SYSTEM TO SAVE TOKENS. Original output was too large.]" + r["output"] = val + _append_comms("OUT", "request", {"message": f"[ctx {len(md_content)} + msg {len(user_message)}]"}) payload, all_text = user_message, [] @@ -760,6 +789,15 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item user_content = [{"type": "text", "text": user_message}] + # COMPRESS HISTORY: Truncate massive tool outputs from previous turns + for msg in _anthropic_history: + if msg.get("role") == "user" and isinstance(msg.get("content"), list): + for block in msg["content"]: + if isinstance(block, dict) and block.get("type") == "tool_result": + t_content = block.get("content", "") + if _history_trunc_limit > 0 and isinstance(t_content, str) and len(t_content) > _history_trunc_limit: + block["content"] = t_content[:_history_trunc_limit] + "\n\n... [TRUNCATED BY SYSTEM TO SAVE TOKENS. Original output was too large.]" + _strip_cache_controls(_anthropic_history) _repair_anthropic_history(_anthropic_history) _anthropic_history.append({"role": "user", "content": user_content}) diff --git a/config.toml b/config.toml index 37c1d7c..d1ee746 100644 --- a/config.toml +++ b/config.toml @@ -3,6 +3,7 @@ provider = "gemini" model = "gemini-3.1-pro-preview" temperature = 0.6000000238418579 max_tokens = 12000 +history_trunc_limit = 8000 system_prompt = "DO NOT EVER make a shell script unless told to. DO NOT EVER make a readme or a file describing your changes unless your are told to. If you have commands I should be entering into the command line or if you have something to explain to me, please just use code blocks or normal text output. DO NOT DO ANYTHING OTHER THAN WHAT YOU WERE TOLD TODO. DO NOT EVER, EVER DO ANYTHING OTHER THAN WHAT YOU WERE TOLD TO DO. IF YOU WANT TO DO OTHER THINGS, SIMPLY SUGGEST THEM, AND THEN I WILL REVIEW YOUR CHANGES, AND MAKE THE DECISION ON HOW TO PROCEED. WHEN WRITING SCRIPTS USE A 120-160 character limit per line. I don't want to see scrunched code.\n" [theme] diff --git a/gui.py b/gui.py index 4da3671..6f17c85 100644 --- a/gui.py +++ b/gui.py @@ -3,10 +3,9 @@ Note(Gemini): The main DearPyGui interface orchestrator. This is not a simple UI wrapper; it's a complex state machine that: -1. Handles multi-viewport docking (allowing panels to act as OS windows). -2. Manages background daemon threads for AI requests so the UI doesn't block. -3. Implements lock-protected comms queues for safe main-thread rendering. -4. Pauses AI execution to prompt the human for destructive PowerShell script approval. +1. Manages background daemon threads for AI requests so the UI doesn't block. +2. Implements lock-protected comms queues for safe main-thread rendering. +3. Pauses AI execution to prompt the human for destructive PowerShell script approval. """ # gui.py import dearpygui.dearpygui as dpg @@ -377,6 +376,7 @@ class App: self.current_model: str = ai_cfg.get("model", "gemini-2.5-flash") self.temperature: float = ai_cfg.get("temperature", 0.0) self.max_tokens: int = ai_cfg.get("max_tokens", 8192) + self.history_trunc_limit: int = ai_cfg.get("history_trunc_limit", 8000) self.available_models: list[str] = [] # ---- project management ---- @@ -845,6 +845,7 @@ class App: "model": self.current_model, "temperature": dpg.get_value("ai_temperature") if dpg.does_item_exist("ai_temperature") else self.temperature, "max_tokens": dpg.get_value("ai_max_tokens") if dpg.does_item_exist("ai_max_tokens") else self.max_tokens, + "history_trunc_limit": dpg.get_value("ai_history_trunc") if dpg.does_item_exist("ai_history_trunc") else self.history_trunc_limit, } if dpg.does_item_exist("global_system_prompt"): self.config["ai"]["system_prompt"] = dpg.get_value("global_system_prompt") @@ -1153,7 +1154,8 @@ class App: ai_client.set_custom_system_prompt("\n\n".join(combined_sp)) temp = dpg.get_value("ai_temperature") if dpg.does_item_exist("ai_temperature") else 0.0 max_tok = dpg.get_value("ai_max_tokens") if dpg.does_item_exist("ai_max_tokens") else 8192 - ai_client.set_model_params(temp, max_tok) + trunc = dpg.get_value("ai_history_trunc") if dpg.does_item_exist("ai_history_trunc") else 8000 + ai_client.set_model_params(temp, max_tok, trunc) def do_send(): auto_add = dpg.get_value("auto_add_history") if dpg.does_item_exist("auto_add_history") else False @@ -1785,6 +1787,7 @@ class App: dpg.add_text("Parameters") dpg.add_input_float(tag="ai_temperature", label="Temperature", default_value=self.temperature, min_value=0.0, max_value=2.0) dpg.add_input_int(tag="ai_max_tokens", label="Max Tokens (Output)", default_value=self.max_tokens, step=1024) + dpg.add_input_int(tag="ai_history_trunc", label="History Truncation Limit", default_value=self.history_trunc_limit, step=1024) # ---- Message panel ---- with dpg.window( @@ -2106,6 +2109,7 @@ class App: dpg.save_init_file("dpg_layout.ini") session_logger.close_session() + ai_client.cleanup() # Destroy active API caches to stop billing dpg.destroy_context() diff --git a/manual_slop.toml b/manual_slop.toml index 87f8746..3036cee 100644 --- a/manual_slop.toml +++ b/manual_slop.toml @@ -146,8 +146,8 @@ history = [ ] [discussion.discussions."docs writeup"] -git_commit = "" -last_updated = "2026-02-22T10:16:30" +git_commit = "bf2d09f3fd817d64fbf6b4aa667e2b635b6fbc0e" +last_updated = "2026-02-22T10:34:24" history = [ "@2026-02-22T08:56:39\nUser:\nLets write extensive documentation in the same style that I used for my VEFontCache-Oodin project.\nI added it's directories to your context.", "@2026-02-22T08:56:58\nAI:\n(No text returned)",