add history truncation

This commit is contained in:
2026-02-22 10:34:53 -05:00
parent bf2d09f3fd
commit da8df7a393
4 changed files with 52 additions and 9 deletions

View File

@@ -23,10 +23,13 @@ _model: str = "gemini-2.5-flash"
_temperature: float = 0.0 _temperature: float = 0.0
_max_tokens: int = 8192 _max_tokens: int = 8192
def set_model_params(temp: float, max_tok: int): _history_trunc_limit: int = 8000
global _temperature, _max_tokens
def set_model_params(temp: float, max_tok: int, trunc_limit: int = 8000):
global _temperature, _max_tokens, _history_trunc_limit
_temperature = temp _temperature = temp
_max_tokens = max_tok _max_tokens = max_tok
_history_trunc_limit = trunc_limit
_gemini_client = None _gemini_client = None
_gemini_chat = None _gemini_chat = None
@@ -201,6 +204,16 @@ def set_provider(provider: str, model: str):
_model = model _model = model
def cleanup():
    """Delete the active Gemini cache, if any.

    Called on application exit to prevent orphaned caches from billing.

    This is best-effort: a failed delete is logged as a warning and never
    raised, so it cannot interrupt shutdown. After a successful delete the
    module-level cache handle is cleared, so calling this again does not try
    to delete the same cache a second time.
    """
    global _gemini_client, _gemini_cache
    if not (_gemini_client and _gemini_cache):
        return
    try:
        _gemini_client.caches.delete(name=_gemini_cache.name)
    except Exception as exc:
        # Broad catch is deliberate on the shutdown path, but surface the
        # failure: a cache that was not deleted may keep accruing charges.
        import logging
        logging.getLogger(__name__).warning("Failed to delete Gemini cache on exit: %s", exc)
    else:
        # Drop the stale handle only once the delete is known to have succeeded.
        _gemini_cache = None
def reset_session(): def reset_session():
global _gemini_client, _gemini_chat, _gemini_cache global _gemini_client, _gemini_chat, _gemini_cache
global _anthropic_client, _anthropic_history global _anthropic_client, _anthropic_history
@@ -487,6 +500,22 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items:
_gemini_chat = _gemini_client.chats.create(**kwargs) _gemini_chat = _gemini_client.chats.create(**kwargs)
_gemini_chat._last_md_hash = current_md_hash _gemini_chat._last_md_hash = current_md_hash
# COMPRESS HISTORY: Truncate massive tool outputs from previous turns to stop token leaks
if _gemini_chat and getattr(_gemini_chat, "history", None):
for msg in _gemini_chat.history:
if msg.role == "user" and hasattr(msg, "parts"):
for p in msg.parts:
if hasattr(p, "function_response") and p.function_response and hasattr(p.function_response, "response"):
r = p.function_response.response
if isinstance(r, dict) and "output" in r:
val = r["output"]
if isinstance(val, str):
if "[SYSTEM: FILES UPDATED]" in val:
val = val.split("[SYSTEM: FILES UPDATED]")[0].strip()
if _history_trunc_limit > 0 and len(val) > _history_trunc_limit:
val = val[:_history_trunc_limit] + "\n\n... [TRUNCATED BY SYSTEM TO SAVE TOKENS. Original output was too large.]"
r["output"] = val
_append_comms("OUT", "request", {"message": f"[ctx {len(md_content)} + msg {len(user_message)}]"}) _append_comms("OUT", "request", {"message": f"[ctx {len(md_content)} + msg {len(user_message)}]"})
payload, all_text = user_message, [] payload, all_text = user_message, []
@@ -760,6 +789,15 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item
user_content = [{"type": "text", "text": user_message}] user_content = [{"type": "text", "text": user_message}]
# COMPRESS HISTORY: Truncate massive tool outputs from previous turns
for msg in _anthropic_history:
if msg.get("role") == "user" and isinstance(msg.get("content"), list):
for block in msg["content"]:
if isinstance(block, dict) and block.get("type") == "tool_result":
t_content = block.get("content", "")
if _history_trunc_limit > 0 and isinstance(t_content, str) and len(t_content) > _history_trunc_limit:
block["content"] = t_content[:_history_trunc_limit] + "\n\n... [TRUNCATED BY SYSTEM TO SAVE TOKENS. Original output was too large.]"
_strip_cache_controls(_anthropic_history) _strip_cache_controls(_anthropic_history)
_repair_anthropic_history(_anthropic_history) _repair_anthropic_history(_anthropic_history)
_anthropic_history.append({"role": "user", "content": user_content}) _anthropic_history.append({"role": "user", "content": user_content})

View File

@@ -3,6 +3,7 @@ provider = "gemini"
model = "gemini-3.1-pro-preview" model = "gemini-3.1-pro-preview"
temperature = 0.6000000238418579 temperature = 0.6000000238418579
max_tokens = 12000 max_tokens = 12000
history_trunc_limit = 8000
system_prompt = "DO NOT EVER make a shell script unless told to. DO NOT EVER make a readme or a file describing your changes unless your are told to. If you have commands I should be entering into the command line or if you have something to explain to me, please just use code blocks or normal text output. DO NOT DO ANYTHING OTHER THAN WHAT YOU WERE TOLD TODO. DO NOT EVER, EVER DO ANYTHING OTHER THAN WHAT YOU WERE TOLD TO DO. IF YOU WANT TO DO OTHER THINGS, SIMPLY SUGGEST THEM, AND THEN I WILL REVIEW YOUR CHANGES, AND MAKE THE DECISION ON HOW TO PROCEED. WHEN WRITING SCRIPTS USE A 120-160 character limit per line. I don't want to see scrunched code.\n" system_prompt = "DO NOT EVER make a shell script unless told to. DO NOT EVER make a readme or a file describing your changes unless your are told to. If you have commands I should be entering into the command line or if you have something to explain to me, please just use code blocks or normal text output. DO NOT DO ANYTHING OTHER THAN WHAT YOU WERE TOLD TODO. DO NOT EVER, EVER DO ANYTHING OTHER THAN WHAT YOU WERE TOLD TO DO. IF YOU WANT TO DO OTHER THINGS, SIMPLY SUGGEST THEM, AND THEN I WILL REVIEW YOUR CHANGES, AND MAKE THE DECISION ON HOW TO PROCEED. WHEN WRITING SCRIPTS USE A 120-160 character limit per line. I don't want to see scrunched code.\n"
[theme] [theme]

14
gui.py
View File

@@ -3,10 +3,9 @@
Note(Gemini): Note(Gemini):
The main DearPyGui interface orchestrator. The main DearPyGui interface orchestrator.
This is not a simple UI wrapper; it's a complex state machine that: This is not a simple UI wrapper; it's a complex state machine that:
1. Handles multi-viewport docking (allowing panels to act as OS windows). 1. Manages background daemon threads for AI requests so the UI doesn't block.
2. Manages background daemon threads for AI requests so the UI doesn't block. 2. Implements lock-protected comms queues for safe main-thread rendering.
3. Implements lock-protected comms queues for safe main-thread rendering. 3. Pauses AI execution to prompt the human for destructive PowerShell script approval.
4. Pauses AI execution to prompt the human for destructive PowerShell script approval.
""" """
# gui.py # gui.py
import dearpygui.dearpygui as dpg import dearpygui.dearpygui as dpg
@@ -377,6 +376,7 @@ class App:
self.current_model: str = ai_cfg.get("model", "gemini-2.5-flash") self.current_model: str = ai_cfg.get("model", "gemini-2.5-flash")
self.temperature: float = ai_cfg.get("temperature", 0.0) self.temperature: float = ai_cfg.get("temperature", 0.0)
self.max_tokens: int = ai_cfg.get("max_tokens", 8192) self.max_tokens: int = ai_cfg.get("max_tokens", 8192)
self.history_trunc_limit: int = ai_cfg.get("history_trunc_limit", 8000)
self.available_models: list[str] = [] self.available_models: list[str] = []
# ---- project management ---- # ---- project management ----
@@ -845,6 +845,7 @@ class App:
"model": self.current_model, "model": self.current_model,
"temperature": dpg.get_value("ai_temperature") if dpg.does_item_exist("ai_temperature") else self.temperature, "temperature": dpg.get_value("ai_temperature") if dpg.does_item_exist("ai_temperature") else self.temperature,
"max_tokens": dpg.get_value("ai_max_tokens") if dpg.does_item_exist("ai_max_tokens") else self.max_tokens, "max_tokens": dpg.get_value("ai_max_tokens") if dpg.does_item_exist("ai_max_tokens") else self.max_tokens,
"history_trunc_limit": dpg.get_value("ai_history_trunc") if dpg.does_item_exist("ai_history_trunc") else self.history_trunc_limit,
} }
if dpg.does_item_exist("global_system_prompt"): if dpg.does_item_exist("global_system_prompt"):
self.config["ai"]["system_prompt"] = dpg.get_value("global_system_prompt") self.config["ai"]["system_prompt"] = dpg.get_value("global_system_prompt")
@@ -1153,7 +1154,8 @@ class App:
ai_client.set_custom_system_prompt("\n\n".join(combined_sp)) ai_client.set_custom_system_prompt("\n\n".join(combined_sp))
temp = dpg.get_value("ai_temperature") if dpg.does_item_exist("ai_temperature") else 0.0 temp = dpg.get_value("ai_temperature") if dpg.does_item_exist("ai_temperature") else 0.0
max_tok = dpg.get_value("ai_max_tokens") if dpg.does_item_exist("ai_max_tokens") else 8192 max_tok = dpg.get_value("ai_max_tokens") if dpg.does_item_exist("ai_max_tokens") else 8192
ai_client.set_model_params(temp, max_tok) trunc = dpg.get_value("ai_history_trunc") if dpg.does_item_exist("ai_history_trunc") else 8000
ai_client.set_model_params(temp, max_tok, trunc)
def do_send(): def do_send():
auto_add = dpg.get_value("auto_add_history") if dpg.does_item_exist("auto_add_history") else False auto_add = dpg.get_value("auto_add_history") if dpg.does_item_exist("auto_add_history") else False
@@ -1785,6 +1787,7 @@ class App:
dpg.add_text("Parameters") dpg.add_text("Parameters")
dpg.add_input_float(tag="ai_temperature", label="Temperature", default_value=self.temperature, min_value=0.0, max_value=2.0) dpg.add_input_float(tag="ai_temperature", label="Temperature", default_value=self.temperature, min_value=0.0, max_value=2.0)
dpg.add_input_int(tag="ai_max_tokens", label="Max Tokens (Output)", default_value=self.max_tokens, step=1024) dpg.add_input_int(tag="ai_max_tokens", label="Max Tokens (Output)", default_value=self.max_tokens, step=1024)
dpg.add_input_int(tag="ai_history_trunc", label="History Truncation Limit", default_value=self.history_trunc_limit, step=1024)
# ---- Message panel ---- # ---- Message panel ----
with dpg.window( with dpg.window(
@@ -2106,6 +2109,7 @@ class App:
dpg.save_init_file("dpg_layout.ini") dpg.save_init_file("dpg_layout.ini")
session_logger.close_session() session_logger.close_session()
ai_client.cleanup() # Destroy active API caches to stop billing
dpg.destroy_context() dpg.destroy_context()

View File

@@ -146,8 +146,8 @@ history = [
] ]
[discussion.discussions."docs writeup"] [discussion.discussions."docs writeup"]
git_commit = "" git_commit = "bf2d09f3fd817d64fbf6b4aa667e2b635b6fbc0e"
last_updated = "2026-02-22T10:16:30" last_updated = "2026-02-22T10:34:24"
history = [ history = [
"@2026-02-22T08:56:39\nUser:\nLets write extensive documentation in the same style that I used for my VEFontCache-Oodin project.\nI added it's directories to your context.", "@2026-02-22T08:56:39\nUser:\nLets write extensive documentation in the same style that I used for my VEFontCache-Oodin project.\nI added it's directories to your context.",
"@2026-02-22T08:56:58\nAI:\n(No text returned)", "@2026-02-22T08:56:58\nAI:\n(No text returned)",