Port missing features to gui_2 and optimize caching

- Port 10 missing features from gui.py to gui_2.py: performance
    diagnostics, prior session log viewing, token budget visualization,
    agent tools config, API hooks server, GUI task queue, discussion
    truncation, THINKING/LIVE indicators, event subscriptions, and
    session usage tracking
  - Persist window visibility state in config.toml
  - Fix Gemini cache invalidation by separating discussion history
    from cached context (use MD5 hash instead of built-in hash)
  - Add cost optimizations: tool output truncation at source, proactive
    history trimming at 40%, summary_only support in aggregate.run()
  - Add cleanup() for destroying API caches on exit
This commit is contained in:
2026-02-23 20:06:13 -05:00
parent 75e1cf84fe
commit 69401365be
6 changed files with 556 additions and 152 deletions

View File

@@ -15,6 +15,8 @@ import tomllib
import json
import time
import datetime
import hashlib
import difflib
from pathlib import Path
import file_cache
import mcp_client
@@ -435,6 +437,13 @@ def _run_script(script: str, base_dir: str) -> str:
return output
def _truncate_tool_output(output: str) -> str:
    """Cap tool output at _history_trunc_limit chars before it is sent to the API.

    Returns the output unchanged when truncation is disabled (limit <= 0) or
    the text already fits; otherwise returns the truncated prefix plus an
    explicit marker so the model knows content was removed.
    """
    limit = _history_trunc_limit
    if limit <= 0 or len(output) <= limit:
        return output
    return output[:limit] + "\n\n... [TRUNCATED BY SYSTEM TO SAVE TOKENS.]"
# ------------------------------------------------------------------ dynamic file context refresh
def _reread_file_items(file_items: list[dict]) -> tuple[list[dict], list[dict]]:
@@ -460,7 +469,7 @@ def _reread_file_items(file_items: list[dict]) -> tuple[list[dict], list[dict]]:
refreshed.append(item) # unchanged — skip re-read
continue
content = p.read_text(encoding="utf-8")
new_item = {**item, "content": content, "error": False, "mtime": current_mtime}
new_item = {**item, "old_content": item.get("content", ""), "content": content, "error": False, "mtime": current_mtime}
refreshed.append(new_item)
changed.append(new_item)
except Exception as e:
@@ -486,6 +495,35 @@ def _build_file_context_text(file_items: list[dict]) -> str:
return "\n\n---\n\n".join(parts)
_DIFF_LINE_THRESHOLD = 200
def _build_file_diff_text(changed_items: list[dict]) -> str:
"""
Build text for changed files. Small files (<= _DIFF_LINE_THRESHOLD lines)
get full content; large files get a unified diff against old_content.
"""
if not changed_items:
return ""
parts = []
for item in changed_items:
path = item.get("path") or item.get("entry", "unknown")
content = item.get("content", "")
old_content = item.get("old_content", "")
new_lines = content.splitlines(keepends=True)
if len(new_lines) <= _DIFF_LINE_THRESHOLD or not old_content:
suffix = str(path).rsplit(".", 1)[-1] if "." in str(path) else "text"
parts.append(f"### `{path}` (full)\n\n```{suffix}\n{content}\n```")
else:
old_lines = old_content.splitlines(keepends=True)
diff = difflib.unified_diff(old_lines, new_lines, fromfile=str(path), tofile=str(path), lineterm="")
diff_text = "\n".join(diff)
if diff_text:
parts.append(f"### `{path}` (diff)\n\n```diff\n{diff_text}\n```")
else:
parts.append(f"### `{path}` (no changes detected)")
return "\n\n---\n\n".join(parts)
# ------------------------------------------------------------------ content block serialisation
def _content_block_to_dict(block) -> dict:
@@ -530,22 +568,26 @@ def _get_gemini_history_list(chat):
return chat.get_history()
return []
def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items: list[dict] | None = None) -> str:
def _send_gemini(md_content: str, user_message: str, base_dir: str,
file_items: list[dict] | None = None,
discussion_history: str = "") -> str:
global _gemini_chat, _gemini_cache, _gemini_cache_md_hash, _gemini_cache_created_at
try:
_ensure_gemini_client(); mcp_client.configure(file_items or [], [base_dir])
# Only stable content (files + screenshots) goes in the cached system instruction.
# Discussion history is sent as conversation messages so the cache isn't invalidated every turn.
sys_instr = f"{_get_combined_system_prompt()}\n\n<context>\n{md_content}\n</context>"
tools_decl = [_gemini_tool_declaration()]
# DYNAMIC CONTEXT: Check if files/context changed mid-session
current_md_hash = hash(md_content)
current_md_hash = hashlib.md5(md_content.encode()).hexdigest()
old_history = None
if _gemini_chat and _gemini_cache_md_hash != current_md_hash:
old_history = list(_get_gemini_history_list(_gemini_chat)) if _get_gemini_history_list(_gemini_chat) else []
if _gemini_cache:
try: _gemini_client.caches.delete(name=_gemini_cache.name)
except: pass
except Exception as e: _append_comms("OUT", "request", {"message": f"[CACHE DELETE WARN] {e}"})
_gemini_chat = None
_gemini_cache = None
_gemini_cache_created_at = None
@@ -558,7 +600,7 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items:
if elapsed > _GEMINI_CACHE_TTL * 0.9:
old_history = list(_get_gemini_history_list(_gemini_chat)) if _get_gemini_history_list(_gemini_chat) else []
try: _gemini_client.caches.delete(name=_gemini_cache.name)
except: pass
except Exception as e: _append_comms("OUT", "request", {"message": f"[CACHE DELETE WARN] {e}"})
_gemini_chat = None
_gemini_cache = None
_gemini_cache_created_at = None
@@ -601,7 +643,13 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items:
_gemini_chat = _gemini_client.chats.create(**kwargs)
_gemini_cache_md_hash = current_md_hash
# Inject discussion history as a user message on first chat creation
# (only when there's no old_history being restored, i.e., fresh session)
if discussion_history and not old_history:
_gemini_chat.send_message(f"[DISCUSSION HISTORY]\n\n{discussion_history}")
_append_comms("OUT", "request", {"message": f"[HISTORY INJECTED] {len(discussion_history)} chars"})
_append_comms("OUT", "request", {"message": f"[ctx {len(md_content)} + msg {len(user_message)}]"})
payload, all_text = user_message, []
@@ -634,26 +682,19 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items:
if cached_tokens:
usage["cache_read_input_tokens"] = cached_tokens
# Fetch cache stats in the background thread to avoid blocking GUI
cache_stats = None
try:
cache_stats = get_gemini_cache_stats()
except Exception:
pass
events.emit("response_received", payload={"provider": "gemini", "model": _model, "usage": usage, "round": r_idx, "cache_stats": cache_stats})
events.emit("response_received", payload={"provider": "gemini", "model": _model, "usage": usage, "round": r_idx})
reason = resp.candidates[0].finish_reason.name if resp.candidates and hasattr(resp.candidates[0], "finish_reason") else "STOP"
_append_comms("IN", "response", {"round": r_idx, "stop_reason": reason, "text": txt, "tool_calls": [{"name": c.name, "args": dict(c.args)} for c in calls], "usage": usage})
# Guard: if Gemini reports input tokens approaching the limit, drop oldest history pairs
# Guard: proactively trim history when input tokens exceed 40% of limit
total_in = usage.get("input_tokens", 0)
if total_in > _GEMINI_MAX_INPUT_TOKENS and _gemini_chat and _get_gemini_history_list(_gemini_chat):
if total_in > _GEMINI_MAX_INPUT_TOKENS * 0.4 and _gemini_chat and _get_gemini_history_list(_gemini_chat):
hist = _get_gemini_history_list(_gemini_chat)
dropped = 0
# Drop oldest pairs (user+model) but keep at least the last 2 entries
while len(hist) > 4 and total_in > _GEMINI_MAX_INPUT_TOKENS * 0.7:
while len(hist) > 4 and total_in > _GEMINI_MAX_INPUT_TOKENS * 0.3:
# Drop in pairs (user + model) to maintain alternating roles required by Gemini
saved = 0
for _ in range(2):
@@ -689,11 +730,12 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items:
if i == len(calls) - 1:
if file_items:
file_items, changed = _reread_file_items(file_items)
ctx = _build_file_context_text(changed)
ctx = _build_file_diff_text(changed)
if ctx:
out += f"\n\n[SYSTEM: FILES UPDATED]\n\n{ctx}"
if r_idx == MAX_TOOL_ROUNDS: out += "\n\n[SYSTEM: MAX ROUNDS. PROVIDE FINAL ANSWER.]"
out = _truncate_tool_output(out)
f_resps.append(types.Part.from_function_response(name=name, response={"output": out}))
log.append({"tool_use_id": name, "content": out})
events.emit("tool_execution", payload={"status": "completed", "tool": name, "result": out, "round": r_idx})
@@ -955,7 +997,7 @@ def _repair_anthropic_history(history: list[dict]):
})
def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_items: list[dict] | None = None) -> str:
def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_items: list[dict] | None = None, discussion_history: str = "") -> str:
try:
_ensure_anthropic_client()
mcp_client.configure(file_items or [], [base_dir])
@@ -969,7 +1011,11 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item
context_blocks = _build_chunked_context_blocks(context_text)
system_blocks = stable_blocks + context_blocks
user_content = [{"type": "text", "text": user_message}]
# Prepend discussion history to the first user message if this is a fresh session
if discussion_history and not _anthropic_history:
user_content = [{"type": "text", "text": f"[DISCUSSION HISTORY]\n\n{discussion_history}\n\n---\n\n{user_message}"}]
else:
user_content = [{"type": "text", "text": user_message}]
# COMPRESS HISTORY: Truncate massive tool outputs from previous turns
for msg in _anthropic_history:
@@ -1089,7 +1135,7 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item
tool_results.append({
"type": "tool_result",
"tool_use_id": b_id,
"content": output,
"content": _truncate_tool_output(output),
})
events.emit("tool_execution", payload={"status": "completed", "tool": b_name, "result": output, "round": round_idx})
elif b_name == TOOL_NAME:
@@ -1108,14 +1154,14 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item
tool_results.append({
"type": "tool_result",
"tool_use_id": b_id,
"content": output,
"content": _truncate_tool_output(output),
})
events.emit("tool_execution", payload={"status": "completed", "tool": b_name, "result": output, "round": round_idx})
# Refresh file context after tool calls — only inject CHANGED files
if file_items:
file_items, changed = _reread_file_items(file_items)
refreshed_ctx = _build_file_context_text(changed)
refreshed_ctx = _build_file_diff_text(changed)
if refreshed_ctx:
tool_results.append({
"type": "text",
@@ -1160,20 +1206,24 @@ def send(
user_message: str,
base_dir: str = ".",
file_items: list[dict] | None = None,
discussion_history: str = "",
) -> str:
"""
Send a message to the active provider.
md_content : aggregated markdown string from aggregate.run()
user_message: the user question / instruction
base_dir : project base directory (for PowerShell tool calls)
file_items : list of file dicts from aggregate.build_file_items() for
dynamic context refresh after tool calls
md_content : aggregated markdown string (for Gemini: stable content only,
for Anthropic: full content including history)
user_message : the user question / instruction
base_dir : project base directory (for PowerShell tool calls)
file_items : list of file dicts from aggregate.build_file_items() for
dynamic context refresh after tool calls
discussion_history : discussion history text (used by Gemini to inject as
conversation message instead of caching it)
"""
if _provider == "gemini":
return _send_gemini(md_content, user_message, base_dir, file_items)
return _send_gemini(md_content, user_message, base_dir, file_items, discussion_history)
elif _provider == "anthropic":
return _send_anthropic(md_content, user_message, base_dir, file_items)
return _send_anthropic(md_content, user_message, base_dir, file_items, discussion_history)
raise ValueError(f"unknown provider: {_provider}")
def get_history_bleed_stats() -> dict: