Port missing features to gui_2 and optimize caching

- Port 10 missing features from gui.py to gui_2.py: performance
    diagnostics, prior session log viewing, token budget visualization,
    agent tools config, API hooks server, GUI task queue, discussion
    truncation, THINKING/LIVE indicators, event subscriptions, and
    session usage tracking
  - Persist window visibility state in config.toml
  - Fix Gemini cache invalidation by separating discussion history
    from cached context (use MD5 hash instead of built-in hash)
  - Add cost optimizations: tool output truncation at source, proactive
    history trimming at 40%, summary_only support in aggregate.run()
  - Add cleanup() for destroying API caches on exit
This commit is contained in:
2026-02-23 20:06:13 -05:00
parent 75e1cf84fe
commit 69401365be
6 changed files with 556 additions and 152 deletions

View File

@@ -15,6 +15,8 @@ import tomllib
import json
import time
import datetime
import hashlib
import difflib
from pathlib import Path
import file_cache
import mcp_client
@@ -435,6 +437,13 @@ def _run_script(script: str, base_dir: str) -> str:
return output
def _truncate_tool_output(output: str) -> str:
    """Cap tool output at _history_trunc_limit chars before it is sent to the API.

    Returns the output unchanged when truncation is disabled (limit <= 0) or
    the text already fits; otherwise returns the truncated prefix plus an
    explicit marker so the model knows content was removed.
    """
    limit = _history_trunc_limit
    if limit <= 0 or len(output) <= limit:
        return output
    return output[:limit] + "\n\n... [TRUNCATED BY SYSTEM TO SAVE TOKENS.]"
# ------------------------------------------------------------------ dynamic file context refresh
def _reread_file_items(file_items: list[dict]) -> tuple[list[dict], list[dict]]:
@@ -460,7 +469,7 @@ def _reread_file_items(file_items: list[dict]) -> tuple[list[dict], list[dict]]:
refreshed.append(item) # unchanged — skip re-read
continue
content = p.read_text(encoding="utf-8")
new_item = {**item, "content": content, "error": False, "mtime": current_mtime}
new_item = {**item, "old_content": item.get("content", ""), "content": content, "error": False, "mtime": current_mtime}
refreshed.append(new_item)
changed.append(new_item)
except Exception as e:
@@ -486,6 +495,35 @@ def _build_file_context_text(file_items: list[dict]) -> str:
return "\n\n---\n\n".join(parts)
_DIFF_LINE_THRESHOLD = 200
def _build_file_diff_text(changed_items: list[dict]) -> str:
"""
Build text for changed files. Small files (<= _DIFF_LINE_THRESHOLD lines)
get full content; large files get a unified diff against old_content.
"""
if not changed_items:
return ""
parts = []
for item in changed_items:
path = item.get("path") or item.get("entry", "unknown")
content = item.get("content", "")
old_content = item.get("old_content", "")
new_lines = content.splitlines(keepends=True)
if len(new_lines) <= _DIFF_LINE_THRESHOLD or not old_content:
suffix = str(path).rsplit(".", 1)[-1] if "." in str(path) else "text"
parts.append(f"### `{path}` (full)\n\n```{suffix}\n{content}\n```")
else:
old_lines = old_content.splitlines(keepends=True)
diff = difflib.unified_diff(old_lines, new_lines, fromfile=str(path), tofile=str(path), lineterm="")
diff_text = "\n".join(diff)
if diff_text:
parts.append(f"### `{path}` (diff)\n\n```diff\n{diff_text}\n```")
else:
parts.append(f"### `{path}` (no changes detected)")
return "\n\n---\n\n".join(parts)
# ------------------------------------------------------------------ content block serialisation
def _content_block_to_dict(block) -> dict:
@@ -530,22 +568,26 @@ def _get_gemini_history_list(chat):
return chat.get_history()
return []
def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items: list[dict] | None = None) -> str:
def _send_gemini(md_content: str, user_message: str, base_dir: str,
file_items: list[dict] | None = None,
discussion_history: str = "") -> str:
global _gemini_chat, _gemini_cache, _gemini_cache_md_hash, _gemini_cache_created_at
try:
_ensure_gemini_client(); mcp_client.configure(file_items or [], [base_dir])
# Only stable content (files + screenshots) goes in the cached system instruction.
# Discussion history is sent as conversation messages so the cache isn't invalidated every turn.
sys_instr = f"{_get_combined_system_prompt()}\n\n<context>\n{md_content}\n</context>"
tools_decl = [_gemini_tool_declaration()]
# DYNAMIC CONTEXT: Check if files/context changed mid-session
current_md_hash = hash(md_content)
current_md_hash = hashlib.md5(md_content.encode()).hexdigest()
old_history = None
if _gemini_chat and _gemini_cache_md_hash != current_md_hash:
old_history = list(_get_gemini_history_list(_gemini_chat)) if _get_gemini_history_list(_gemini_chat) else []
if _gemini_cache:
try: _gemini_client.caches.delete(name=_gemini_cache.name)
except: pass
except Exception as e: _append_comms("OUT", "request", {"message": f"[CACHE DELETE WARN] {e}"})
_gemini_chat = None
_gemini_cache = None
_gemini_cache_created_at = None
@@ -558,7 +600,7 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items:
if elapsed > _GEMINI_CACHE_TTL * 0.9:
old_history = list(_get_gemini_history_list(_gemini_chat)) if _get_gemini_history_list(_gemini_chat) else []
try: _gemini_client.caches.delete(name=_gemini_cache.name)
except: pass
except Exception as e: _append_comms("OUT", "request", {"message": f"[CACHE DELETE WARN] {e}"})
_gemini_chat = None
_gemini_cache = None
_gemini_cache_created_at = None
@@ -601,7 +643,13 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items:
_gemini_chat = _gemini_client.chats.create(**kwargs)
_gemini_cache_md_hash = current_md_hash
# Inject discussion history as a user message on first chat creation
# (only when there's no old_history being restored, i.e., fresh session)
if discussion_history and not old_history:
_gemini_chat.send_message(f"[DISCUSSION HISTORY]\n\n{discussion_history}")
_append_comms("OUT", "request", {"message": f"[HISTORY INJECTED] {len(discussion_history)} chars"})
_append_comms("OUT", "request", {"message": f"[ctx {len(md_content)} + msg {len(user_message)}]"})
payload, all_text = user_message, []
@@ -634,26 +682,19 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items:
if cached_tokens:
usage["cache_read_input_tokens"] = cached_tokens
# Fetch cache stats in the background thread to avoid blocking GUI
cache_stats = None
try:
cache_stats = get_gemini_cache_stats()
except Exception:
pass
events.emit("response_received", payload={"provider": "gemini", "model": _model, "usage": usage, "round": r_idx, "cache_stats": cache_stats})
events.emit("response_received", payload={"provider": "gemini", "model": _model, "usage": usage, "round": r_idx})
reason = resp.candidates[0].finish_reason.name if resp.candidates and hasattr(resp.candidates[0], "finish_reason") else "STOP"
_append_comms("IN", "response", {"round": r_idx, "stop_reason": reason, "text": txt, "tool_calls": [{"name": c.name, "args": dict(c.args)} for c in calls], "usage": usage})
# Guard: if Gemini reports input tokens approaching the limit, drop oldest history pairs
# Guard: proactively trim history when input tokens exceed 40% of limit
total_in = usage.get("input_tokens", 0)
if total_in > _GEMINI_MAX_INPUT_TOKENS and _gemini_chat and _get_gemini_history_list(_gemini_chat):
if total_in > _GEMINI_MAX_INPUT_TOKENS * 0.4 and _gemini_chat and _get_gemini_history_list(_gemini_chat):
hist = _get_gemini_history_list(_gemini_chat)
dropped = 0
# Drop oldest pairs (user+model) but keep at least the last 2 entries
while len(hist) > 4 and total_in > _GEMINI_MAX_INPUT_TOKENS * 0.7:
while len(hist) > 4 and total_in > _GEMINI_MAX_INPUT_TOKENS * 0.3:
# Drop in pairs (user + model) to maintain alternating roles required by Gemini
saved = 0
for _ in range(2):
@@ -689,11 +730,12 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items:
if i == len(calls) - 1:
if file_items:
file_items, changed = _reread_file_items(file_items)
ctx = _build_file_context_text(changed)
ctx = _build_file_diff_text(changed)
if ctx:
out += f"\n\n[SYSTEM: FILES UPDATED]\n\n{ctx}"
if r_idx == MAX_TOOL_ROUNDS: out += "\n\n[SYSTEM: MAX ROUNDS. PROVIDE FINAL ANSWER.]"
out = _truncate_tool_output(out)
f_resps.append(types.Part.from_function_response(name=name, response={"output": out}))
log.append({"tool_use_id": name, "content": out})
events.emit("tool_execution", payload={"status": "completed", "tool": name, "result": out, "round": r_idx})
@@ -955,7 +997,7 @@ def _repair_anthropic_history(history: list[dict]):
})
def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_items: list[dict] | None = None) -> str:
def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_items: list[dict] | None = None, discussion_history: str = "") -> str:
try:
_ensure_anthropic_client()
mcp_client.configure(file_items or [], [base_dir])
@@ -969,7 +1011,11 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item
context_blocks = _build_chunked_context_blocks(context_text)
system_blocks = stable_blocks + context_blocks
user_content = [{"type": "text", "text": user_message}]
# Prepend discussion history to the first user message if this is a fresh session
if discussion_history and not _anthropic_history:
user_content = [{"type": "text", "text": f"[DISCUSSION HISTORY]\n\n{discussion_history}\n\n---\n\n{user_message}"}]
else:
user_content = [{"type": "text", "text": user_message}]
# COMPRESS HISTORY: Truncate massive tool outputs from previous turns
for msg in _anthropic_history:
@@ -1089,7 +1135,7 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item
tool_results.append({
"type": "tool_result",
"tool_use_id": b_id,
"content": output,
"content": _truncate_tool_output(output),
})
events.emit("tool_execution", payload={"status": "completed", "tool": b_name, "result": output, "round": round_idx})
elif b_name == TOOL_NAME:
@@ -1108,14 +1154,14 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item
tool_results.append({
"type": "tool_result",
"tool_use_id": b_id,
"content": output,
"content": _truncate_tool_output(output),
})
events.emit("tool_execution", payload={"status": "completed", "tool": b_name, "result": output, "round": round_idx})
# Refresh file context after tool calls — only inject CHANGED files
if file_items:
file_items, changed = _reread_file_items(file_items)
refreshed_ctx = _build_file_context_text(changed)
refreshed_ctx = _build_file_diff_text(changed)
if refreshed_ctx:
tool_results.append({
"type": "text",
@@ -1160,20 +1206,24 @@ def send(
user_message: str,
base_dir: str = ".",
file_items: list[dict] | None = None,
discussion_history: str = "",
) -> str:
"""
Send a message to the active provider.
md_content : aggregated markdown string from aggregate.run()
user_message: the user question / instruction
base_dir : project base directory (for PowerShell tool calls)
file_items : list of file dicts from aggregate.build_file_items() for
dynamic context refresh after tool calls
md_content : aggregated markdown string (for Gemini: stable content only,
for Anthropic: full content including history)
user_message : the user question / instruction
base_dir : project base directory (for PowerShell tool calls)
file_items : list of file dicts from aggregate.build_file_items() for
dynamic context refresh after tool calls
discussion_history : discussion history text (used by Gemini to inject as
conversation message instead of caching it)
"""
if _provider == "gemini":
return _send_gemini(md_content, user_message, base_dir, file_items)
return _send_gemini(md_content, user_message, base_dir, file_items, discussion_history)
elif _provider == "anthropic":
return _send_anthropic(md_content, user_message, base_dir, file_items)
return _send_anthropic(md_content, user_message, base_dir, file_items, discussion_history)
raise ValueError(f"unknown provider: {_provider}")
def get_history_bleed_stats() -> dict: