From 4755f4b59068bf3c40bbe2751b117e24da72f46d Mon Sep 17 00:00:00 2001
From: Ed_ <edwardgz@gmail.com>
Date: Sun, 22 Feb 2026 11:28:18 -0500
Subject: [PATCH] claude final fix pass

---
 aggregate.py       |  2 --
 ai_client.py       | 82 ++++++++++++++++++++++++++++++----------------
 gui.py             |  2 +-
 project_manager.py |  1 -
 session_logger.py  |  2 --
 5 files changed, 54 insertions(+), 35 deletions(-)
diff --git a/aggregate.py b/aggregate.py
index 5cf6879..304ebc8 100644
--- a/aggregate.py
+++ b/aggregate.py
@@ -171,5 +171,3 @@ def main():
 
 if __name__ == "__main__":
     main()
-
-
diff --git a/ai_client.py b/ai_client.py
index 74570ab..380006a 100644
--- a/ai_client.py
+++ b/ai_client.py
@@ -217,6 +217,7 @@ def cleanup():
 def reset_session():
     global _gemini_client, _gemini_chat, _gemini_cache
     global _anthropic_client, _anthropic_history
+    global _CACHED_ANTHROPIC_TOOLS
     if _gemini_client and _gemini_cache:
         try:
             _gemini_client.caches.delete(name=_gemini_cache.name)
@@ -227,6 +228,7 @@ def reset_session():
     _gemini_cache = None
     _anthropic_client = None
     _anthropic_history = []
+    _CACHED_ANTHROPIC_TOOLS = None
     file_cache.reset_client()
 
 
@@ -309,6 +311,15 @@ def _build_anthropic_tools() -> list[dict]:
 
 _ANTHROPIC_TOOLS = _build_anthropic_tools()
 
+_CACHED_ANTHROPIC_TOOLS = None
+
+def _get_anthropic_tools() -> list[dict]:
+    """Return the cached Anthropic tools list, building it on first use; reset_session() clears the cache."""
+    global _CACHED_ANTHROPIC_TOOLS
+    if _CACHED_ANTHROPIC_TOOLS is None:
+        _CACHED_ANTHROPIC_TOOLS = _build_anthropic_tools()
+    return _CACHED_ANTHROPIC_TOOLS
+
 
 def _gemini_tool_declaration():
     from google.genai import types
@@ -443,15 +454,13 @@ def _ensure_gemini_client():
 
 
 def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items: list[dict] | None = None) -> str:
-    global _gemini_chat
+    global _gemini_chat, _gemini_cache
     from google.genai import types
     try:
         _ensure_gemini_client(); mcp_client.configure(file_items or [], [base_dir])
         sys_instr = f"{_get_combined_system_prompt()}\n\n<context>\n{md_content}\n</context>"
         tools_decl = [_gemini_tool_declaration()]
         
-        global _gemini_cache, _gemini_chat
-        
         # DYNAMIC CONTEXT: Check if files/context changed mid-session
         current_md_hash = hash(md_content)
         old_history = None
@@ -490,8 +499,7 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items:
                 )
                 _append_comms("OUT", "request", {"message": f"[CACHE CREATED] {_gemini_cache.name}"})
             except Exception as e:
-                # Fallback if under token limit or API error
-                pass
+                _gemini_cache = None  # Ensure clean state on failure
                 
             kwargs = {"model": _model, "config": chat_config}
             if old_history:
@@ -500,27 +508,11 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items:
             _gemini_chat = _gemini_client.chats.create(**kwargs)
             _gemini_chat._last_md_hash = current_md_hash
         
-        # COMPRESS HISTORY: Truncate massive tool outputs from previous turns to stop token leaks
-        if _gemini_chat and getattr(_gemini_chat, "history", None):
-            for msg in _gemini_chat.history:
-                if msg.role == "user" and hasattr(msg, "parts"):
-                    for p in msg.parts:
-                        if hasattr(p, "function_response") and p.function_response and hasattr(p.function_response, "response"):
-                            r = p.function_response.response
-                            if isinstance(r, dict) and "output" in r:
-                                val = r["output"]
-                                if isinstance(val, str):
-                                    if "[SYSTEM: FILES UPDATED]" in val:
-                                        val = val.split("[SYSTEM: FILES UPDATED]")[0].strip()
-                                    if _history_trunc_limit > 0 and len(val) > _history_trunc_limit:
-                                        val = val[:_history_trunc_limit] + "\n\n... [TRUNCATED BY SYSTEM TO SAVE TOKENS. Original output was too large.]"
-                                    r["output"] = val
-
         _append_comms("OUT", "request", {"message": f"[ctx {len(md_content)} + msg {len(user_message)}]"})
         payload, all_text = user_message, []
         
         for r_idx in range(MAX_TOOL_ROUNDS + 2):
-            # Strip stale file refreshes from Gemini history
+            # Strip stale file refreshes and truncate old tool outputs in Gemini history
             if _gemini_chat and _gemini_chat.history:
                 for msg in _gemini_chat.history:
                     if msg.role == "user" and hasattr(msg, "parts"):
@@ -529,8 +521,12 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items:
                                 r = p.function_response.response
                                 if isinstance(r, dict) and "output" in r:
                                     val = r["output"]
-                                    if isinstance(val, str) and "[SYSTEM: FILES UPDATED]" in val:
-                                        r["output"] = val.split("[SYSTEM: FILES UPDATED]")[0].strip()
+                                    if isinstance(val, str):
+                                        if "[SYSTEM: FILES UPDATED]" in val:
+                                            val = val.split("[SYSTEM: FILES UPDATED]")[0].strip()
+                                        if _history_trunc_limit > 0 and len(val) > _history_trunc_limit:
+                                            val = val[:_history_trunc_limit] + "\n\n... [TRUNCATED BY SYSTEM TO SAVE TOKENS.]"
+                                        r["output"] = val
 
             resp = _gemini_chat.send_message(payload)
             txt = "\n".join(p.text for c in resp.candidates if getattr(c, "content", None) for p in c.content.parts if hasattr(p, "text") and p.text)
@@ -544,6 +540,29 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items:
             reason = resp.candidates[0].finish_reason.name if resp.candidates and hasattr(resp.candidates[0], "finish_reason") else "STOP"
             
             _append_comms("IN", "response", {"round": r_idx, "stop_reason": reason, "text": txt, "tool_calls": [{"name": c.name, "args": dict(c.args)} for c in calls], "usage": usage})
+            
+            # Guard: if Gemini reports input tokens approaching the limit, drop oldest history pairs
+            total_in = usage.get("input_tokens", 0)
+            if total_in > _GEMINI_MAX_INPUT_TOKENS and _gemini_chat and _gemini_chat.history:
+                hist = _gemini_chat.history
+                dropped = 0
+                # Drop the oldest entries one at a time, always keeping at least the last 4 entries
+                while len(hist) > 4 and total_in > _GEMINI_MAX_INPUT_TOKENS * 0.7:
+                    # Rough estimate: each dropped message saves ~(chars/4) tokens
+                    saved = 0
+                    for p in hist[0].parts:
+                        if hasattr(p, "text") and p.text:
+                            saved += len(p.text) // 4
+                        elif hasattr(p, "function_response") and p.function_response:
+                            r = getattr(p.function_response, "response", {})
+                            if isinstance(r, dict):
+                                saved += len(str(r.get("output", ""))) // 4
+                    hist.pop(0)
+                    total_in -= max(saved, 100)
+                    dropped += 1
+                if dropped > 0:
+                    _append_comms("OUT", "request", {"message": f"[GEMINI HISTORY TRIMMED: dropped {dropped} old entries to stay within token budget]"})
+
             if not calls or r_idx > MAX_TOOL_ROUNDS: break
 
             f_resps, log = [], []
@@ -560,8 +579,10 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items:
 
                 if i == len(calls) - 1:
                     if file_items:
-                        ctx = _build_file_context_text(_reread_file_items(file_items))
-                        if ctx: out += f"\n\n[SYSTEM: FILES UPDATED]\n\n{ctx}"
+                        file_items = _reread_file_items(file_items)
+                        ctx = _build_file_context_text(file_items)
+                        if ctx:
+                            out += f"\n\n[SYSTEM: FILES UPDATED]\n\n{ctx}"
                     if r_idx == MAX_TOOL_ROUNDS: out += "\n\n[SYSTEM: MAX ROUNDS. PROVIDE FINAL ANSWER.]"
                 
                 f_resps.append(types.Part.from_function_response(name=name, response={"output": out}))
@@ -586,6 +607,10 @@ _CHARS_PER_TOKEN = 3.5
 # Anthropic's limit is 200k. We leave headroom for the response + tool schemas.
 _ANTHROPIC_MAX_PROMPT_TOKENS = 180_000
 
+# Gemini models have a 1M context window but we cap well below to leave headroom.
+# If the model reports input tokens exceeding this, we trim old history.
+_GEMINI_MAX_INPUT_TOKENS = 900_000
+
 # Marker prefix used to identify stale file-refresh injections in history
 _FILE_REFRESH_MARKER = "[FILES UPDATED"
 
@@ -830,7 +855,7 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item
                 max_tokens=_max_tokens,
                 temperature=_temperature,
                 system=system_blocks,
-                tools=_build_anthropic_tools(),
+                tools=_get_anthropic_tools(),
                 messages=_anthropic_history,
             )
 
@@ -976,5 +1001,4 @@ def send(
         return _send_gemini(md_content, user_message, base_dir, file_items)
     elif _provider == "anthropic":
         return _send_anthropic(md_content, user_message, base_dir, file_items)
-    raise ValueError(f"unknown provider: {_provider}")
-
+    raise ValueError(f"unknown provider: {_provider}")
\ No newline at end of file
diff --git a/gui.py b/gui.py
index 6f17c85..495923b 100644
--- a/gui.py
+++ b/gui.py
@@ -2119,4 +2119,4 @@ def main():
 
 
 if __name__ == "__main__":
-    main()
+    main()
\ No newline at end of file
diff --git a/project_manager.py b/project_manager.py
index 3e46f60..25b196b 100644
--- a/project_manager.py
+++ b/project_manager.py
@@ -154,4 +154,3 @@ def flat_config(proj: dict, disc_name: str | None = None) -> dict:
             "history": disc_data.get("history", []),
         },
     }
-
diff --git a/session_logger.py b/session_logger.py
index c2e9616..5c4b9f6 100644
--- a/session_logger.py
+++ b/session_logger.py
@@ -133,5 +133,3 @@ def log_tool_call(script: str, result: str, script_path: str | None):
         pass
 
     return str(ps1_path) if ps1_path else None
-
-