From 4755f4b59068bf3c40bbe2751b117e24da72f46d Mon Sep 17 00:00:00 2001 From: Ed_ Date: Sun, 22 Feb 2026 11:28:18 -0500 Subject: [PATCH] claude final fix pass --- aggregate.py | 2 -- ai_client.py | 82 ++++++++++++++++++++++++++++++---------------- gui.py | 2 +- project_manager.py | 1 - session_logger.py | 2 -- 5 files changed, 54 insertions(+), 35 deletions(-) diff --git a/aggregate.py b/aggregate.py index 5cf6879..304ebc8 100644 --- a/aggregate.py +++ b/aggregate.py @@ -171,5 +171,3 @@ def main(): if __name__ == "__main__": main() - - diff --git a/ai_client.py b/ai_client.py index 74570ab..380006a 100644 --- a/ai_client.py +++ b/ai_client.py @@ -217,6 +217,7 @@ def cleanup(): def reset_session(): global _gemini_client, _gemini_chat, _gemini_cache global _anthropic_client, _anthropic_history + global _CACHED_ANTHROPIC_TOOLS if _gemini_client and _gemini_cache: try: _gemini_client.caches.delete(name=_gemini_cache.name) @@ -227,6 +228,7 @@ def reset_session(): _gemini_cache = None _anthropic_client = None _anthropic_history = [] + _CACHED_ANTHROPIC_TOOLS = None file_cache.reset_client() @@ -309,6 +311,15 @@ def _build_anthropic_tools() -> list[dict]: _ANTHROPIC_TOOLS = _build_anthropic_tools() +_CACHED_ANTHROPIC_TOOLS = None + +def _get_anthropic_tools() -> list[dict]: + """Return the Anthropic tools list, rebuilding only once per session.""" + global _CACHED_ANTHROPIC_TOOLS + if _CACHED_ANTHROPIC_TOOLS is None: + _CACHED_ANTHROPIC_TOOLS = _build_anthropic_tools() + return _CACHED_ANTHROPIC_TOOLS + def _gemini_tool_declaration(): from google.genai import types @@ -443,15 +454,13 @@ def _ensure_gemini_client(): def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items: list[dict] | None = None) -> str: - global _gemini_chat + global _gemini_chat, _gemini_cache from google.genai import types try: _ensure_gemini_client(); mcp_client.configure(file_items or [], [base_dir]) sys_instr = f"{_get_combined_system_prompt()}\n\n\n{md_content}\n" tools_decl = [_gemini_tool_declaration()] - global _gemini_cache, _gemini_chat - # DYNAMIC CONTEXT: Check if files/context changed mid-session current_md_hash = hash(md_content) old_history = None @@ -490,8 +499,7 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items: ) _append_comms("OUT", "request", {"message": f"[CACHE CREATED] {_gemini_cache.name}"}) except Exception as e: - # Fallback if under token limit or API error - pass + _gemini_cache = None # Ensure clean state on failure kwargs = {"model": _model, "config": chat_config} if old_history: @@ -500,27 +508,11 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items: _gemini_chat = _gemini_client.chats.create(**kwargs) _gemini_chat._last_md_hash = current_md_hash - # COMPRESS HISTORY: Truncate massive tool outputs from previous turns to stop token leaks - if _gemini_chat and getattr(_gemini_chat, "history", None): - for msg in _gemini_chat.history: - if msg.role == "user" and hasattr(msg, "parts"): - for p in msg.parts: - if hasattr(p, "function_response") and p.function_response and hasattr(p.function_response, "response"): - r = p.function_response.response - if isinstance(r, dict) and "output" in r: - val = r["output"] - if isinstance(val, str): - if "[SYSTEM: FILES UPDATED]" in val: - val = val.split("[SYSTEM: FILES UPDATED]")[0].strip() - if _history_trunc_limit > 0 and len(val) > _history_trunc_limit: - val = val[:_history_trunc_limit] + "\n\n... [TRUNCATED BY SYSTEM TO SAVE TOKENS. Original output was too large.]" - r["output"] = val - _append_comms("OUT", "request", {"message": f"[ctx {len(md_content)} + msg {len(user_message)}]"}) payload, all_text = user_message, [] for r_idx in range(MAX_TOOL_ROUNDS + 2): - # Strip stale file refreshes from Gemini history + # Strip stale file refreshes and truncate old tool outputs in Gemini history if _gemini_chat and _gemini_chat.history: for msg in _gemini_chat.history: if msg.role == "user" and hasattr(msg, "parts"): @@ -529,8 +521,12 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items: r = p.function_response.response if isinstance(r, dict) and "output" in r: val = r["output"] - if isinstance(val, str) and "[SYSTEM: FILES UPDATED]" in val: - r["output"] = val.split("[SYSTEM: FILES UPDATED]")[0].strip() + if isinstance(val, str): + if "[SYSTEM: FILES UPDATED]" in val: + val = val.split("[SYSTEM: FILES UPDATED]")[0].strip() + if _history_trunc_limit > 0 and len(val) > _history_trunc_limit: + val = val[:_history_trunc_limit] + "\n\n... [TRUNCATED BY SYSTEM TO SAVE TOKENS.]" + r["output"] = val resp = _gemini_chat.send_message(payload) txt = "\n".join(p.text for c in resp.candidates if getattr(c, "content", None) for p in c.content.parts if hasattr(p, "text") and p.text) @@ -544,6 +540,29 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items: reason = resp.candidates[0].finish_reason.name if resp.candidates and hasattr(resp.candidates[0], "finish_reason") else "STOP" _append_comms("IN", "response", {"round": r_idx, "stop_reason": reason, "text": txt, "tool_calls": [{"name": c.name, "args": dict(c.args)} for c in calls], "usage": usage}) + + # Guard: if Gemini reports input tokens approaching the limit, drop oldest history pairs + total_in = usage.get("input_tokens", 0) + if total_in > _GEMINI_MAX_INPUT_TOKENS and _gemini_chat and _gemini_chat.history: + hist = _gemini_chat.history + dropped = 0 + # Drop oldest pairs (user+model) but keep at least the last 2 entries + while len(hist) > 4 and total_in > _GEMINI_MAX_INPUT_TOKENS * 0.7: + # Rough estimate: each dropped message saves ~(chars/4) tokens + saved = 0 + for p in hist[0].parts: + if hasattr(p, "text") and p.text: + saved += len(p.text) // 4 + elif hasattr(p, "function_response") and p.function_response: + r = getattr(p.function_response, "response", {}) + if isinstance(r, dict): + saved += len(str(r.get("output", ""))) // 4 + hist.pop(0) + total_in -= max(saved, 100) + dropped += 1 + if dropped > 0: + _append_comms("OUT", "request", {"message": f"[GEMINI HISTORY TRIMMED: dropped {dropped} old entries to stay within token budget]"}) + if not calls or r_idx > MAX_TOOL_ROUNDS: break f_resps, log = [], [] @@ -560,8 +579,10 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items: if i == len(calls) - 1: if file_items: - ctx = _build_file_context_text(_reread_file_items(file_items)) - if ctx: out += f"\n\n[SYSTEM: FILES UPDATED]\n\n{ctx}" + file_items = _reread_file_items(file_items) + ctx = _build_file_context_text(file_items) + if ctx: + out += f"\n\n[SYSTEM: FILES UPDATED]\n\n{ctx}" if r_idx == MAX_TOOL_ROUNDS: out += "\n\n[SYSTEM: MAX ROUNDS. PROVIDE FINAL ANSWER.]" f_resps.append(types.Part.from_function_response(name=name, response={"output": out})) @@ -586,6 +607,10 @@ _CHARS_PER_TOKEN = 3.5 # Anthropic's limit is 200k. We leave headroom for the response + tool schemas. _ANTHROPIC_MAX_PROMPT_TOKENS = 180_000 +# Gemini models have a 1M context window but we cap well below to leave headroom. +# If the model reports input tokens exceeding this, we trim old history. +_GEMINI_MAX_INPUT_TOKENS = 900_000 + # Marker prefix used to identify stale file-refresh injections in history _FILE_REFRESH_MARKER = "[FILES UPDATED" @@ -830,7 +855,7 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item max_tokens=_max_tokens, temperature=_temperature, system=system_blocks, - tools=_build_anthropic_tools(), + tools=_get_anthropic_tools(), messages=_anthropic_history, ) @@ -976,5 +1001,4 @@ def send( return _send_gemini(md_content, user_message, base_dir, file_items) elif _provider == "anthropic": return _send_anthropic(md_content, user_message, base_dir, file_items) - raise ValueError(f"unknown provider: {_provider}") - + raise ValueError(f"unknown provider: {_provider}") \ No newline at end of file diff --git a/gui.py b/gui.py index 6f17c85..495923b 100644 --- a/gui.py +++ b/gui.py @@ -2119,4 +2119,4 @@ def main(): if __name__ == "__main__": - main() + main() \ No newline at end of file diff --git a/project_manager.py b/project_manager.py index 3e46f60..25b196b 100644 --- a/project_manager.py +++ b/project_manager.py @@ -154,4 +154,3 @@ def flat_config(proj: dict, disc_name: str | None = None) -> dict: "history": disc_data.get("history", []), }, } - diff --git a/session_logger.py b/session_logger.py index c2e9616..5c4b9f6 100644 --- a/session_logger.py +++ b/session_logger.py @@ -133,5 +133,3 @@ def log_tool_call(script: str, result: str, script_path: str | None): pass return str(ps1_path) if ps1_path else None - -