claude final fix pass
This commit is contained in:
@@ -171,5 +171,3 @@ def main():
|
|||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
80
ai_client.py
80
ai_client.py
@@ -217,6 +217,7 @@ def cleanup():
|
|||||||
def reset_session():
|
def reset_session():
|
||||||
global _gemini_client, _gemini_chat, _gemini_cache
|
global _gemini_client, _gemini_chat, _gemini_cache
|
||||||
global _anthropic_client, _anthropic_history
|
global _anthropic_client, _anthropic_history
|
||||||
|
global _CACHED_ANTHROPIC_TOOLS
|
||||||
if _gemini_client and _gemini_cache:
|
if _gemini_client and _gemini_cache:
|
||||||
try:
|
try:
|
||||||
_gemini_client.caches.delete(name=_gemini_cache.name)
|
_gemini_client.caches.delete(name=_gemini_cache.name)
|
||||||
@@ -227,6 +228,7 @@ def reset_session():
|
|||||||
_gemini_cache = None
|
_gemini_cache = None
|
||||||
_anthropic_client = None
|
_anthropic_client = None
|
||||||
_anthropic_history = []
|
_anthropic_history = []
|
||||||
|
_CACHED_ANTHROPIC_TOOLS = None
|
||||||
file_cache.reset_client()
|
file_cache.reset_client()
|
||||||
|
|
||||||
|
|
||||||
@@ -309,6 +311,15 @@ def _build_anthropic_tools() -> list[dict]:
|
|||||||
|
|
||||||
_ANTHROPIC_TOOLS = _build_anthropic_tools()
|
_ANTHROPIC_TOOLS = _build_anthropic_tools()
|
||||||
|
|
||||||
|
_CACHED_ANTHROPIC_TOOLS = None
|
||||||
|
|
||||||
|
def _get_anthropic_tools() -> list[dict]:
|
||||||
|
"""Return the Anthropic tools list, rebuilding only once per session."""
|
||||||
|
global _CACHED_ANTHROPIC_TOOLS
|
||||||
|
if _CACHED_ANTHROPIC_TOOLS is None:
|
||||||
|
_CACHED_ANTHROPIC_TOOLS = _build_anthropic_tools()
|
||||||
|
return _CACHED_ANTHROPIC_TOOLS
|
||||||
|
|
||||||
|
|
||||||
def _gemini_tool_declaration():
|
def _gemini_tool_declaration():
|
||||||
from google.genai import types
|
from google.genai import types
|
||||||
@@ -443,15 +454,13 @@ def _ensure_gemini_client():
|
|||||||
|
|
||||||
|
|
||||||
def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items: list[dict] | None = None) -> str:
|
def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items: list[dict] | None = None) -> str:
|
||||||
global _gemini_chat
|
global _gemini_chat, _gemini_cache
|
||||||
from google.genai import types
|
from google.genai import types
|
||||||
try:
|
try:
|
||||||
_ensure_gemini_client(); mcp_client.configure(file_items or [], [base_dir])
|
_ensure_gemini_client(); mcp_client.configure(file_items or [], [base_dir])
|
||||||
sys_instr = f"{_get_combined_system_prompt()}\n\n<context>\n{md_content}\n</context>"
|
sys_instr = f"{_get_combined_system_prompt()}\n\n<context>\n{md_content}\n</context>"
|
||||||
tools_decl = [_gemini_tool_declaration()]
|
tools_decl = [_gemini_tool_declaration()]
|
||||||
|
|
||||||
global _gemini_cache, _gemini_chat
|
|
||||||
|
|
||||||
# DYNAMIC CONTEXT: Check if files/context changed mid-session
|
# DYNAMIC CONTEXT: Check if files/context changed mid-session
|
||||||
current_md_hash = hash(md_content)
|
current_md_hash = hash(md_content)
|
||||||
old_history = None
|
old_history = None
|
||||||
@@ -490,8 +499,7 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items:
|
|||||||
)
|
)
|
||||||
_append_comms("OUT", "request", {"message": f"[CACHE CREATED] {_gemini_cache.name}"})
|
_append_comms("OUT", "request", {"message": f"[CACHE CREATED] {_gemini_cache.name}"})
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# Fallback if under token limit or API error
|
_gemini_cache = None # Ensure clean state on failure
|
||||||
pass
|
|
||||||
|
|
||||||
kwargs = {"model": _model, "config": chat_config}
|
kwargs = {"model": _model, "config": chat_config}
|
||||||
if old_history:
|
if old_history:
|
||||||
@@ -500,8 +508,12 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items:
|
|||||||
_gemini_chat = _gemini_client.chats.create(**kwargs)
|
_gemini_chat = _gemini_client.chats.create(**kwargs)
|
||||||
_gemini_chat._last_md_hash = current_md_hash
|
_gemini_chat._last_md_hash = current_md_hash
|
||||||
|
|
||||||
# COMPRESS HISTORY: Truncate massive tool outputs from previous turns to stop token leaks
|
_append_comms("OUT", "request", {"message": f"[ctx {len(md_content)} + msg {len(user_message)}]"})
|
||||||
if _gemini_chat and getattr(_gemini_chat, "history", None):
|
payload, all_text = user_message, []
|
||||||
|
|
||||||
|
for r_idx in range(MAX_TOOL_ROUNDS + 2):
|
||||||
|
# Strip stale file refreshes and truncate old tool outputs in Gemini history
|
||||||
|
if _gemini_chat and _gemini_chat.history:
|
||||||
for msg in _gemini_chat.history:
|
for msg in _gemini_chat.history:
|
||||||
if msg.role == "user" and hasattr(msg, "parts"):
|
if msg.role == "user" and hasattr(msg, "parts"):
|
||||||
for p in msg.parts:
|
for p in msg.parts:
|
||||||
@@ -513,25 +525,9 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items:
|
|||||||
if "[SYSTEM: FILES UPDATED]" in val:
|
if "[SYSTEM: FILES UPDATED]" in val:
|
||||||
val = val.split("[SYSTEM: FILES UPDATED]")[0].strip()
|
val = val.split("[SYSTEM: FILES UPDATED]")[0].strip()
|
||||||
if _history_trunc_limit > 0 and len(val) > _history_trunc_limit:
|
if _history_trunc_limit > 0 and len(val) > _history_trunc_limit:
|
||||||
val = val[:_history_trunc_limit] + "\n\n... [TRUNCATED BY SYSTEM TO SAVE TOKENS. Original output was too large.]"
|
val = val[:_history_trunc_limit] + "\n\n... [TRUNCATED BY SYSTEM TO SAVE TOKENS.]"
|
||||||
r["output"] = val
|
r["output"] = val
|
||||||
|
|
||||||
_append_comms("OUT", "request", {"message": f"[ctx {len(md_content)} + msg {len(user_message)}]"})
|
|
||||||
payload, all_text = user_message, []
|
|
||||||
|
|
||||||
for r_idx in range(MAX_TOOL_ROUNDS + 2):
|
|
||||||
# Strip stale file refreshes from Gemini history
|
|
||||||
if _gemini_chat and _gemini_chat.history:
|
|
||||||
for msg in _gemini_chat.history:
|
|
||||||
if msg.role == "user" and hasattr(msg, "parts"):
|
|
||||||
for p in msg.parts:
|
|
||||||
if hasattr(p, "function_response") and p.function_response and hasattr(p.function_response, "response"):
|
|
||||||
r = p.function_response.response
|
|
||||||
if isinstance(r, dict) and "output" in r:
|
|
||||||
val = r["output"]
|
|
||||||
if isinstance(val, str) and "[SYSTEM: FILES UPDATED]" in val:
|
|
||||||
r["output"] = val.split("[SYSTEM: FILES UPDATED]")[0].strip()
|
|
||||||
|
|
||||||
resp = _gemini_chat.send_message(payload)
|
resp = _gemini_chat.send_message(payload)
|
||||||
txt = "\n".join(p.text for c in resp.candidates if getattr(c, "content", None) for p in c.content.parts if hasattr(p, "text") and p.text)
|
txt = "\n".join(p.text for c in resp.candidates if getattr(c, "content", None) for p in c.content.parts if hasattr(p, "text") and p.text)
|
||||||
if txt: all_text.append(txt)
|
if txt: all_text.append(txt)
|
||||||
@@ -544,6 +540,29 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items:
|
|||||||
reason = resp.candidates[0].finish_reason.name if resp.candidates and hasattr(resp.candidates[0], "finish_reason") else "STOP"
|
reason = resp.candidates[0].finish_reason.name if resp.candidates and hasattr(resp.candidates[0], "finish_reason") else "STOP"
|
||||||
|
|
||||||
_append_comms("IN", "response", {"round": r_idx, "stop_reason": reason, "text": txt, "tool_calls": [{"name": c.name, "args": dict(c.args)} for c in calls], "usage": usage})
|
_append_comms("IN", "response", {"round": r_idx, "stop_reason": reason, "text": txt, "tool_calls": [{"name": c.name, "args": dict(c.args)} for c in calls], "usage": usage})
|
||||||
|
|
||||||
|
# Guard: if Gemini reports input tokens approaching the limit, drop oldest history pairs
|
||||||
|
total_in = usage.get("input_tokens", 0)
|
||||||
|
if total_in > _GEMINI_MAX_INPUT_TOKENS and _gemini_chat and _gemini_chat.history:
|
||||||
|
hist = _gemini_chat.history
|
||||||
|
dropped = 0
|
||||||
|
# Drop oldest pairs (user+model) but keep at least the last 2 entries
|
||||||
|
while len(hist) > 4 and total_in > _GEMINI_MAX_INPUT_TOKENS * 0.7:
|
||||||
|
# Rough estimate: each dropped message saves ~(chars/4) tokens
|
||||||
|
saved = 0
|
||||||
|
for p in hist[0].parts:
|
||||||
|
if hasattr(p, "text") and p.text:
|
||||||
|
saved += len(p.text) // 4
|
||||||
|
elif hasattr(p, "function_response") and p.function_response:
|
||||||
|
r = getattr(p.function_response, "response", {})
|
||||||
|
if isinstance(r, dict):
|
||||||
|
saved += len(str(r.get("output", ""))) // 4
|
||||||
|
hist.pop(0)
|
||||||
|
total_in -= max(saved, 100)
|
||||||
|
dropped += 1
|
||||||
|
if dropped > 0:
|
||||||
|
_append_comms("OUT", "request", {"message": f"[GEMINI HISTORY TRIMMED: dropped {dropped} old entries to stay within token budget]"})
|
||||||
|
|
||||||
if not calls or r_idx > MAX_TOOL_ROUNDS: break
|
if not calls or r_idx > MAX_TOOL_ROUNDS: break
|
||||||
|
|
||||||
f_resps, log = [], []
|
f_resps, log = [], []
|
||||||
@@ -560,8 +579,10 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items:
|
|||||||
|
|
||||||
if i == len(calls) - 1:
|
if i == len(calls) - 1:
|
||||||
if file_items:
|
if file_items:
|
||||||
ctx = _build_file_context_text(_reread_file_items(file_items))
|
file_items = _reread_file_items(file_items)
|
||||||
if ctx: out += f"\n\n[SYSTEM: FILES UPDATED]\n\n{ctx}"
|
ctx = _build_file_context_text(file_items)
|
||||||
|
if ctx:
|
||||||
|
out += f"\n\n[SYSTEM: FILES UPDATED]\n\n{ctx}"
|
||||||
if r_idx == MAX_TOOL_ROUNDS: out += "\n\n[SYSTEM: MAX ROUNDS. PROVIDE FINAL ANSWER.]"
|
if r_idx == MAX_TOOL_ROUNDS: out += "\n\n[SYSTEM: MAX ROUNDS. PROVIDE FINAL ANSWER.]"
|
||||||
|
|
||||||
f_resps.append(types.Part.from_function_response(name=name, response={"output": out}))
|
f_resps.append(types.Part.from_function_response(name=name, response={"output": out}))
|
||||||
@@ -586,6 +607,10 @@ _CHARS_PER_TOKEN = 3.5
|
|||||||
# Anthropic's limit is 200k. We leave headroom for the response + tool schemas.
|
# Anthropic's limit is 200k. We leave headroom for the response + tool schemas.
|
||||||
_ANTHROPIC_MAX_PROMPT_TOKENS = 180_000
|
_ANTHROPIC_MAX_PROMPT_TOKENS = 180_000
|
||||||
|
|
||||||
|
# Gemini models have a 1M context window but we cap well below to leave headroom.
|
||||||
|
# If the model reports input tokens exceeding this, we trim old history.
|
||||||
|
_GEMINI_MAX_INPUT_TOKENS = 900_000
|
||||||
|
|
||||||
# Marker prefix used to identify stale file-refresh injections in history
|
# Marker prefix used to identify stale file-refresh injections in history
|
||||||
_FILE_REFRESH_MARKER = "[FILES UPDATED"
|
_FILE_REFRESH_MARKER = "[FILES UPDATED"
|
||||||
|
|
||||||
@@ -830,7 +855,7 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item
|
|||||||
max_tokens=_max_tokens,
|
max_tokens=_max_tokens,
|
||||||
temperature=_temperature,
|
temperature=_temperature,
|
||||||
system=system_blocks,
|
system=system_blocks,
|
||||||
tools=_build_anthropic_tools(),
|
tools=_get_anthropic_tools(),
|
||||||
messages=_anthropic_history,
|
messages=_anthropic_history,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -977,4 +1002,3 @@ def send(
|
|||||||
elif _provider == "anthropic":
|
elif _provider == "anthropic":
|
||||||
return _send_anthropic(md_content, user_message, base_dir, file_items)
|
return _send_anthropic(md_content, user_message, base_dir, file_items)
|
||||||
raise ValueError(f"unknown provider: {_provider}")
|
raise ValueError(f"unknown provider: {_provider}")
|
||||||
|
|
||||||
|
|||||||
@@ -154,4 +154,3 @@ def flat_config(proj: dict, disc_name: str | None = None) -> dict:
|
|||||||
"history": disc_data.get("history", []),
|
"history": disc_data.get("history", []),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -133,5 +133,3 @@ def log_tool_call(script: str, result: str, script_path: str | None):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
return str(ps1_path) if ps1_path else None
|
return str(ps1_path) if ps1_path else None
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user