Doing final pass of adjustments with anythingllm

This commit is contained in:
2026-02-22 09:54:36 -05:00
parent 34ed257cd6
commit 254ca8cbda
5 changed files with 77 additions and 31 deletions

View File

@@ -23,6 +23,7 @@ _model: str = "gemini-2.0-flash"
# Gemini client handle; reset_session() sets it back to None.
_gemini_client = None
# Chat object returned by _gemini_client.chats.create(); stays None until
# _send_gemini creates one, and is cleared again by reset_session().
_gemini_chat = None
# Cached-content object returned by _gemini_client.caches.create(), or None
# when no cache has been created (cache creation is allowed to fail).
_gemini_cache = None
# Anthropic client handle; reset_session() sets it back to None.
_anthropic_client = None
# Anthropic conversation messages as a list of dicts; reset_session() replaces
# it with a new empty list.
# NOTE(review): presumably the `history` list that _trim_anthropic_history
# trims (entries read via .get("role") / .get("content")) -- confirm.
_anthropic_history: list[dict] = []
@@ -194,10 +195,16 @@ def set_provider(provider: str, model: str):
def reset_session():
    """Discard all provider session state so the next request starts fresh.

    When both a Gemini client and a Gemini cached-content object exist, the
    cache is deleted through ``_gemini_client.caches.delete`` first. That
    deletion is best-effort: a failure is logged and the reset carries on.
    Afterwards the Gemini client, chat and cache handles and the Anthropic
    client are set to ``None``, the Anthropic history is rebound to a new
    empty list, and ``file_cache.reset_client()`` is called.

    Returns:
        None.
    """
    import logging

    global _gemini_client, _gemini_chat, _gemini_cache
    global _anthropic_client, _anthropic_history

    if _gemini_client and _gemini_cache:
        try:
            _gemini_client.caches.delete(name=_gemini_cache.name)
        except Exception:
            # Deliberately non-fatal: a reset must always succeed locally.
            # Log instead of silently passing so a lingering cache can be
            # diagnosed. NOTE(review): the cache is created with a TTL
            # elsewhere in this file, so it presumably expires on its own --
            # confirm.
            logging.getLogger(__name__).warning(
                "Failed to delete Gemini cache %r during session reset",
                getattr(_gemini_cache, "name", None),
                exc_info=True,
            )

    _gemini_client = None
    _gemini_chat = None
    _gemini_cache = None
    _anthropic_client = None
    # Rebind rather than clear in place, matching the original behaviour.
    _anthropic_history = []
    file_cache.reset_client()
@@ -421,19 +428,54 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items:
try:
_ensure_gemini_client(); mcp_client.configure(file_items or [], [base_dir])
sys_instr = f"{_get_combined_system_prompt()}\n\n<context>\n{md_content}\n</context>"
tools_decl = [_gemini_tool_declaration()]
global _gemini_cache
if not _gemini_chat:
_gemini_chat = _gemini_client.chats.create(model=_model, config=types.GenerateContentConfig(system_instruction=sys_instr, tools=[_gemini_tool_declaration()]))
chat_config = types.GenerateContentConfig(system_instruction=sys_instr, tools=tools_decl)
try:
# Gemini requires >= 32,768 tokens for caching. We try to cache, and fallback if it fails.
_gemini_cache = _gemini_client.caches.create(
model=_model,
config=types.CreateCachedContentConfig(
system_instruction=sys_instr,
tools=tools_decl,
ttl="3600s",
)
)
chat_config = types.GenerateContentConfig(cached_content=_gemini_cache.name)
_append_comms("OUT", "request", {"message": f"[CACHE CREATED] {_gemini_cache.name}"})
except Exception as e:
# Fallback to standard request if under 32k tokens or cache creation fails
pass
_gemini_chat = _gemini_client.chats.create(model=_model, config=chat_config)
_append_comms("OUT", "request", {"message": f"[ctx {len(md_content)} + msg {len(user_message)}]"})
payload, all_text = user_message, []
for r_idx in range(MAX_TOOL_ROUNDS + 2):
# Strip stale file refreshes from Gemini history
if _gemini_chat and _gemini_chat.history:
for msg in _gemini_chat.history:
if msg.role == "user" and hasattr(msg, "parts"):
for p in msg.parts:
if hasattr(p, "function_response") and p.function_response and hasattr(p.function_response, "response"):
r = p.function_response.response
if isinstance(r, dict) and "output" in r:
val = r["output"]
if isinstance(val, str) and "[SYSTEM: FILES UPDATED]" in val:
r["output"] = val.split("[SYSTEM: FILES UPDATED]")[0].strip()
resp = _gemini_chat.send_message(payload)
txt = "\n".join(p.text for c in resp.candidates for p in c.content.parts if hasattr(p, "text") and p.text)
txt = "\n".join(p.text for c in resp.candidates if getattr(c, "content", None) for p in c.content.parts if hasattr(p, "text") and p.text)
if txt: all_text.append(txt)
calls = [p.function_call for c in resp.candidates for p in c.content.parts if hasattr(p, "function_call") and p.function_call]
calls = [p.function_call for c in resp.candidates if getattr(c, "content", None) for p in c.content.parts if hasattr(p, "function_call") and p.function_call]
usage = {"input_tokens": getattr(resp.usage_metadata, "prompt_token_count", 0), "output_tokens": getattr(resp.usage_metadata, "candidates_token_count", 0)}
cached_tokens = getattr(resp.usage_metadata, "cached_content_token_count", None)
if cached_tokens:
usage["cache_read_input_tokens"] = cached_tokens
reason = resp.candidates[0].finish_reason.name if resp.candidates and hasattr(resp.candidates[0], "finish_reason") else "STOP"
_append_comms("IN", "response", {"round": r_idx, "stop_reason": reason, "text": txt, "tool_calls": [{"name": c.name, "args": dict(c.args)} for c in calls], "usage": usage})
@@ -568,33 +610,28 @@ def _trim_anthropic_history(system_blocks: list[dict], history: list[dict]):
# Phase 2: drop oldest turn pairs until within budget
dropped = 0
while len(history) > 2 and est > _ANTHROPIC_MAX_PROMPT_TOKENS:
# Always drop from the front in pairs (user, assistant) to maintain alternation
# But be careful: the first message might be user, followed by assistant
if history[0].get("role") == "user" and len(history) > 1 and history[1].get("role") == "assistant":
removed_user = history.pop(0)
removed_asst = history.pop(0)
while len(history) > 3 and est > _ANTHROPIC_MAX_PROMPT_TOKENS:
# Protect history[0] (original user prompt). Drop from history[1] (assistant) and history[2] (user)
if history[1].get("role") == "assistant" and len(history) > 2 and history[2].get("role") == "user":
removed_asst = history.pop(1)
removed_user = history.pop(1)
dropped += 2
est -= _estimate_message_tokens(removed_user)
est -= _estimate_message_tokens(removed_asst)
# If the next message is a user tool_result that belonged to the dropped assistant,
# we need to drop it too to avoid dangling tool_results
while history and history[0].get("role") == "user":
content = history[0].get("content", [])
est -= _estimate_message_tokens(removed_user)
# Also drop dangling tool_results if the next message is an assistant and the removed user was just tool results
while len(history) > 2 and history[1].get("role") == "assistant" and history[2].get("role") == "user":
content = history[2].get("content", [])
if isinstance(content, list) and content and isinstance(content[0], dict) and content[0].get("type") == "tool_result":
removed_tr = history.pop(0)
dropped += 1
est -= _estimate_message_tokens(removed_tr)
# And the assistant reply that followed it
if history and history[0].get("role") == "assistant":
removed_a2 = history.pop(0)
dropped += 1
est -= _estimate_message_tokens(removed_a2)
r_a = history.pop(1)
r_u = history.pop(1)
dropped += 2
est -= _estimate_message_tokens(r_a)
est -= _estimate_message_tokens(r_u)
else:
break
else:
# Edge case: history starts with something unexpected. Drop one message.
removed = history.pop(0)
# Edge case fallback: drop index 1 (protecting index 0)
removed = history.pop(1)
dropped += 1
est -= _estimate_message_tokens(removed)