Cache improvement (Gemini)

This commit is contained in:
2026-02-22 10:09:19 -05:00
parent 7d294da942
commit 4a6721c3be
2 changed files with 40 insertions and 8 deletions

View File

@@ -430,11 +430,24 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items:
sys_instr = f"{_get_combined_system_prompt()}\n\n<context>\n{md_content}\n</context>"
tools_decl = [_gemini_tool_declaration()]
global _gemini_cache
global _gemini_cache, _gemini_chat
# DYNAMIC CONTEXT: Check if files/context changed mid-session
current_md_hash = hash(md_content)
old_history = None
if _gemini_chat and getattr(_gemini_chat, "_last_md_hash", None) != current_md_hash:
old_history = list(_gemini_chat.history) if _gemini_chat.history else []
if _gemini_cache:
try: _gemini_client.caches.delete(name=_gemini_cache.name)
except: pass
_gemini_chat = None
_gemini_cache = None
_append_comms("OUT", "request", {"message": "[CONTEXT CHANGED] Rebuilding cache and chat session..."})
if not _gemini_chat:
chat_config = types.GenerateContentConfig(system_instruction=sys_instr, tools=tools_decl)
try:
# Gemini requires >= 32,768 tokens for caching. We try to cache, and fallback if it fails.
# Gemini requires 1024 (Flash) or 4096 (Pro) tokens to cache.
_gemini_cache = _gemini_client.caches.create(
model=_model,
config=types.CreateCachedContentConfig(
@@ -446,10 +459,15 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items:
chat_config = types.GenerateContentConfig(cached_content=_gemini_cache.name)
_append_comms("OUT", "request", {"message": f"[CACHE CREATED] {_gemini_cache.name}"})
except Exception as e:
# Fallback to standard request if under 32k tokens or cache creation fails
# Fallback if under token limit or API error
pass
_gemini_chat = _gemini_client.chats.create(model=_model, config=chat_config)
kwargs = {"model": _model, "config": chat_config}
if old_history:
kwargs["history"] = old_history
_gemini_chat = _gemini_client.chats.create(**kwargs)
_gemini_chat._last_md_hash = current_md_hash
_append_comms("OUT", "request", {"message": f"[ctx {len(md_content)} + msg {len(user_message)}]"})
payload, all_text = user_message, []