Port missing features to gui_2 and optimize caching
- Port 10 missing features from gui.py to gui_2.py: performance
diagnostics, prior session log viewing, token budget visualization,
agent tools config, API hooks server, GUI task queue, discussion
truncation, THINKING/LIVE indicators, event subscriptions, and
session usage tracking
- Persist window visibility state in config.toml
- Fix Gemini cache invalidation by separating discussion history
from cached context (use MD5 hash instead of built-in hash)
- Add cost optimizations: tool output truncation at source, proactive
history trimming at 40%, summary_only support in aggregate.run()
- Add cleanup() for destroying API caches on exit
This commit is contained in:
110
ai_client.py
110
ai_client.py
@@ -15,6 +15,8 @@ import tomllib
|
||||
import json
|
||||
import time
|
||||
import datetime
|
||||
import hashlib
|
||||
import difflib
|
||||
from pathlib import Path
|
||||
import file_cache
|
||||
import mcp_client
|
||||
@@ -435,6 +437,13 @@ def _run_script(script: str, base_dir: str) -> str:
|
||||
return output
|
||||
|
||||
|
||||
def _truncate_tool_output(output: str) -> str:
    """
    Cap a tool's output at _history_trunc_limit characters before it goes to the API.

    output : raw text produced by a tool call

    Returns the text unchanged when the limit is disabled (<= 0) or the text
    already fits; otherwise returns the first _history_trunc_limit characters
    followed by a marker telling the model the output was cut.
    """
    limit = _history_trunc_limit
    # Guard clause: nothing to do when truncation is off or the text is short enough.
    if limit <= 0 or len(output) <= limit:
        return output
    head = output[:limit]
    return head + "\n\n... [TRUNCATED BY SYSTEM TO SAVE TOKENS.]"
|
||||
|
||||
|
||||
# ------------------------------------------------------------------ dynamic file context refresh
|
||||
|
||||
def _reread_file_items(file_items: list[dict]) -> tuple[list[dict], list[dict]]:
|
||||
@@ -460,7 +469,7 @@ def _reread_file_items(file_items: list[dict]) -> tuple[list[dict], list[dict]]:
|
||||
refreshed.append(item) # unchanged — skip re-read
|
||||
continue
|
||||
content = p.read_text(encoding="utf-8")
|
||||
new_item = {**item, "content": content, "error": False, "mtime": current_mtime}
|
||||
new_item = {**item, "old_content": item.get("content", ""), "content": content, "error": False, "mtime": current_mtime}
|
||||
refreshed.append(new_item)
|
||||
changed.append(new_item)
|
||||
except Exception as e:
|
||||
@@ -486,6 +495,35 @@ def _build_file_context_text(file_items: list[dict]) -> str:
|
||||
return "\n\n---\n\n".join(parts)
|
||||
|
||||
|
||||
_DIFF_LINE_THRESHOLD = 200
|
||||
|
||||
def _build_file_diff_text(changed_items: list[dict]) -> str:
|
||||
"""
|
||||
Build text for changed files. Small files (<= _DIFF_LINE_THRESHOLD lines)
|
||||
get full content; large files get a unified diff against old_content.
|
||||
"""
|
||||
if not changed_items:
|
||||
return ""
|
||||
parts = []
|
||||
for item in changed_items:
|
||||
path = item.get("path") or item.get("entry", "unknown")
|
||||
content = item.get("content", "")
|
||||
old_content = item.get("old_content", "")
|
||||
new_lines = content.splitlines(keepends=True)
|
||||
if len(new_lines) <= _DIFF_LINE_THRESHOLD or not old_content:
|
||||
suffix = str(path).rsplit(".", 1)[-1] if "." in str(path) else "text"
|
||||
parts.append(f"### `{path}` (full)\n\n```{suffix}\n{content}\n```")
|
||||
else:
|
||||
old_lines = old_content.splitlines(keepends=True)
|
||||
diff = difflib.unified_diff(old_lines, new_lines, fromfile=str(path), tofile=str(path), lineterm="")
|
||||
diff_text = "\n".join(diff)
|
||||
if diff_text:
|
||||
parts.append(f"### `{path}` (diff)\n\n```diff\n{diff_text}\n```")
|
||||
else:
|
||||
parts.append(f"### `{path}` (no changes detected)")
|
||||
return "\n\n---\n\n".join(parts)
|
||||
|
||||
|
||||
# ------------------------------------------------------------------ content block serialisation
|
||||
|
||||
def _content_block_to_dict(block) -> dict:
|
||||
@@ -530,22 +568,26 @@ def _get_gemini_history_list(chat):
|
||||
return chat.get_history()
|
||||
return []
|
||||
|
||||
def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items: list[dict] | None = None) -> str:
|
||||
def _send_gemini(md_content: str, user_message: str, base_dir: str,
|
||||
file_items: list[dict] | None = None,
|
||||
discussion_history: str = "") -> str:
|
||||
global _gemini_chat, _gemini_cache, _gemini_cache_md_hash, _gemini_cache_created_at
|
||||
|
||||
try:
|
||||
_ensure_gemini_client(); mcp_client.configure(file_items or [], [base_dir])
|
||||
# Only stable content (files + screenshots) goes in the cached system instruction.
|
||||
# Discussion history is sent as conversation messages so the cache isn't invalidated every turn.
|
||||
sys_instr = f"{_get_combined_system_prompt()}\n\n<context>\n{md_content}\n</context>"
|
||||
tools_decl = [_gemini_tool_declaration()]
|
||||
|
||||
# DYNAMIC CONTEXT: Check if files/context changed mid-session
|
||||
current_md_hash = hash(md_content)
|
||||
current_md_hash = hashlib.md5(md_content.encode()).hexdigest()
|
||||
old_history = None
|
||||
if _gemini_chat and _gemini_cache_md_hash != current_md_hash:
|
||||
old_history = list(_get_gemini_history_list(_gemini_chat)) if _get_gemini_history_list(_gemini_chat) else []
|
||||
if _gemini_cache:
|
||||
try: _gemini_client.caches.delete(name=_gemini_cache.name)
|
||||
except: pass
|
||||
except Exception as e: _append_comms("OUT", "request", {"message": f"[CACHE DELETE WARN] {e}"})
|
||||
_gemini_chat = None
|
||||
_gemini_cache = None
|
||||
_gemini_cache_created_at = None
|
||||
@@ -558,7 +600,7 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items:
|
||||
if elapsed > _GEMINI_CACHE_TTL * 0.9:
|
||||
old_history = list(_get_gemini_history_list(_gemini_chat)) if _get_gemini_history_list(_gemini_chat) else []
|
||||
try: _gemini_client.caches.delete(name=_gemini_cache.name)
|
||||
except: pass
|
||||
except Exception as e: _append_comms("OUT", "request", {"message": f"[CACHE DELETE WARN] {e}"})
|
||||
_gemini_chat = None
|
||||
_gemini_cache = None
|
||||
_gemini_cache_created_at = None
|
||||
@@ -601,7 +643,13 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items:
|
||||
|
||||
_gemini_chat = _gemini_client.chats.create(**kwargs)
|
||||
_gemini_cache_md_hash = current_md_hash
|
||||
|
||||
|
||||
# Inject discussion history as a user message on first chat creation
|
||||
# (only when there's no old_history being restored, i.e., fresh session)
|
||||
if discussion_history and not old_history:
|
||||
_gemini_chat.send_message(f"[DISCUSSION HISTORY]\n\n{discussion_history}")
|
||||
_append_comms("OUT", "request", {"message": f"[HISTORY INJECTED] {len(discussion_history)} chars"})
|
||||
|
||||
_append_comms("OUT", "request", {"message": f"[ctx {len(md_content)} + msg {len(user_message)}]"})
|
||||
payload, all_text = user_message, []
|
||||
|
||||
@@ -634,26 +682,19 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items:
|
||||
if cached_tokens:
|
||||
usage["cache_read_input_tokens"] = cached_tokens
|
||||
|
||||
# Fetch cache stats in the background thread to avoid blocking GUI
|
||||
cache_stats = None
|
||||
try:
|
||||
cache_stats = get_gemini_cache_stats()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
events.emit("response_received", payload={"provider": "gemini", "model": _model, "usage": usage, "round": r_idx, "cache_stats": cache_stats})
|
||||
events.emit("response_received", payload={"provider": "gemini", "model": _model, "usage": usage, "round": r_idx})
|
||||
|
||||
reason = resp.candidates[0].finish_reason.name if resp.candidates and hasattr(resp.candidates[0], "finish_reason") else "STOP"
|
||||
|
||||
_append_comms("IN", "response", {"round": r_idx, "stop_reason": reason, "text": txt, "tool_calls": [{"name": c.name, "args": dict(c.args)} for c in calls], "usage": usage})
|
||||
|
||||
# Guard: if Gemini reports input tokens approaching the limit, drop oldest history pairs
|
||||
# Guard: proactively trim history when input tokens exceed 40% of limit
|
||||
total_in = usage.get("input_tokens", 0)
|
||||
if total_in > _GEMINI_MAX_INPUT_TOKENS and _gemini_chat and _get_gemini_history_list(_gemini_chat):
|
||||
if total_in > _GEMINI_MAX_INPUT_TOKENS * 0.4 and _gemini_chat and _get_gemini_history_list(_gemini_chat):
|
||||
hist = _get_gemini_history_list(_gemini_chat)
|
||||
dropped = 0
|
||||
# Drop oldest pairs (user+model) but keep at least the last 2 entries
|
||||
while len(hist) > 4 and total_in > _GEMINI_MAX_INPUT_TOKENS * 0.7:
|
||||
while len(hist) > 4 and total_in > _GEMINI_MAX_INPUT_TOKENS * 0.3:
|
||||
# Drop in pairs (user + model) to maintain alternating roles required by Gemini
|
||||
saved = 0
|
||||
for _ in range(2):
|
||||
@@ -689,11 +730,12 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items:
|
||||
if i == len(calls) - 1:
|
||||
if file_items:
|
||||
file_items, changed = _reread_file_items(file_items)
|
||||
ctx = _build_file_context_text(changed)
|
||||
ctx = _build_file_diff_text(changed)
|
||||
if ctx:
|
||||
out += f"\n\n[SYSTEM: FILES UPDATED]\n\n{ctx}"
|
||||
if r_idx == MAX_TOOL_ROUNDS: out += "\n\n[SYSTEM: MAX ROUNDS. PROVIDE FINAL ANSWER.]"
|
||||
|
||||
out = _truncate_tool_output(out)
|
||||
f_resps.append(types.Part.from_function_response(name=name, response={"output": out}))
|
||||
log.append({"tool_use_id": name, "content": out})
|
||||
events.emit("tool_execution", payload={"status": "completed", "tool": name, "result": out, "round": r_idx})
|
||||
@@ -955,7 +997,7 @@ def _repair_anthropic_history(history: list[dict]):
|
||||
})
|
||||
|
||||
|
||||
def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_items: list[dict] | None = None) -> str:
|
||||
def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_items: list[dict] | None = None, discussion_history: str = "") -> str:
|
||||
try:
|
||||
_ensure_anthropic_client()
|
||||
mcp_client.configure(file_items or [], [base_dir])
|
||||
@@ -969,7 +1011,11 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item
|
||||
context_blocks = _build_chunked_context_blocks(context_text)
|
||||
system_blocks = stable_blocks + context_blocks
|
||||
|
||||
user_content = [{"type": "text", "text": user_message}]
|
||||
# Prepend discussion history to the first user message if this is a fresh session
|
||||
if discussion_history and not _anthropic_history:
|
||||
user_content = [{"type": "text", "text": f"[DISCUSSION HISTORY]\n\n{discussion_history}\n\n---\n\n{user_message}"}]
|
||||
else:
|
||||
user_content = [{"type": "text", "text": user_message}]
|
||||
|
||||
# COMPRESS HISTORY: Truncate massive tool outputs from previous turns
|
||||
for msg in _anthropic_history:
|
||||
@@ -1089,7 +1135,7 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item
|
||||
tool_results.append({
|
||||
"type": "tool_result",
|
||||
"tool_use_id": b_id,
|
||||
"content": output,
|
||||
"content": _truncate_tool_output(output),
|
||||
})
|
||||
events.emit("tool_execution", payload={"status": "completed", "tool": b_name, "result": output, "round": round_idx})
|
||||
elif b_name == TOOL_NAME:
|
||||
@@ -1108,14 +1154,14 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item
|
||||
tool_results.append({
|
||||
"type": "tool_result",
|
||||
"tool_use_id": b_id,
|
||||
"content": output,
|
||||
"content": _truncate_tool_output(output),
|
||||
})
|
||||
events.emit("tool_execution", payload={"status": "completed", "tool": b_name, "result": output, "round": round_idx})
|
||||
|
||||
# Refresh file context after tool calls — only inject CHANGED files
|
||||
if file_items:
|
||||
file_items, changed = _reread_file_items(file_items)
|
||||
refreshed_ctx = _build_file_context_text(changed)
|
||||
refreshed_ctx = _build_file_diff_text(changed)
|
||||
if refreshed_ctx:
|
||||
tool_results.append({
|
||||
"type": "text",
|
||||
@@ -1160,20 +1206,24 @@ def send(
|
||||
user_message: str,
|
||||
base_dir: str = ".",
|
||||
file_items: list[dict] | None = None,
|
||||
discussion_history: str = "",
|
||||
) -> str:
|
||||
"""
|
||||
Send a message to the active provider.
|
||||
|
||||
md_content : aggregated markdown string from aggregate.run()
|
||||
user_message: the user question / instruction
|
||||
base_dir : project base directory (for PowerShell tool calls)
|
||||
file_items : list of file dicts from aggregate.build_file_items() for
|
||||
dynamic context refresh after tool calls
|
||||
md_content : aggregated markdown string (for Gemini: stable content only,
|
||||
for Anthropic: full content including history)
|
||||
user_message : the user question / instruction
|
||||
base_dir : project base directory (for PowerShell tool calls)
|
||||
file_items : list of file dicts from aggregate.build_file_items() for
|
||||
dynamic context refresh after tool calls
|
||||
discussion_history : discussion history text (used by Gemini to inject as
|
||||
conversation message instead of caching it)
|
||||
"""
|
||||
if _provider == "gemini":
|
||||
return _send_gemini(md_content, user_message, base_dir, file_items)
|
||||
return _send_gemini(md_content, user_message, base_dir, file_items, discussion_history)
|
||||
elif _provider == "anthropic":
|
||||
return _send_anthropic(md_content, user_message, base_dir, file_items)
|
||||
return _send_anthropic(md_content, user_message, base_dir, file_items, discussion_history)
|
||||
raise ValueError(f"unknown provider: {_provider}")
|
||||
|
||||
def get_history_bleed_stats() -> dict:
|
||||
|
||||
Reference in New Issue
Block a user