gemini "fixes"
This commit is contained in:
26
aggregate.py
26
aggregate.py
@@ -126,9 +126,8 @@ def build_summary_section(base_dir: Path, files: list[str]) -> str:
|
|||||||
items = build_file_items(base_dir, files)
|
items = build_file_items(base_dir, files)
|
||||||
return summarize.build_summary_markdown(items)
|
return summarize.build_summary_markdown(items)
|
||||||
|
|
||||||
def build_markdown(base_dir: Path, files: list[str], screenshot_base_dir: Path, screenshots: list[str], history: list[str], summary_only: bool = False) -> str:
|
def build_static_markdown(base_dir: Path, files: list[str], screenshot_base_dir: Path, screenshots: list[str], summary_only: bool = False) -> str:
|
||||||
parts = []
|
parts = []
|
||||||
# STATIC PREFIX: Files and Screenshots must go first to maximize Cache Hits
|
|
||||||
if files:
|
if files:
|
||||||
if summary_only:
|
if summary_only:
|
||||||
parts.append("## Files (Summary)\n\n" + build_summary_section(base_dir, files))
|
parts.append("## Files (Summary)\n\n" + build_summary_section(base_dir, files))
|
||||||
@@ -136,12 +135,12 @@ def build_markdown(base_dir: Path, files: list[str], screenshot_base_dir: Path,
|
|||||||
parts.append("## Files\n\n" + build_files_section(base_dir, files))
|
parts.append("## Files\n\n" + build_files_section(base_dir, files))
|
||||||
if screenshots:
|
if screenshots:
|
||||||
parts.append("## Screenshots\n\n" + build_screenshots_section(screenshot_base_dir, screenshots))
|
parts.append("## Screenshots\n\n" + build_screenshots_section(screenshot_base_dir, screenshots))
|
||||||
# DYNAMIC SUFFIX: History changes every turn, must go last
|
return "\n\n---\n\n".join(parts) if parts else ""
|
||||||
if history:
|
|
||||||
parts.append("## Discussion History\n\n" + build_discussion_section(history))
|
|
||||||
return "\n\n---\n\n".join(parts)
|
|
||||||
|
|
||||||
def run(config: dict) -> tuple[str, Path]:
|
def build_dynamic_markdown(history: list[str]) -> str:
    """Render the discussion history as a standalone markdown section.

    Returns an empty string when *history* is empty, so callers can test the
    result for truthiness before joining it with other sections.
    """
    if not history:
        return ""
    discussion = build_discussion_section(history)
    return "## Discussion History\n\n" + discussion
|
||||||
|
|
||||||
|
def run(config: dict) -> tuple[str, str, Path, list[dict]]:
|
||||||
namespace = config.get("project", {}).get("name")
|
namespace = config.get("project", {}).get("name")
|
||||||
if not namespace:
|
if not namespace:
|
||||||
namespace = config.get("output", {}).get("namespace", "project")
|
namespace = config.get("output", {}).get("namespace", "project")
|
||||||
@@ -155,18 +154,21 @@ def run(config: dict) -> tuple[str, Path]:
|
|||||||
output_dir.mkdir(parents=True, exist_ok=True)
|
output_dir.mkdir(parents=True, exist_ok=True)
|
||||||
increment = find_next_increment(output_dir, namespace)
|
increment = find_next_increment(output_dir, namespace)
|
||||||
output_file = output_dir / f"{namespace}_{increment:03d}.md"
|
output_file = output_dir / f"{namespace}_{increment:03d}.md"
|
||||||
# Provide full files to trigger Gemini's 32k cache threshold and give the AI immediate context
|
|
||||||
markdown = build_markdown(base_dir, files, screenshot_base_dir, screenshots, history,
|
static_md = build_static_markdown(base_dir, files, screenshot_base_dir, screenshots, summary_only=False)
|
||||||
summary_only=False)
|
dynamic_md = build_dynamic_markdown(history)
|
||||||
|
|
||||||
|
markdown = f"{static_md}\n\n---\n\n{dynamic_md}" if static_md and dynamic_md else static_md or dynamic_md
|
||||||
output_file.write_text(markdown, encoding="utf-8")
|
output_file.write_text(markdown, encoding="utf-8")
|
||||||
|
|
||||||
file_items = build_file_items(base_dir, files)
|
file_items = build_file_items(base_dir, files)
|
||||||
return markdown, output_file, file_items
|
return static_md, dynamic_md, output_file, file_items
|
||||||
|
|
||||||
def main():
    """Load ``config.toml`` from the working directory, run the aggregation, and print the written path."""
    # Imported lazily (as before), but outside the ``with`` body so the file
    # handle is held only for the duration of the parse.
    import tomllib

    with open("config.toml", "rb") as f:
        config = tomllib.load(f)

    # run() returns (static_md, dynamic_md, output_file, file_items); only the
    # output path is needed here, so the other values are discarded.
    _, _, output_file, _ = run(config)
    print(f"Written: {output_file}")
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
333
ai_client.py
333
ai_client.py
@@ -453,81 +453,67 @@ def _ensure_gemini_client():
|
|||||||
_gemini_client = genai.Client(api_key=creds["gemini"]["api_key"])
|
_gemini_client = genai.Client(api_key=creds["gemini"]["api_key"])
|
||||||
|
|
||||||
|
|
||||||
def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items: list[dict] | None = None) -> str:
|
def _send_gemini(static_md: str, dynamic_md: str, user_message: str, base_dir: str, file_items: list[dict] | None = None) -> str:
|
||||||
global _gemini_chat, _gemini_cache
|
global _gemini_chat, _gemini_cache
|
||||||
from google.genai import types
|
from google.genai import types
|
||||||
try:
|
try:
|
||||||
_ensure_gemini_client(); mcp_client.configure(file_items or [], [base_dir])
|
_ensure_gemini_client(); mcp_client.configure(file_items or [], [base_dir])
|
||||||
sys_instr = f"{_get_combined_system_prompt()}\n\n<context>\n{md_content}\n</context>"
|
sys_instr = f"{_get_combined_system_prompt()}\n\n<context>\n{static_md}\n</context>"
|
||||||
tools_decl = [_gemini_tool_declaration()]
|
tools_decl = [_gemini_tool_declaration()]
|
||||||
|
|
||||||
# DYNAMIC CONTEXT: Check if files/context changed mid-session
|
current_md_hash = hash(static_md)
|
||||||
current_md_hash = hash(md_content)
|
|
||||||
old_history = None
|
old_history = None
|
||||||
if _gemini_chat and getattr(_gemini_chat, "_last_md_hash", None) != current_md_hash:
|
if _gemini_chat and getattr(_gemini_chat, "_last_md_hash", None) != current_md_hash:
|
||||||
old_history = list(_gemini_chat.history) if _gemini_chat.history else []
|
old_history = list(_gemini_chat.history) if _gemini_chat.history else []
|
||||||
if _gemini_cache:
|
if _gemini_cache:
|
||||||
try: _gemini_client.caches.delete(name=_gemini_cache.name)
|
try: _gemini_client.caches.delete(name=_gemini_cache.name)
|
||||||
except: pass
|
except: pass
|
||||||
_gemini_chat = None
|
_gemini_chat, _gemini_cache = None, None
|
||||||
_gemini_cache = None
|
_append_comms("OUT", "request", {"message": "[STATIC CONTEXT CHANGED] Rebuilding cache and chat session..."})
|
||||||
_append_comms("OUT", "request", {"message": "[CONTEXT CHANGED] Rebuilding cache and chat session..."})
|
|
||||||
|
|
||||||
if not _gemini_chat:
|
if not _gemini_chat:
|
||||||
chat_config = types.GenerateContentConfig(
|
chat_config = types.GenerateContentConfig(
|
||||||
system_instruction=sys_instr,
|
system_instruction=sys_instr, tools=tools_decl, temperature=_temperature, max_output_tokens=_max_tokens,
|
||||||
tools=tools_decl,
|
|
||||||
temperature=_temperature,
|
|
||||||
max_output_tokens=_max_tokens,
|
|
||||||
safety_settings=[types.SafetySetting(category="HARM_CATEGORY_DANGEROUS_CONTENT", threshold="BLOCK_ONLY_HIGH")]
|
safety_settings=[types.SafetySetting(category="HARM_CATEGORY_DANGEROUS_CONTENT", threshold="BLOCK_ONLY_HIGH")]
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
# Gemini requires 1024 (Flash) or 4096 (Pro) tokens to cache.
|
_gemini_cache = _gemini_client.caches.create(model=_model, config=types.CreateCachedContentConfig(system_instruction=sys_instr, tools=tools_decl, ttl="3600s"))
|
||||||
_gemini_cache = _gemini_client.caches.create(
|
|
||||||
model=_model,
|
|
||||||
config=types.CreateCachedContentConfig(
|
|
||||||
system_instruction=sys_instr,
|
|
||||||
tools=tools_decl,
|
|
||||||
ttl="3600s",
|
|
||||||
)
|
|
||||||
)
|
|
||||||
chat_config = types.GenerateContentConfig(
|
chat_config = types.GenerateContentConfig(
|
||||||
cached_content=_gemini_cache.name,
|
cached_content=_gemini_cache.name, temperature=_temperature, max_output_tokens=_max_tokens,
|
||||||
temperature=_temperature,
|
|
||||||
max_output_tokens=_max_tokens,
|
|
||||||
safety_settings=[types.SafetySetting(category="HARM_CATEGORY_DANGEROUS_CONTENT", threshold="BLOCK_ONLY_HIGH")]
|
safety_settings=[types.SafetySetting(category="HARM_CATEGORY_DANGEROUS_CONTENT", threshold="BLOCK_ONLY_HIGH")]
|
||||||
)
|
)
|
||||||
_append_comms("OUT", "request", {"message": f"[CACHE CREATED] {_gemini_cache.name}"})
|
_append_comms("OUT", "request", {"message": f"[CACHE CREATED] {_gemini_cache.name}"})
|
||||||
except Exception as e:
|
except Exception: _gemini_cache = None
|
||||||
_gemini_cache = None # Ensure clean state on failure
|
|
||||||
|
|
||||||
kwargs = {"model": _model, "config": chat_config}
|
kwargs = {"model": _model, "config": chat_config}
|
||||||
if old_history:
|
if old_history: kwargs["history"] = old_history
|
||||||
kwargs["history"] = old_history
|
|
||||||
|
|
||||||
_gemini_chat = _gemini_client.chats.create(**kwargs)
|
_gemini_chat = _gemini_client.chats.create(**kwargs)
|
||||||
_gemini_chat._last_md_hash = current_md_hash
|
_gemini_chat._last_md_hash = current_md_hash
|
||||||
|
|
||||||
|
import re
|
||||||
|
if _gemini_chat and _gemini_chat.history:
|
||||||
|
for msg in _gemini_chat.history:
|
||||||
|
if msg.role == "user" and hasattr(msg, "parts"):
|
||||||
|
for p in msg.parts:
|
||||||
|
if hasattr(p, "text") and p.text and "<discussion>" in p.text:
|
||||||
|
p.text = re.sub(r"<discussion>.*?</discussion>\n\n", "", p.text, flags=re.DOTALL)
|
||||||
|
if hasattr(p, "function_response") and p.function_response and hasattr(p.function_response, "response"):
|
||||||
|
r = p.function_response.response
|
||||||
|
r_dict = r if isinstance(r, dict) else getattr(r, "__dict__", {})
|
||||||
|
val = r_dict.get("output") if isinstance(r_dict, dict) else getattr(r, "output", None)
|
||||||
|
if isinstance(val, str):
|
||||||
|
if "[SYSTEM: FILES UPDATED]" in val: val = val.split("[SYSTEM: FILES UPDATED]")[0].strip()
|
||||||
|
if _history_trunc_limit > 0 and len(val) > _history_trunc_limit:
|
||||||
|
val = val[:_history_trunc_limit] + "\n\n... [TRUNCATED BY SYSTEM TO SAVE TOKENS.]"
|
||||||
|
if isinstance(r, dict): r["output"] = val
|
||||||
|
else: setattr(r, "output", val)
|
||||||
|
|
||||||
|
full_user_msg = f"<discussion>\n{dynamic_md}\n</discussion>\n\n{user_message}" if dynamic_md else user_message
|
||||||
|
_append_comms("OUT", "request", {"message": f"[ctx {len(static_md)} static + {len(dynamic_md)} dynamic + msg {len(user_message)}]"})
|
||||||
|
|
||||||
_append_comms("OUT", "request", {"message": f"[ctx {len(md_content)} + msg {len(user_message)}]"})
|
payload, all_text = full_user_msg, []
|
||||||
payload, all_text = user_message, []
|
|
||||||
|
|
||||||
for r_idx in range(MAX_TOOL_ROUNDS + 2):
|
for r_idx in range(MAX_TOOL_ROUNDS + 2):
|
||||||
# Strip stale file refreshes and truncate old tool outputs in Gemini history
|
|
||||||
if _gemini_chat and _gemini_chat.history:
|
|
||||||
for msg in _gemini_chat.history:
|
|
||||||
if msg.role == "user" and hasattr(msg, "parts"):
|
|
||||||
for p in msg.parts:
|
|
||||||
if hasattr(p, "function_response") and p.function_response and hasattr(p.function_response, "response"):
|
|
||||||
r = p.function_response.response
|
|
||||||
if isinstance(r, dict) and "output" in r:
|
|
||||||
val = r["output"]
|
|
||||||
if isinstance(val, str):
|
|
||||||
if "[SYSTEM: FILES UPDATED]" in val:
|
|
||||||
val = val.split("[SYSTEM: FILES UPDATED]")[0].strip()
|
|
||||||
if _history_trunc_limit > 0 and len(val) > _history_trunc_limit:
|
|
||||||
val = val[:_history_trunc_limit] + "\n\n... [TRUNCATED BY SYSTEM TO SAVE TOKENS.]"
|
|
||||||
r["output"] = val
|
|
||||||
|
|
||||||
resp = _gemini_chat.send_message(payload)
|
resp = _gemini_chat.send_message(payload)
|
||||||
txt = "\n".join(p.text for c in resp.candidates if getattr(c, "content", None) for p in c.content.parts if hasattr(p, "text") and p.text)
|
txt = "\n".join(p.text for c in resp.candidates if getattr(c, "content", None) for p in c.content.parts if hasattr(p, "text") and p.text)
|
||||||
if txt: all_text.append(txt)
|
if txt: all_text.append(txt)
|
||||||
@@ -535,32 +521,27 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items:
|
|||||||
calls = [p.function_call for c in resp.candidates if getattr(c, "content", None) for p in c.content.parts if hasattr(p, "function_call") and p.function_call]
|
calls = [p.function_call for c in resp.candidates if getattr(c, "content", None) for p in c.content.parts if hasattr(p, "function_call") and p.function_call]
|
||||||
usage = {"input_tokens": getattr(resp.usage_metadata, "prompt_token_count", 0), "output_tokens": getattr(resp.usage_metadata, "candidates_token_count", 0)}
|
usage = {"input_tokens": getattr(resp.usage_metadata, "prompt_token_count", 0), "output_tokens": getattr(resp.usage_metadata, "candidates_token_count", 0)}
|
||||||
cached_tokens = getattr(resp.usage_metadata, "cached_content_token_count", None)
|
cached_tokens = getattr(resp.usage_metadata, "cached_content_token_count", None)
|
||||||
if cached_tokens:
|
if cached_tokens: usage["cache_read_input_tokens"] = cached_tokens
|
||||||
usage["cache_read_input_tokens"] = cached_tokens
|
|
||||||
reason = resp.candidates[0].finish_reason.name if resp.candidates and hasattr(resp.candidates[0], "finish_reason") else "STOP"
|
reason = resp.candidates[0].finish_reason.name if resp.candidates and hasattr(resp.candidates[0], "finish_reason") else "STOP"
|
||||||
|
|
||||||
_append_comms("IN", "response", {"round": r_idx, "stop_reason": reason, "text": txt, "tool_calls": [{"name": c.name, "args": dict(c.args)} for c in calls], "usage": usage})
|
_append_comms("IN", "response", {"round": r_idx, "stop_reason": reason, "text": txt, "tool_calls": [{"name": c.name, "args": dict(c.args)} for c in calls], "usage": usage})
|
||||||
|
|
||||||
# Guard: if Gemini reports input tokens approaching the limit, drop oldest history pairs
|
|
||||||
total_in = usage.get("input_tokens", 0)
|
total_in = usage.get("input_tokens", 0)
|
||||||
if total_in > _GEMINI_MAX_INPUT_TOKENS and _gemini_chat and _gemini_chat.history:
|
if total_in > _GEMINI_MAX_INPUT_TOKENS and _gemini_chat and _gemini_chat.history:
|
||||||
hist = _gemini_chat.history
|
hist = list(_gemini_chat.history)
|
||||||
dropped = 0
|
dropped = 0
|
||||||
# Drop oldest pairs (user+model) but keep at least the last 2 entries
|
|
||||||
while len(hist) > 4 and total_in > _GEMINI_MAX_INPUT_TOKENS * 0.7:
|
while len(hist) > 4 and total_in > _GEMINI_MAX_INPUT_TOKENS * 0.7:
|
||||||
# Rough estimate: each dropped message saves ~(chars/4) tokens
|
saved = sum(len(p.text)//4 for p in hist[0].parts if hasattr(p, "text") and p.text)
|
||||||
saved = 0
|
|
||||||
for p in hist[0].parts:
|
for p in hist[0].parts:
|
||||||
if hasattr(p, "text") and p.text:
|
if hasattr(p, "function_response") and p.function_response:
|
||||||
saved += len(p.text) // 4
|
|
||||||
elif hasattr(p, "function_response") and p.function_response:
|
|
||||||
r = getattr(p.function_response, "response", {})
|
r = getattr(p.function_response, "response", {})
|
||||||
if isinstance(r, dict):
|
val = r.get("output", "") if isinstance(r, dict) else getattr(r, "output", "")
|
||||||
saved += len(str(r.get("output", ""))) // 4
|
saved += len(str(val)) // 4
|
||||||
hist.pop(0)
|
hist.pop(0)
|
||||||
total_in -= max(saved, 100)
|
total_in -= max(saved, 100)
|
||||||
dropped += 1
|
dropped += 1
|
||||||
if dropped > 0:
|
if dropped > 0:
|
||||||
|
_gemini_chat.history = hist
|
||||||
_append_comms("OUT", "request", {"message": f"[GEMINI HISTORY TRIMMED: dropped {dropped} old entries to stay within token budget]"})
|
_append_comms("OUT", "request", {"message": f"[GEMINI HISTORY TRIMMED: dropped {dropped} old entries to stay within token budget]"})
|
||||||
|
|
||||||
if not calls or r_idx > MAX_TOOL_ROUNDS: break
|
if not calls or r_idx > MAX_TOOL_ROUNDS: break
|
||||||
@@ -581,8 +562,7 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items:
|
|||||||
if file_items:
|
if file_items:
|
||||||
file_items = _reread_file_items(file_items)
|
file_items = _reread_file_items(file_items)
|
||||||
ctx = _build_file_context_text(file_items)
|
ctx = _build_file_context_text(file_items)
|
||||||
if ctx:
|
if ctx: out += f"\n\n[SYSTEM: FILES UPDATED]\n\n{ctx}"
|
||||||
out += f"\n\n[SYSTEM: FILES UPDATED]\n\n{ctx}"
|
|
||||||
if r_idx == MAX_TOOL_ROUNDS: out += "\n\n[SYSTEM: MAX ROUNDS. PROVIDE FINAL ANSWER.]"
|
if r_idx == MAX_TOOL_ROUNDS: out += "\n\n[SYSTEM: MAX ROUNDS. PROVIDE FINAL ANSWER.]"
|
||||||
|
|
||||||
f_resps.append(types.Part.from_function_response(name=name, response={"output": out}))
|
f_resps.append(types.Part.from_function_response(name=name, response={"output": out}))
|
||||||
@@ -653,78 +633,41 @@ def _estimate_prompt_tokens(system_blocks: list[dict], history: list[dict]) -> i
|
|||||||
|
|
||||||
|
|
||||||
def _strip_stale_file_refreshes(history: list[dict]):
    """
    Remove stale file-refresh text blocks from older user turns, in place.

    Every user message except the most recent one is scanned, and any
    ``{"type": "text"}`` block whose text opens with ``_FILE_REFRESH_MARKER``
    is dropped. The most recent user message keeps its refresh block.
    Messages whose ``content`` is not a list are left untouched.

    The marker is compared with leading ``[`` characters ignored on both
    sides. The Anthropic tool loop in this file emits the block as
    ``f"[{_FILE_REFRESH_MARKER} — ..."``, so a plain
    ``text.startswith(_FILE_REFRESH_MARKER)`` cannot match it whether or not
    the marker constant itself begins with ``[``.
    """
    if len(history) < 2:
        return

    marker = _FILE_REFRESH_MARKER.lstrip("[")
    if not marker:
        # An empty marker would match every text block; strip nothing instead.
        return

    def _is_refresh(block) -> bool:
        """Return True when *block* is a text block that opens with the refresh marker."""
        if not isinstance(block, dict) or block.get("type") != "text":
            return False
        text = block.get("text")
        # A missing or non-string "text" value cannot be a refresh block.
        return isinstance(text, str) and text.lstrip("[").startswith(marker)

    # Index of the newest user message, whose refresh block must be preserved.
    last_user_idx = next(
        (i for i in range(len(history) - 1, -1, -1) if history[i].get("role") == "user"),
        -1,
    )

    for i, msg in enumerate(history):
        if msg.get("role") != "user" or i == last_user_idx:
            continue
        content = msg.get("content")
        if not isinstance(content, list):
            continue
        cleaned = [b for b in content if not _is_refresh(b)]
        # Only rebind when something was actually removed.
        if len(cleaned) < len(content):
            msg["content"] = cleaned
|
||||||
|
|
||||||
|
|
||||||
def _trim_anthropic_history(system_blocks: list[dict], history: list[dict]) -> int:
    """
    Trim *history* in place so the estimated prompt fits the token budget.

    Strategy:
      1. Strip stale file-refresh blocks from older turns.
      2. If the estimate still exceeds ``_ANTHROPIC_MAX_PROMPT_TOKENS``, drop
         the oldest messages after ``history[0]`` (which is never removed),
         preferably as assistant+user pairs, until the estimate fits or three
         or fewer messages remain.

    Returns the number of messages dropped.
    """
    # Phase 1: strip stale file refreshes.
    _strip_stale_file_refreshes(history)

    est = _estimate_prompt_tokens(system_blocks, history)
    if est <= _ANTHROPIC_MAX_PROMPT_TOKENS:
        return 0

    def _pop_oldest() -> int:
        """Remove history[1] (history[0] is protected) and return its estimated token cost."""
        return _estimate_message_tokens(history.pop(1))

    def _leads_with_tool_result(msg: dict) -> bool:
        """Return True when the first content block of *msg* is a tool_result."""
        content = msg.get("content", [])
        return (
            isinstance(content, list)
            and bool(content)
            and isinstance(content[0], dict)
            and content[0].get("type") == "tool_result"
        )

    # Phase 2: drop the oldest turns until within budget.
    dropped = 0
    while len(history) > 3 and est > _ANTHROPIC_MAX_PROMPT_TOKENS:
        if history[1].get("role") == "assistant" and history[2].get("role") == "user":
            est -= _pop_oldest()
            est -= _pop_oldest()
            dropped += 2
            # Keep dropping follow-up assistant/tool_result pairs that belonged
            # to the turn just removed. This uses the same ``> 3`` floor as the
            # outer loop, so the cascade cannot consume the final pair and
            # leave only history[0].
            while (
                len(history) > 3
                and history[1].get("role") == "assistant"
                and history[2].get("role") == "user"
                and _leads_with_tool_result(history[2])
            ):
                est -= _pop_oldest()
                est -= _pop_oldest()
                dropped += 2
        else:
            # Edge-case fallback: roles are not in assistant/user order, so
            # drop a single message at index 1.
            est -= _pop_oldest()
            dropped += 1

    return dropped
|
||||||
|
|
||||||
|
|
||||||
@@ -804,17 +747,19 @@ def _repair_anthropic_history(history: list[dict]):
|
|||||||
})
|
})
|
||||||
|
|
||||||
|
|
||||||
def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_items: list[dict] | None = None) -> str:
|
def _send_anthropic(static_md: str, dynamic_md: str, user_message: str, base_dir: str, file_items: list[dict] | None = None) -> str:
|
||||||
try:
|
try:
|
||||||
_ensure_anthropic_client()
|
_ensure_anthropic_client()
|
||||||
mcp_client.configure(file_items or [], [base_dir])
|
mcp_client.configure(file_items or [], [base_dir])
|
||||||
|
|
||||||
system_text = _get_combined_system_prompt() + f"\n\n<context>\n{md_content}\n</context>"
|
system_text = _get_combined_system_prompt() + f"\n\n<context>\n{static_md}\n</context>"
|
||||||
system_blocks = _build_chunked_context_blocks(system_text)
|
system_blocks = _build_chunked_context_blocks(system_text)
|
||||||
|
|
||||||
|
if dynamic_md:
|
||||||
|
system_blocks.append({"type": "text", "text": f"<discussion>\n{dynamic_md}\n</discussion>"})
|
||||||
|
|
||||||
user_content = [{"type": "text", "text": user_message}]
|
user_content = [{"type": "text", "text": user_message}]
|
||||||
|
|
||||||
# COMPRESS HISTORY: Truncate massive tool outputs from previous turns
|
|
||||||
for msg in _anthropic_history:
|
for msg in _anthropic_history:
|
||||||
if msg.get("role") == "user" and isinstance(msg.get("content"), list):
|
if msg.get("role") == "user" and isinstance(msg.get("content"), list):
|
||||||
for block in msg["content"]:
|
for block in msg["content"]:
|
||||||
@@ -825,180 +770,96 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item
|
|||||||
|
|
||||||
_strip_cache_controls(_anthropic_history)
|
_strip_cache_controls(_anthropic_history)
|
||||||
_repair_anthropic_history(_anthropic_history)
|
_repair_anthropic_history(_anthropic_history)
|
||||||
|
|
||||||
|
user_content[-1]["cache_control"] = {"type": "ephemeral"}
|
||||||
_anthropic_history.append({"role": "user", "content": user_content})
|
_anthropic_history.append({"role": "user", "content": user_content})
|
||||||
|
|
||||||
n_chunks = len(system_blocks)
|
n_chunks = len(system_blocks)
|
||||||
_append_comms("OUT", "request", {
|
_append_comms("OUT", "request", {
|
||||||
"message": (
|
"message": (f"[system {n_chunks} chunk(s), {len(static_md)} static + {len(dynamic_md)} dynamic chars context] "
|
||||||
f"[system {n_chunks} chunk(s), {len(md_content)} chars context] "
|
f"{user_message[:200]}{'...' if len(user_message) > 200 else ''}"),
|
||||||
f"{user_message[:200]}{'...' if len(user_message) > 200 else ''}"
|
|
||||||
),
|
|
||||||
})
|
})
|
||||||
|
|
||||||
all_text_parts = []
|
all_text_parts = []
|
||||||
|
|
||||||
# We allow MAX_TOOL_ROUNDS, plus 1 final loop to get the text synthesis
|
|
||||||
for round_idx in range(MAX_TOOL_ROUNDS + 2):
|
for round_idx in range(MAX_TOOL_ROUNDS + 2):
|
||||||
# Trim history to fit within token budget before each API call
|
|
||||||
dropped = _trim_anthropic_history(system_blocks, _anthropic_history)
|
dropped = _trim_anthropic_history(system_blocks, _anthropic_history)
|
||||||
if dropped > 0:
|
if dropped > 0:
|
||||||
est_tokens = _estimate_prompt_tokens(system_blocks, _anthropic_history)
|
est_tokens = _estimate_prompt_tokens(system_blocks, _anthropic_history)
|
||||||
_append_comms("OUT", "request", {
|
_append_comms("OUT", "request", {"message": f"[HISTORY TRIMMED: dropped {dropped} old messages to fit token budget. Estimated {est_tokens} tokens remaining.]"})
|
||||||
"message": (
|
|
||||||
f"[HISTORY TRIMMED: dropped {dropped} old messages to fit token budget. "
|
|
||||||
f"Estimated {est_tokens} tokens remaining. {len(_anthropic_history)} messages in history.]"
|
|
||||||
),
|
|
||||||
})
|
|
||||||
|
|
||||||
response = _anthropic_client.messages.create(
|
response = _anthropic_client.messages.create(
|
||||||
model=_model,
|
model=_model, max_tokens=_max_tokens, temperature=_temperature,
|
||||||
max_tokens=_max_tokens,
|
system=system_blocks, tools=_get_anthropic_tools(), messages=_anthropic_history,
|
||||||
temperature=_temperature,
|
|
||||||
system=system_blocks,
|
|
||||||
tools=_get_anthropic_tools(),
|
|
||||||
messages=_anthropic_history,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Convert SDK content block objects to plain dicts before storing in history
|
|
||||||
serialised_content = [_content_block_to_dict(b) for b in response.content]
|
serialised_content = [_content_block_to_dict(b) for b in response.content]
|
||||||
|
_anthropic_history.append({"role": "assistant", "content": serialised_content})
|
||||||
_anthropic_history.append({
|
|
||||||
"role": "assistant",
|
|
||||||
"content": serialised_content,
|
|
||||||
})
|
|
||||||
|
|
||||||
text_blocks = [b.text for b in response.content if hasattr(b, "text") and b.text]
|
text_blocks = [b.text for b in response.content if hasattr(b, "text") and b.text]
|
||||||
if text_blocks:
|
if text_blocks: all_text_parts.append("\n".join(text_blocks))
|
||||||
all_text_parts.append("\n".join(text_blocks))
|
|
||||||
|
|
||||||
tool_use_blocks = [
|
tool_use_blocks = [{"id": b.id, "name": b.name, "input": b.input} for b in response.content if getattr(b, "type", None) == "tool_use"]
|
||||||
{"id": b.id, "name": b.name, "input": b.input}
|
|
||||||
for b in response.content
|
|
||||||
if getattr(b, "type", None) == "tool_use"
|
|
||||||
]
|
|
||||||
|
|
||||||
usage_dict: dict = {}
|
usage_dict = {}
|
||||||
if response.usage:
|
if response.usage:
|
||||||
usage_dict["input_tokens"] = response.usage.input_tokens
|
usage_dict.update({"input_tokens": response.usage.input_tokens, "output_tokens": response.usage.output_tokens})
|
||||||
usage_dict["output_tokens"] = response.usage.output_tokens
|
if getattr(response.usage, "cache_creation_input_tokens", None) is not None:
|
||||||
cache_creation = getattr(response.usage, "cache_creation_input_tokens", None)
|
usage_dict["cache_creation_input_tokens"] = response.usage.cache_creation_input_tokens
|
||||||
cache_read = getattr(response.usage, "cache_read_input_tokens", None)
|
if getattr(response.usage, "cache_read_input_tokens", None) is not None:
|
||||||
if cache_creation is not None:
|
usage_dict["cache_read_input_tokens"] = response.usage.cache_read_input_tokens
|
||||||
usage_dict["cache_creation_input_tokens"] = cache_creation
|
|
||||||
if cache_read is not None:
|
|
||||||
usage_dict["cache_read_input_tokens"] = cache_read
|
|
||||||
|
|
||||||
_append_comms("IN", "response", {
|
_append_comms("IN", "response", {"round": round_idx, "stop_reason": response.stop_reason, "text": "\n".join(text_blocks), "tool_calls": tool_use_blocks, "usage": usage_dict})
|
||||||
"round": round_idx,
|
|
||||||
"stop_reason": response.stop_reason,
|
|
||||||
"text": "\n".join(text_blocks),
|
|
||||||
"tool_calls": tool_use_blocks,
|
|
||||||
"usage": usage_dict,
|
|
||||||
})
|
|
||||||
|
|
||||||
if response.stop_reason != "tool_use" or not tool_use_blocks:
|
if response.stop_reason != "tool_use" or not tool_use_blocks: break
|
||||||
break
|
if round_idx > MAX_TOOL_ROUNDS: break
|
||||||
|
|
||||||
if round_idx > MAX_TOOL_ROUNDS:
|
|
||||||
# The model ignored the MAX ROUNDS warning and kept calling tools.
|
|
||||||
# Force abort to prevent infinite loop.
|
|
||||||
break
|
|
||||||
|
|
||||||
tool_results = []
|
tool_results = []
|
||||||
for block in response.content:
|
for block in response.content:
|
||||||
if getattr(block, "type", None) != "tool_use":
|
if getattr(block, "type", None) != "tool_use": continue
|
||||||
continue
|
b_name, b_id, b_input = getattr(block, "name", None), getattr(block, "id", ""), getattr(block, "input", {})
|
||||||
b_name = getattr(block, "name", None)
|
|
||||||
b_id = getattr(block, "id", "")
|
|
||||||
b_input = getattr(block, "input", {})
|
|
||||||
if b_name in mcp_client.TOOL_NAMES:
|
if b_name in mcp_client.TOOL_NAMES:
|
||||||
_append_comms("OUT", "tool_call", {"name": b_name, "id": b_id, "args": b_input})
|
_append_comms("OUT", "tool_call", {"name": b_name, "id": b_id, "args": b_input})
|
||||||
output = mcp_client.dispatch(b_name, b_input)
|
out = mcp_client.dispatch(b_name, b_input)
|
||||||
_append_comms("IN", "tool_result", {"name": b_name, "id": b_id, "output": output})
|
|
||||||
tool_results.append({
|
|
||||||
"type": "tool_result",
|
|
||||||
"tool_use_id": b_id,
|
|
||||||
"content": output,
|
|
||||||
})
|
|
||||||
elif b_name == TOOL_NAME:
|
elif b_name == TOOL_NAME:
|
||||||
script = b_input.get("script", "")
|
scr = b_input.get("script", "")
|
||||||
_append_comms("OUT", "tool_call", {
|
_append_comms("OUT", "tool_call", {"name": TOOL_NAME, "id": b_id, "script": scr})
|
||||||
"name": TOOL_NAME,
|
out = _run_script(scr, base_dir)
|
||||||
"id": b_id,
|
else: out = f"ERROR: unknown tool '{b_name}'"
|
||||||
"script": script,
|
|
||||||
})
|
_append_comms("IN", "tool_result", {"name": b_name, "id": b_id, "output": out})
|
||||||
output = _run_script(script, base_dir)
|
tool_results.append({"type": "tool_result", "tool_use_id": b_id, "content": out})
|
||||||
_append_comms("IN", "tool_result", {
|
|
||||||
"name": TOOL_NAME,
|
|
||||||
"id": b_id,
|
|
||||||
"output": output,
|
|
||||||
})
|
|
||||||
tool_results.append({
|
|
||||||
"type": "tool_result",
|
|
||||||
"tool_use_id": b_id,
|
|
||||||
"content": output,
|
|
||||||
})
|
|
||||||
|
|
||||||
# Refresh file context after tool calls and inject into tool result message
|
|
||||||
if file_items:
|
if file_items:
|
||||||
file_items = _reread_file_items(file_items)
|
file_items = _reread_file_items(file_items)
|
||||||
refreshed_ctx = _build_file_context_text(file_items)
|
refreshed_ctx = _build_file_context_text(file_items)
|
||||||
if refreshed_ctx:
|
if refreshed_ctx:
|
||||||
tool_results.append({
|
tool_results.append({"type": "text", "text": f"[{_FILE_REFRESH_MARKER} — current contents below. Do NOT re-read these files with PowerShell.]\n\n{refreshed_ctx}"})
|
||||||
"type": "text",
|
|
||||||
"text": (
|
|
||||||
"[FILES UPDATED — current contents below. "
|
|
||||||
"Do NOT re-read these files with PowerShell.]\n\n"
|
|
||||||
+ refreshed_ctx
|
|
||||||
),
|
|
||||||
})
|
|
||||||
|
|
||||||
if round_idx == MAX_TOOL_ROUNDS:
|
if round_idx == MAX_TOOL_ROUNDS:
|
||||||
tool_results.append({
|
tool_results.append({"type": "text", "text": "SYSTEM WARNING: MAX TOOL ROUNDS REACHED. YOU MUST PROVIDE YOUR FINAL ANSWER NOW WITHOUT CALLING ANY MORE TOOLS."})
|
||||||
"type": "text",
|
|
||||||
"text": "SYSTEM WARNING: MAX TOOL ROUNDS REACHED. YOU MUST PROVIDE YOUR FINAL ANSWER NOW WITHOUT CALLING ANY MORE TOOLS."
|
|
||||||
})
|
|
||||||
|
|
||||||
_anthropic_history.append({
|
_anthropic_history.append({"role": "user", "content": tool_results})
|
||||||
"role": "user",
|
_append_comms("OUT", "tool_result_send", {"results": [{"tool_use_id": r["tool_use_id"], "content": r["content"]} for r in tool_results if r.get("type") == "tool_result"]})
|
||||||
"content": tool_results,
|
|
||||||
})
|
|
||||||
|
|
||||||
_append_comms("OUT", "tool_result_send", {
|
|
||||||
"results": [
|
|
||||||
{"tool_use_id": r["tool_use_id"], "content": r["content"]}
|
|
||||||
for r in tool_results if r.get("type") == "tool_result"
|
|
||||||
],
|
|
||||||
})
|
|
||||||
|
|
||||||
final_text = "\n\n".join(all_text_parts)
|
final_text = "\n\n".join(all_text_parts)
|
||||||
return final_text if final_text.strip() else "(No text returned by the model)"
|
return final_text if final_text.strip() else "(No text returned by the model)"
|
||||||
|
except ProviderError: raise
|
||||||
except ProviderError:
|
except Exception as exc: raise _classify_anthropic_error(exc) from exc
|
||||||
raise
|
|
||||||
except Exception as exc:
|
|
||||||
raise _classify_anthropic_error(exc) from exc
|
|
||||||
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------ unified send
|
# ------------------------------------------------------------------ unified send
|
||||||
|
|
||||||
def send(
|
def send(
|
||||||
md_content: str,
|
static_md: str,
|
||||||
|
dynamic_md: str,
|
||||||
user_message: str,
|
user_message: str,
|
||||||
base_dir: str = ".",
|
base_dir: str = ".",
|
||||||
file_items: list[dict] | None = None,
|
file_items: list[dict] | None = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""
|
"""Send a message to the active provider."""
|
||||||
Send a message to the active provider.
|
|
||||||
|
|
||||||
md_content : aggregated markdown string from aggregate.run()
|
|
||||||
user_message: the user question / instruction
|
|
||||||
base_dir : project base directory (for PowerShell tool calls)
|
|
||||||
file_items : list of file dicts from aggregate.build_file_items() for
|
|
||||||
dynamic context refresh after tool calls
|
|
||||||
"""
|
|
||||||
if _provider == "gemini":
|
if _provider == "gemini":
|
||||||
return _send_gemini(md_content, user_message, base_dir, file_items)
|
return _send_gemini(static_md, dynamic_md, user_message, base_dir, file_items)
|
||||||
elif _provider == "anthropic":
|
elif _provider == "anthropic":
|
||||||
return _send_anthropic(md_content, user_message, base_dir, file_items)
|
return _send_anthropic(static_md, dynamic_md, user_message, base_dir, file_items)
|
||||||
raise ValueError(f"unknown provider: {_provider}")
|
raise ValueError(f"unknown provider: {_provider}")
|
||||||
|
|||||||
28
gui.py
28
gui.py
@@ -121,10 +121,19 @@ def _add_kv_row(parent: str, key: str, val, val_color=None):
|
|||||||
|
|
||||||
|
|
||||||
def _render_usage(parent: str, usage: dict):
|
def _render_usage(parent: str, usage: dict):
|
||||||
"""Render Anthropic usage dict as a compact token table."""
|
"""Render Anthropic usage dict as a compact token table, with true totals."""
|
||||||
if not usage:
|
if not usage:
|
||||||
return
|
return
|
||||||
dpg.add_text("usage:", color=_SUBHDR_COLOR, parent=parent)
|
dpg.add_text("usage:", color=_SUBHDR_COLOR, parent=parent)
|
||||||
|
|
||||||
|
cache_read = usage.get("cache_read_input_tokens", 0)
|
||||||
|
cache_create = usage.get("cache_creation_input_tokens", 0)
|
||||||
|
raw_input = usage.get("input_tokens", 0)
|
||||||
|
total_in = cache_read + cache_create + raw_input
|
||||||
|
|
||||||
|
if total_in > raw_input:
|
||||||
|
_add_kv_row(parent, " total_input_tokens", total_in, _NUM_COLOR)
|
||||||
|
|
||||||
order = [
|
order = [
|
||||||
"input_tokens",
|
"input_tokens",
|
||||||
"cache_read_input_tokens",
|
"cache_read_input_tokens",
|
||||||
@@ -855,7 +864,7 @@ class App:
|
|||||||
}
|
}
|
||||||
theme.save_to_config(self.config)
|
theme.save_to_config(self.config)
|
||||||
|
|
||||||
def _do_generate(self) -> tuple[str, Path, list]:
|
def _do_generate(self) -> tuple[str, str, Path, list]:
|
||||||
self._flush_to_project()
|
self._flush_to_project()
|
||||||
self._save_active_project()
|
self._save_active_project()
|
||||||
self._flush_to_config()
|
self._flush_to_config()
|
||||||
@@ -1110,8 +1119,9 @@ class App:
|
|||||||
|
|
||||||
def cb_md_only(self):
|
def cb_md_only(self):
|
||||||
try:
|
try:
|
||||||
md, path, _file_items = self._do_generate()
|
s_md, d_md, path, _file_items = self._do_generate()
|
||||||
self.last_md = md
|
self.last_static_md = s_md
|
||||||
|
self.last_dynamic_md = d_md
|
||||||
self.last_md_path = path
|
self.last_md_path = path
|
||||||
self._update_status(f"md written: {path.name}")
|
self._update_status(f"md written: {path.name}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -1134,8 +1144,9 @@ class App:
|
|||||||
if self.send_thread and self.send_thread.is_alive():
|
if self.send_thread and self.send_thread.is_alive():
|
||||||
return
|
return
|
||||||
try:
|
try:
|
||||||
md, path, file_items = self._do_generate()
|
s_md, d_md, path, file_items = self._do_generate()
|
||||||
self.last_md = md
|
self.last_static_md = s_md
|
||||||
|
self.last_dynamic_md = d_md
|
||||||
self.last_md_path = path
|
self.last_md_path = path
|
||||||
self.last_file_items = file_items
|
self.last_file_items = file_items
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -1152,6 +1163,7 @@ class App:
|
|||||||
if global_sp: combined_sp.append(global_sp.strip())
|
if global_sp: combined_sp.append(global_sp.strip())
|
||||||
if project_sp: combined_sp.append(project_sp.strip())
|
if project_sp: combined_sp.append(project_sp.strip())
|
||||||
ai_client.set_custom_system_prompt("\n\n".join(combined_sp))
|
ai_client.set_custom_system_prompt("\n\n".join(combined_sp))
|
||||||
|
|
||||||
temp = dpg.get_value("ai_temperature") if dpg.does_item_exist("ai_temperature") else 0.0
|
temp = dpg.get_value("ai_temperature") if dpg.does_item_exist("ai_temperature") else 0.0
|
||||||
max_tok = dpg.get_value("ai_max_tokens") if dpg.does_item_exist("ai_max_tokens") else 8192
|
max_tok = dpg.get_value("ai_max_tokens") if dpg.does_item_exist("ai_max_tokens") else 8192
|
||||||
trunc = dpg.get_value("ai_history_trunc") if dpg.does_item_exist("ai_history_trunc") else 8000
|
trunc = dpg.get_value("ai_history_trunc") if dpg.does_item_exist("ai_history_trunc") else 8000
|
||||||
@@ -1162,7 +1174,7 @@ class App:
|
|||||||
if auto_add:
|
if auto_add:
|
||||||
self._queue_history_add("User", user_msg)
|
self._queue_history_add("User", user_msg)
|
||||||
try:
|
try:
|
||||||
response = ai_client.send(self.last_md, user_msg, base_dir, self.last_file_items)
|
response = ai_client.send(getattr(self, "last_static_md", ""), getattr(self, "last_dynamic_md", ""), user_msg, base_dir, self.last_file_items)
|
||||||
self._update_response(response)
|
self._update_response(response)
|
||||||
self._update_status("done")
|
self._update_status("done")
|
||||||
self._trigger_blink = True
|
self._trigger_blink = True
|
||||||
@@ -2119,4 +2131,4 @@ def main():
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|||||||
Reference in New Issue
Block a user