diff --git a/aggregate.py b/aggregate.py index 304ebc8..4985832 100644 --- a/aggregate.py +++ b/aggregate.py @@ -126,9 +126,8 @@ def build_summary_section(base_dir: Path, files: list[str]) -> str: items = build_file_items(base_dir, files) return summarize.build_summary_markdown(items) -def build_markdown(base_dir: Path, files: list[str], screenshot_base_dir: Path, screenshots: list[str], history: list[str], summary_only: bool = False) -> str: +def build_static_markdown(base_dir: Path, files: list[str], screenshot_base_dir: Path, screenshots: list[str], summary_only: bool = False) -> str: parts = [] - # STATIC PREFIX: Files and Screenshots must go first to maximize Cache Hits if files: if summary_only: parts.append("## Files (Summary)\n\n" + build_summary_section(base_dir, files)) @@ -136,12 +135,12 @@ def build_markdown(base_dir: Path, files: list[str], screenshot_base_dir: Path, parts.append("## Files\n\n" + build_files_section(base_dir, files)) if screenshots: parts.append("## Screenshots\n\n" + build_screenshots_section(screenshot_base_dir, screenshots)) - # DYNAMIC SUFFIX: History changes every turn, must go last - if history: - parts.append("## Discussion History\n\n" + build_discussion_section(history)) - return "\n\n---\n\n".join(parts) + return "\n\n---\n\n".join(parts) if parts else "" -def run(config: dict) -> tuple[str, Path]: +def build_dynamic_markdown(history: list[str]) -> str: + return "## Discussion History\n\n" + build_discussion_section(history) if history else "" + +def run(config: dict) -> tuple[str, str, Path, list[dict]]: namespace = config.get("project", {}).get("name") if not namespace: namespace = config.get("output", {}).get("namespace", "project") @@ -155,18 +154,21 @@ def run(config: dict) -> tuple[str, Path]: output_dir.mkdir(parents=True, exist_ok=True) increment = find_next_increment(output_dir, namespace) output_file = output_dir / f"{namespace}_{increment:03d}.md" - # Provide full files to trigger Gemini's 32k cache threshold and give the AI immediate context - markdown = build_markdown(base_dir, files, screenshot_base_dir, screenshots, history, - summary_only=False) + + static_md = build_static_markdown(base_dir, files, screenshot_base_dir, screenshots, summary_only=False) + dynamic_md = build_dynamic_markdown(history) + + markdown = f"{static_md}\n\n---\n\n{dynamic_md}" if static_md and dynamic_md else static_md or dynamic_md output_file.write_text(markdown, encoding="utf-8") + file_items = build_file_items(base_dir, files) - return markdown, output_file, file_items + return static_md, dynamic_md, output_file, file_items def main(): with open("config.toml", "rb") as f: import tomllib config = tomllib.load(f) - markdown, output_file, _ = run(config) + static_md, dynamic_md, output_file, _ = run(config) print(f"Written: {output_file}") if __name__ == "__main__": diff --git a/ai_client.py b/ai_client.py index 380006a..a89031b 100644 --- a/ai_client.py +++ b/ai_client.py @@ -453,81 +453,67 @@ def _ensure_gemini_client(): _gemini_client = genai.Client(api_key=creds["gemini"]["api_key"]) -def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items: list[dict] | None = None) -> str: +def _send_gemini(static_md: str, dynamic_md: str, user_message: str, base_dir: str, file_items: list[dict] | None = None) -> str: global _gemini_chat, _gemini_cache from google.genai import types try: _ensure_gemini_client(); mcp_client.configure(file_items or [], [base_dir]) - sys_instr = f"{_get_combined_system_prompt()}\n\n\n{md_content}\n" + sys_instr = f"{_get_combined_system_prompt()}\n\n\n{static_md}\n" tools_decl = [_gemini_tool_declaration()] - # DYNAMIC CONTEXT: Check if files/context changed mid-session - current_md_hash = hash(md_content) + current_md_hash = hash(static_md) old_history = None if _gemini_chat and getattr(_gemini_chat, "_last_md_hash", None) != current_md_hash: old_history = list(_gemini_chat.history) if _gemini_chat.history else [] if _gemini_cache: try: _gemini_client.caches.delete(name=_gemini_cache.name) except: pass - _gemini_chat = None - _gemini_cache = None - _append_comms("OUT", "request", {"message": "[CONTEXT CHANGED] Rebuilding cache and chat session..."}) + _gemini_chat, _gemini_cache = None, None + _append_comms("OUT", "request", {"message": "[STATIC CONTEXT CHANGED] Rebuilding cache and chat session..."}) if not _gemini_chat: chat_config = types.GenerateContentConfig( - system_instruction=sys_instr, - tools=tools_decl, - temperature=_temperature, - max_output_tokens=_max_tokens, + system_instruction=sys_instr, tools=tools_decl, temperature=_temperature, max_output_tokens=_max_tokens, safety_settings=[types.SafetySetting(category="HARM_CATEGORY_DANGEROUS_CONTENT", threshold="BLOCK_ONLY_HIGH")] ) try: - # Gemini requires 1024 (Flash) or 4096 (Pro) tokens to cache. - _gemini_cache = _gemini_client.caches.create( - model=_model, - config=types.CreateCachedContentConfig( - system_instruction=sys_instr, - tools=tools_decl, - ttl="3600s", - ) - ) + _gemini_cache = _gemini_client.caches.create(model=_model, config=types.CreateCachedContentConfig(system_instruction=sys_instr, tools=tools_decl, ttl="3600s")) chat_config = types.GenerateContentConfig( - cached_content=_gemini_cache.name, - temperature=_temperature, - max_output_tokens=_max_tokens, + cached_content=_gemini_cache.name, temperature=_temperature, max_output_tokens=_max_tokens, safety_settings=[types.SafetySetting(category="HARM_CATEGORY_DANGEROUS_CONTENT", threshold="BLOCK_ONLY_HIGH")] ) _append_comms("OUT", "request", {"message": f"[CACHE CREATED] {_gemini_cache.name}"}) - except Exception as e: - _gemini_cache = None # Ensure clean state on failure + except Exception: _gemini_cache = None kwargs = {"model": _model, "config": chat_config} - if old_history: - kwargs["history"] = old_history - + if old_history: kwargs["history"] = old_history _gemini_chat = _gemini_client.chats.create(**kwargs) _gemini_chat._last_md_hash = current_md_hash + + import re + if _gemini_chat and _gemini_chat.history: + for msg in _gemini_chat.history: + if msg.role == "user" and hasattr(msg, "parts"): + for p in msg.parts: + if hasattr(p, "text") and p.text and "" in p.text: + p.text = re.sub(r".*?\n\n", "", p.text, flags=re.DOTALL) + if hasattr(p, "function_response") and p.function_response and hasattr(p.function_response, "response"): + r = p.function_response.response + r_dict = r if isinstance(r, dict) else getattr(r, "__dict__", {}) + val = r_dict.get("output") if isinstance(r_dict, dict) else getattr(r, "output", None) + if isinstance(val, str): + if "[SYSTEM: FILES UPDATED]" in val: val = val.split("[SYSTEM: FILES UPDATED]")[0].strip() + if _history_trunc_limit > 0 and len(val) > _history_trunc_limit: + val = val[:_history_trunc_limit] + "\n\n... [TRUNCATED BY SYSTEM TO SAVE TOKENS.]" + if isinstance(r, dict): r["output"] = val + else: setattr(r, "output", val) + + full_user_msg = f"\n{dynamic_md}\n\n\n{user_message}" if dynamic_md else user_message + _append_comms("OUT", "request", {"message": f"[ctx {len(static_md)} static + {len(dynamic_md)} dynamic + msg {len(user_message)}]"}) - _append_comms("OUT", "request", {"message": f"[ctx {len(md_content)} + msg {len(user_message)}]"}) - payload, all_text = user_message, [] + payload, all_text = full_user_msg, [] for r_idx in range(MAX_TOOL_ROUNDS + 2): - # Strip stale file refreshes and truncate old tool outputs in Gemini history - if _gemini_chat and _gemini_chat.history: - for msg in _gemini_chat.history: - if msg.role == "user" and hasattr(msg, "parts"): - for p in msg.parts: - if hasattr(p, "function_response") and p.function_response and hasattr(p.function_response, "response"): - r = p.function_response.response - if isinstance(r, dict) and "output" in r: - val = r["output"] - if isinstance(val, str): - if "[SYSTEM: FILES UPDATED]" in val: - val = val.split("[SYSTEM: FILES UPDATED]")[0].strip() - if _history_trunc_limit > 0 and len(val) > _history_trunc_limit: - val = val[:_history_trunc_limit] + "\n\n... [TRUNCATED BY SYSTEM TO SAVE TOKENS.]" - r["output"] = val - resp = _gemini_chat.send_message(payload) txt = "\n".join(p.text for c in resp.candidates if getattr(c, "content", None) for p in c.content.parts if hasattr(p, "text") and p.text) if txt: all_text.append(txt) @@ -535,32 +521,27 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items: calls = [p.function_call for c in resp.candidates if getattr(c, "content", None) for p in c.content.parts if hasattr(p, "function_call") and p.function_call] usage = {"input_tokens": getattr(resp.usage_metadata, "prompt_token_count", 0), "output_tokens": getattr(resp.usage_metadata, "candidates_token_count", 0)} cached_tokens = getattr(resp.usage_metadata, "cached_content_token_count", None) - if cached_tokens: - usage["cache_read_input_tokens"] = cached_tokens + if cached_tokens: usage["cache_read_input_tokens"] = cached_tokens reason = resp.candidates[0].finish_reason.name if resp.candidates and hasattr(resp.candidates[0], "finish_reason") else "STOP" _append_comms("IN", "response", {"round": r_idx, "stop_reason": reason, "text": txt, "tool_calls": [{"name": c.name, "args": dict(c.args)} for c in calls], "usage": usage}) - # Guard: if Gemini reports input tokens approaching the limit, drop oldest history pairs total_in = usage.get("input_tokens", 0) if total_in > _GEMINI_MAX_INPUT_TOKENS and _gemini_chat and _gemini_chat.history: - hist = _gemini_chat.history + hist = list(_gemini_chat.history) dropped = 0 - # Drop oldest pairs (user+model) but keep at least the last 2 entries while len(hist) > 4 and total_in > _GEMINI_MAX_INPUT_TOKENS * 0.7: - # Rough estimate: each dropped message saves ~(chars/4) tokens - saved = 0 + saved = sum(len(p.text)//4 for p in hist[0].parts if hasattr(p, "text") and p.text) for p in hist[0].parts: - if hasattr(p, "text") and p.text: - saved += len(p.text) // 4 - elif hasattr(p, "function_response") and p.function_response: + if hasattr(p, "function_response") and p.function_response: r = getattr(p.function_response, "response", {}) - if isinstance(r, dict): - saved += len(str(r.get("output", ""))) // 4 + val = r.get("output", "") if isinstance(r, dict) else getattr(r, "output", "") + saved += len(str(val)) // 4 hist.pop(0) total_in -= max(saved, 100) dropped += 1 if dropped > 0: + _gemini_chat.history = hist _append_comms("OUT", "request", {"message": f"[GEMINI HISTORY TRIMMED: dropped {dropped} old entries to stay within token budget]"}) if not calls or r_idx > MAX_TOOL_ROUNDS: break @@ -581,8 +562,7 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items: if file_items: file_items = _reread_file_items(file_items) ctx = _build_file_context_text(file_items) - if ctx: - out += f"\n\n[SYSTEM: FILES UPDATED]\n\n{ctx}" + if ctx: out += f"\n\n[SYSTEM: FILES UPDATED]\n\n{ctx}" if r_idx == MAX_TOOL_ROUNDS: out += "\n\n[SYSTEM: MAX ROUNDS. PROVIDE FINAL ANSWER.]" f_resps.append(types.Part.from_function_response(name=name, response={"output": out})) @@ -653,78 +633,41 @@ def _estimate_prompt_tokens(system_blocks: list[dict], history: list[dict]) -> i def _strip_stale_file_refreshes(history: list[dict]): - """ - Remove [FILES UPDATED ...] text blocks from all history turns EXCEPT - the very last user message. These are stale snapshots from previous - tool rounds that bloat the context without providing value. - """ if len(history) < 2: return - # Find the index of the last user message — we keep its file refresh intact - last_user_idx = -1 - for i in range(len(history) - 1, -1, -1): - if history[i].get("role") == "user": - last_user_idx = i - break + last_user_idx = next((i for i in range(len(history)-1, -1, -1) if history[i].get("role") == "user"), -1) for i, msg in enumerate(history): if msg.get("role") != "user" or i == last_user_idx: continue content = msg.get("content") if not isinstance(content, list): continue - cleaned = [] - for block in content: - if isinstance(block, dict) and block.get("type") == "text": - text = block.get("text", "") - if text.startswith(_FILE_REFRESH_MARKER): - continue # drop this stale file refresh block - cleaned.append(block) + cleaned = [b for b in content if not (isinstance(b, dict) and b.get("type") == "text" and b.get("text", "").startswith(_FILE_REFRESH_MARKER))] if len(cleaned) < len(content): msg["content"] = cleaned -def _trim_anthropic_history(system_blocks: list[dict], history: list[dict]): - """ - Trim the Anthropic history to fit within the token budget. - Strategy: - 1. Strip stale file-refresh injections from old turns. - 2. If still over budget, drop oldest turn pairs (user + assistant). - Returns the number of messages dropped. - """ - # Phase 1: strip stale file refreshes +def _trim_anthropic_history(system_blocks: list[dict], history: list[dict]) -> int: _strip_stale_file_refreshes(history) - est = _estimate_prompt_tokens(system_blocks, history) if est <= _ANTHROPIC_MAX_PROMPT_TOKENS: return 0 - - # Phase 2: drop oldest turn pairs until within budget dropped = 0 while len(history) > 3 and est > _ANTHROPIC_MAX_PROMPT_TOKENS: - # Protect history[0] (original user prompt). Drop from history[1] (assistant) and history[2] (user) if history[1].get("role") == "assistant" and len(history) > 2 and history[2].get("role") == "user": - removed_asst = history.pop(1) - removed_user = history.pop(1) + est -= _estimate_message_tokens(history.pop(1)) + est -= _estimate_message_tokens(history.pop(1)) dropped += 2 - est -= _estimate_message_tokens(removed_asst) - est -= _estimate_message_tokens(removed_user) - # Also drop dangling tool_results if the next message is an assistant and the removed user was just tool results while len(history) > 2 and history[1].get("role") == "assistant" and history[2].get("role") == "user": - content = history[2].get("content", []) - if isinstance(content, list) and content and isinstance(content[0], dict) and content[0].get("type") == "tool_result": - r_a = history.pop(1) - r_u = history.pop(1) + c = history[2].get("content", []) + if isinstance(c, list) and c and isinstance(c[0], dict) and c[0].get("type") == "tool_result": + est -= _estimate_message_tokens(history.pop(1)) + est -= _estimate_message_tokens(history.pop(1)) dropped += 2 - est -= _estimate_message_tokens(r_a) - est -= _estimate_message_tokens(r_u) - else: - break + else: break else: - # Edge case fallback: drop index 1 (protecting index 0) - removed = history.pop(1) + est -= _estimate_message_tokens(history.pop(1)) dropped += 1 - est -= _estimate_message_tokens(removed) - return dropped @@ -804,17 +747,19 @@ def _repair_anthropic_history(history: list[dict]): }) -def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_items: list[dict] | None = None) -> str: +def _send_anthropic(static_md: str, dynamic_md: str, user_message: str, base_dir: str, file_items: list[dict] | None = None) -> str: try: _ensure_anthropic_client() mcp_client.configure(file_items or [], [base_dir]) - system_text = _get_combined_system_prompt() + f"\n\n\n{md_content}\n" + system_text = _get_combined_system_prompt() + f"\n\n\n{static_md}\n" system_blocks = _build_chunked_context_blocks(system_text) + + if dynamic_md: + system_blocks.append({"type": "text", "text": f"\n{dynamic_md}\n"}) user_content = [{"type": "text", "text": user_message}] - # COMPRESS HISTORY: Truncate massive tool outputs from previous turns for msg in _anthropic_history: if msg.get("role") == "user" and isinstance(msg.get("content"), list): for block in msg["content"]: @@ -825,180 +770,96 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item _strip_cache_controls(_anthropic_history) _repair_anthropic_history(_anthropic_history) + + user_content[-1]["cache_control"] = {"type": "ephemeral"} _anthropic_history.append({"role": "user", "content": user_content}) n_chunks = len(system_blocks) _append_comms("OUT", "request", { - "message": ( - f"[system {n_chunks} chunk(s), {len(md_content)} chars context] " - f"{user_message[:200]}{'...' if len(user_message) > 200 else ''}" - ), + "message": (f"[system {n_chunks} chunk(s), {len(static_md)} static + {len(dynamic_md)} dynamic chars context] " + f"{user_message[:200]}{'...' if len(user_message) > 200 else ''}"), }) all_text_parts = [] - # We allow MAX_TOOL_ROUNDS, plus 1 final loop to get the text synthesis for round_idx in range(MAX_TOOL_ROUNDS + 2): - # Trim history to fit within token budget before each API call dropped = _trim_anthropic_history(system_blocks, _anthropic_history) if dropped > 0: est_tokens = _estimate_prompt_tokens(system_blocks, _anthropic_history) - _append_comms("OUT", "request", { - "message": ( - f"[HISTORY TRIMMED: dropped {dropped} old messages to fit token budget. " - f"Estimated {est_tokens} tokens remaining. {len(_anthropic_history)} messages in history.]" - ), - }) + _append_comms("OUT", "request", {"message": f"[HISTORY TRIMMED: dropped {dropped} old messages to fit token budget. Estimated {est_tokens} tokens remaining.]"}) response = _anthropic_client.messages.create( - model=_model, - max_tokens=_max_tokens, - temperature=_temperature, - system=system_blocks, - tools=_get_anthropic_tools(), - messages=_anthropic_history, + model=_model, max_tokens=_max_tokens, temperature=_temperature, + system=system_blocks, tools=_get_anthropic_tools(), messages=_anthropic_history, ) - # Convert SDK content block objects to plain dicts before storing in history serialised_content = [_content_block_to_dict(b) for b in response.content] - - _anthropic_history.append({ - "role": "assistant", - "content": serialised_content, - }) + _anthropic_history.append({"role": "assistant", "content": serialised_content}) text_blocks = [b.text for b in response.content if hasattr(b, "text") and b.text] - if text_blocks: - all_text_parts.append("\n".join(text_blocks)) + if text_blocks: all_text_parts.append("\n".join(text_blocks)) - tool_use_blocks = [ - {"id": b.id, "name": b.name, "input": b.input} - for b in response.content - if getattr(b, "type", None) == "tool_use" - ] + tool_use_blocks = [{"id": b.id, "name": b.name, "input": b.input} for b in response.content if getattr(b, "type", None) == "tool_use"] - usage_dict: dict = {} + usage_dict = {} if response.usage: - usage_dict["input_tokens"] = response.usage.input_tokens - usage_dict["output_tokens"] = response.usage.output_tokens - cache_creation = getattr(response.usage, "cache_creation_input_tokens", None) - cache_read = getattr(response.usage, "cache_read_input_tokens", None) - if cache_creation is not None: - usage_dict["cache_creation_input_tokens"] = cache_creation - if cache_read is not None: - usage_dict["cache_read_input_tokens"] = cache_read + usage_dict.update({"input_tokens": response.usage.input_tokens, "output_tokens": response.usage.output_tokens}) + if getattr(response.usage, "cache_creation_input_tokens", None) is not None: + usage_dict["cache_creation_input_tokens"] = response.usage.cache_creation_input_tokens + if getattr(response.usage, "cache_read_input_tokens", None) is not None: + usage_dict["cache_read_input_tokens"] = response.usage.cache_read_input_tokens - _append_comms("IN", "response", { - "round": round_idx, - "stop_reason": response.stop_reason, - "text": "\n".join(text_blocks), - "tool_calls": tool_use_blocks, - "usage": usage_dict, - }) + _append_comms("IN", "response", {"round": round_idx, "stop_reason": response.stop_reason, "text": "\n".join(text_blocks), "tool_calls": tool_use_blocks, "usage": usage_dict}) - if response.stop_reason != "tool_use" or not tool_use_blocks: - break - - if round_idx > MAX_TOOL_ROUNDS: - # The model ignored the MAX ROUNDS warning and kept calling tools. - # Force abort to prevent infinite loop. - break + if response.stop_reason != "tool_use" or not tool_use_blocks: break + if round_idx > MAX_TOOL_ROUNDS: break tool_results = [] for block in response.content: - if getattr(block, "type", None) != "tool_use": - continue - b_name = getattr(block, "name", None) - b_id = getattr(block, "id", "") - b_input = getattr(block, "input", {}) + if getattr(block, "type", None) != "tool_use": continue + b_name, b_id, b_input = getattr(block, "name", None), getattr(block, "id", ""), getattr(block, "input", {}) if b_name in mcp_client.TOOL_NAMES: _append_comms("OUT", "tool_call", {"name": b_name, "id": b_id, "args": b_input}) - output = mcp_client.dispatch(b_name, b_input) - _append_comms("IN", "tool_result", {"name": b_name, "id": b_id, "output": output}) - tool_results.append({ - "type": "tool_result", - "tool_use_id": b_id, - "content": output, - }) + out = mcp_client.dispatch(b_name, b_input) elif b_name == TOOL_NAME: - script = b_input.get("script", "") - _append_comms("OUT", "tool_call", { - "name": TOOL_NAME, - "id": b_id, - "script": script, - }) - output = _run_script(script, base_dir) - _append_comms("IN", "tool_result", { - "name": TOOL_NAME, - "id": b_id, - "output": output, - }) - tool_results.append({ - "type": "tool_result", - "tool_use_id": b_id, - "content": output, - }) + scr = b_input.get("script", "") + _append_comms("OUT", "tool_call", {"name": TOOL_NAME, "id": b_id, "script": scr}) + out = _run_script(scr, base_dir) + else: out = f"ERROR: unknown tool '{b_name}'" + + _append_comms("IN", "tool_result", {"name": b_name, "id": b_id, "output": out}) + tool_results.append({"type": "tool_result", "tool_use_id": b_id, "content": out}) - # Refresh file context after tool calls and inject into tool result message if file_items: file_items = _reread_file_items(file_items) refreshed_ctx = _build_file_context_text(file_items) if refreshed_ctx: - tool_results.append({ - "type": "text", - "text": ( - "[FILES UPDATED — current contents below. " - "Do NOT re-read these files with PowerShell.]\n\n" - + refreshed_ctx - ), - }) + tool_results.append({"type": "text", "text": f"[{_FILE_REFRESH_MARKER} — current contents below. Do NOT re-read these files with PowerShell.]\n\n{refreshed_ctx}"}) if round_idx == MAX_TOOL_ROUNDS: - tool_results.append({ - "type": "text", - "text": "SYSTEM WARNING: MAX TOOL ROUNDS REACHED. YOU MUST PROVIDE YOUR FINAL ANSWER NOW WITHOUT CALLING ANY MORE TOOLS." - }) + tool_results.append({"type": "text", "text": "SYSTEM WARNING: MAX TOOL ROUNDS REACHED. YOU MUST PROVIDE YOUR FINAL ANSWER NOW WITHOUT CALLING ANY MORE TOOLS."}) - _anthropic_history.append({ - "role": "user", - "content": tool_results, - }) - - _append_comms("OUT", "tool_result_send", { - "results": [ - {"tool_use_id": r["tool_use_id"], "content": r["content"]} - for r in tool_results if r.get("type") == "tool_result" - ], - }) + _anthropic_history.append({"role": "user", "content": tool_results}) + _append_comms("OUT", "tool_result_send", {"results": [{"tool_use_id": r["tool_use_id"], "content": r["content"]} for r in tool_results if r.get("type") == "tool_result"]}) final_text = "\n\n".join(all_text_parts) return final_text if final_text.strip() else "(No text returned by the model)" - - except ProviderError: - raise - except Exception as exc: - raise _classify_anthropic_error(exc) from exc + except ProviderError: raise + except Exception as exc: raise _classify_anthropic_error(exc) from exc # ------------------------------------------------------------------ unified send def send( - md_content: str, + static_md: str, + dynamic_md: str, user_message: str, base_dir: str = ".", file_items: list[dict] | None = None, ) -> str: - """ - Send a message to the active provider. - - md_content : aggregated markdown string from aggregate.run() - user_message: the user question / instruction - base_dir : project base directory (for PowerShell tool calls) - file_items : list of file dicts from aggregate.build_file_items() for - dynamic context refresh after tool calls - """ + """Send a message to the active provider.""" if _provider == "gemini": - return _send_gemini(md_content, user_message, base_dir, file_items) + return _send_gemini(static_md, dynamic_md, user_message, base_dir, file_items) elif _provider == "anthropic": - return _send_anthropic(md_content, user_message, base_dir, file_items) - raise ValueError(f"unknown provider: {_provider}") \ No newline at end of file + return _send_anthropic(static_md, dynamic_md, user_message, base_dir, file_items) + raise ValueError(f"unknown provider: {_provider}") diff --git a/gui.py b/gui.py index 495923b..deff202 100644 --- a/gui.py +++ b/gui.py @@ -121,10 +121,19 @@ def _add_kv_row(parent: str, key: str, val, val_color=None): def _render_usage(parent: str, usage: dict): - """Render Anthropic usage dict as a compact token table.""" + """Render Anthropic usage dict as a compact token table, with true totals.""" if not usage: return dpg.add_text("usage:", color=_SUBHDR_COLOR, parent=parent) + + cache_read = usage.get("cache_read_input_tokens", 0) + cache_create = usage.get("cache_creation_input_tokens", 0) + raw_input = usage.get("input_tokens", 0) + total_in = cache_read + cache_create + raw_input + + if total_in > raw_input: + _add_kv_row(parent, " total_input_tokens", total_in, _NUM_COLOR) + order = [ "input_tokens", "cache_read_input_tokens", @@ -855,7 +864,7 @@ class App: } theme.save_to_config(self.config) - def _do_generate(self) -> tuple[str, Path, list]: + def _do_generate(self) -> tuple[str, str, Path, list]: self._flush_to_project() self._save_active_project() self._flush_to_config() @@ -1110,8 +1119,9 @@ class App: def cb_md_only(self): try: - md, path, _file_items = self._do_generate() - self.last_md = md + s_md, d_md, path, _file_items = self._do_generate() + self.last_static_md = s_md + self.last_dynamic_md = d_md self.last_md_path = path self._update_status(f"md written: {path.name}") except Exception as e: @@ -1134,8 +1144,9 @@ class App: if self.send_thread and self.send_thread.is_alive(): return try: - md, path, file_items = self._do_generate() - self.last_md = md + s_md, d_md, path, file_items = self._do_generate() + self.last_static_md = s_md + self.last_dynamic_md = d_md self.last_md_path = path self.last_file_items = file_items except Exception as e: @@ -1152,6 +1163,7 @@ class App: if global_sp: combined_sp.append(global_sp.strip()) if project_sp: combined_sp.append(project_sp.strip()) ai_client.set_custom_system_prompt("\n\n".join(combined_sp)) + temp = dpg.get_value("ai_temperature") if dpg.does_item_exist("ai_temperature") else 0.0 max_tok = dpg.get_value("ai_max_tokens") if dpg.does_item_exist("ai_max_tokens") else 8192 trunc = dpg.get_value("ai_history_trunc") if dpg.does_item_exist("ai_history_trunc") else 8000 @@ -1162,7 +1174,7 @@ class App: if auto_add: self._queue_history_add("User", user_msg) try: - response = ai_client.send(self.last_md, user_msg, base_dir, self.last_file_items) + response = ai_client.send(getattr(self, "last_static_md", ""), getattr(self, "last_dynamic_md", ""), user_msg, base_dir, self.last_file_items) self._update_response(response) self._update_status("done") self._trigger_blink = True @@ -2119,4 +2131,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main()