diff --git a/ai_client.py b/ai_client.py
index 8d93e28..eb41c14 100644
--- a/ai_client.py
+++ b/ai_client.py
@@ -14,7 +14,7 @@ _anthropic_client = None
 _anthropic_history: list[dict] = []
 
 # Injected by gui.py - called when AI wants to run a command.
-# Signature: (script: str) -> str | None
+# Signature: (script: str, base_dir: str) -> str | None
 # Returns the output string if approved, None if rejected.
 confirm_and_run_callback = None
@@ -22,24 +22,26 @@ confirm_and_run_callback = None
 # Signature: (entry: dict) -> None
 comms_log_callback = None
 
+# Injected by gui.py - called whenever a tool call completes (after run).
+# Signature: (script: str, result: str) -> None
+tool_log_callback = None
+
 MAX_TOOL_ROUNDS = 5
 
+# Anthropic system prompt - sent with cache_control so it is cached after the
+# first request and reused on every subsequent call within the TTL window.
+_ANTHROPIC_SYSTEM = (
+    "You are a helpful coding assistant with access to a PowerShell tool. "
+    "When asked to create or edit files, prefer targeted edits over full rewrites. "
+    "Always explain what you are doing before invoking the tool."
+)
+
 # ------------------------------------------------------------------ comms log
 _comms_log: list[dict] = []
 MAX_FIELD_CHARS = 400  # beyond this we show a truncated preview in the UI
 
-def _clamp(value, max_chars: int = MAX_FIELD_CHARS) -> tuple[str, bool]:
-    """Return (display_str, was_truncated)."""
-    if isinstance(value, (dict, list)):
-        s = json.dumps(value, ensure_ascii=False, indent=2)
-    else:
-        s = str(value)
-    if len(s) > max_chars:
-        return s[:max_chars], True
-    return s, False
-
 def _append_comms(direction: str, kind: str, payload: dict):
     """
@@ -78,15 +80,6 @@ class ProviderError(Exception):
     """
     Raised when the upstream API returns a hard error we want to surface
    distinctly in the UI (quota, rate-limit, auth, balance, etc.).
-
-    Attributes
-    ----------
-    kind : str
-        One of: "quota", "rate_limit", "auth", "balance", "network", "unknown"
-    provider : str
-        "gemini" or "anthropic"
-    original : Exception
-        The underlying SDK exception.
     """
     def __init__(self, kind: str, provider: str, original: Exception):
         self.kind = kind
@@ -94,7 +87,6 @@ class ProviderError(Exception):
         self.original = original
         super().__init__(str(original))
 
-    # Human-readable banner shown in the Response panel
     def ui_message(self) -> str:
         labels = {
             "quota": "QUOTA EXHAUSTED",
@@ -109,7 +101,6 @@ class ProviderError(Exception):
 
 
 def _classify_anthropic_error(exc: Exception) -> ProviderError:
-    """Map an anthropic SDK exception to a ProviderError."""
     try:
         import anthropic
         if isinstance(exc, anthropic.RateLimitError):
@@ -129,7 +120,6 @@ def _classify_anthropic_error(exc: Exception) -> ProviderError:
             return ProviderError("auth", "anthropic", exc)
         if status == 402:
             return ProviderError("balance", "anthropic", exc)
-        # Anthropic puts credit-balance errors in the body at 400
         if "credit" in body or "balance" in body or "billing" in body:
             return ProviderError("balance", "anthropic", exc)
         if "quota" in body or "limit" in body or "exceeded" in body:
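Every SDK failure is funnelled into one of six `kind` values ("quota", "rate_limit", "auth", "balance", "network", "unknown") so the UI can show a single banner via `ui_message()`. The GUI-side handling is not part of this diff; as an illustrative sketch only, a caller could consume it roughly like this, where `ai_client.send(...)` is a hypothetical entry point:

```python
# Sketch only: assumes a hypothetical ai_client.send() wrapper that raises
# ProviderError on hard upstream failures.
import ai_client
from ai_client import ProviderError

def ask(md_content: str, question: str, base_dir: str) -> str:
    try:
        return ai_client.send(md_content, question, base_dir)
    except ProviderError as err:
        # err.kind is "quota", "rate_limit", "auth", "balance", "network"
        # or "unknown"; ui_message() is the banner shown in the Response panel.
        return f"{err.ui_message()}\n\nDetails: {err.original}"
```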
@@ -140,10 +130,7 @@ def _classify_anthropic_error(exc: Exception) -> ProviderError:
 
 
 def _classify_gemini_error(exc: Exception) -> ProviderError:
-    """Map a google-genai SDK exception to a ProviderError."""
     body = str(exc).lower()
-    # google-genai surfaces HTTP errors as google.api_core exceptions or
-    # google.genai exceptions; inspect the message text as a reliable fallback.
     try:
         from google.api_core import exceptions as gac
         if isinstance(exc, gac.ResourceExhausted):
@@ -156,7 +143,6 @@ def _classify_gemini_error(exc: Exception) -> ProviderError:
             return ProviderError("network", "gemini", exc)
     except ImportError:
         pass
-    # Fallback: parse status code / message string
     if "429" in body or "quota" in body or "resource exhausted" in body:
         return ProviderError("quota", "gemini", exc)
     if "rate" in body and "limit" in body:
@@ -226,6 +212,9 @@ def _list_anthropic_models() -> list[str]:
 
 TOOL_NAME = "run_powershell"
 
+# The tool list for Anthropic. cache_control is placed on the last (only) tool
+# so that the system-prompt + tools prefix is cached together after the first
+# request and served from cache on every subsequent round.
 _ANTHROPIC_TOOLS = [
     {
         "name": TOOL_NAME,
@@ -245,7 +234,8 @@ _ANTHROPIC_TOOLS = [
                 }
             },
             "required": ["script"]
-        }
+        },
+        "cache_control": {"type": "ephemeral"},
     }
 ]
 
@@ -279,13 +269,18 @@ def _run_script(script: str, base_dir: str) -> str:
     """
     Delegate to the GUI confirmation callback.
     Returns result string (stdout/stderr) or a rejection message.
+    Also fires tool_log_callback if registered.
     """
     if confirm_and_run_callback is None:
         return "ERROR: no confirmation handler registered"
     result = confirm_and_run_callback(script, base_dir)
     if result is None:
-        return "USER REJECTED: command was not executed"
-    return result
+        output = "USER REJECTED: command was not executed"
+    else:
+        output = result
+    if tool_log_callback is not None:
+        tool_log_callback(script, output)
+    return output
 
 
 # ------------------------------------------------------------------ gemini
@@ -321,7 +316,6 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str) -> str:
         response = _gemini_chat.send_message(full_message)
 
         for round_idx in range(MAX_TOOL_ROUNDS):
-            # Log the raw response candidates as text summary
             text_parts_raw = [
                 part.text
                 for candidate in response.candidates
@@ -383,6 +377,32 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str) -> str:
         raise _classify_gemini_error(exc) from exc
 
 
 # ------------------------------------------------------------------ anthropic
+#
+# Caching strategy (Anthropic prompt caching):
+#
+# The Anthropic API caches a contiguous prefix of the input. To maximise
+# cache hits we structure every request as follows:
+#
+#   system (array form):
+#     [0] _ANTHROPIC_SYSTEM text            <- cache_control: ephemeral
+#         Stable across the whole session; cached after the first request.
+#
+#   tools:
+#     Last tool has cache_control: ephemeral.
+#     Stable across the whole session; cached together with the system prompt.
+#
+#   messages[0] (first user turn ever, or re-sent each call):
+#     content[0]: context block             <- cache_control: ephemeral
+#         The aggregated markdown. Changes only when the user regenerates.
+#         A new cache entry is created when it changes; otherwise it's a hit.
+#     content[1]: user question             <- no cache_control (varies every turn)
+#
+# Subsequent turns (tool results, follow-up questions) are appended to
+# _anthropic_history normally without extra cache markers.
+#
+# Token cost of cache creation is ~25 % more than a normal input token, but
+# cache reads cost ~10 % of a normal input token, so steady-state (many
+# rounds / sends per session) is much cheaper.
 
 def _ensure_anthropic_client():
     global _anthropic_client
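The comment above describes the whole cached prefix; a compact, self-contained sketch of the resulting request shape follows (the model name, system text, tool description, and context are placeholders, not values taken from this repo):

```python
# Minimal sketch of the cached-prefix layout described above. All literal
# values are placeholders; only the cache_control placement mirrors the diff.
import anthropic

client = anthropic.Anthropic()  # reads ANTHROPIC_API_KEY from the environment
response = client.messages.create(
    model="claude-sonnet-4-20250514",
    max_tokens=1024,
    system=[{
        "type": "text",
        "text": "You are a helpful coding assistant.",      # stable -> cached
        "cache_control": {"type": "ephemeral"},
    }],
    tools=[{
        "name": "run_powershell",
        "description": "Run a PowerShell script.",
        "input_schema": {
            "type": "object",
            "properties": {"script": {"type": "string"}},
            "required": ["script"],
        },
        "cache_control": {"type": "ephemeral"},              # caches system + tools prefix
    }],
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "<large aggregated markdown>",
             "cache_control": {"type": "ephemeral"}},         # cached until it changes
            {"type": "text", "text": "What does gui.py do?"}, # varies every turn
        ],
    }],
)
# cache_creation_input_tokens / cache_read_input_tokens show whether the
# prefix was written to or served from the cache on this call.
print(response.usage)
```

On the second and later calls, `usage.cache_read_input_tokens` should cover the system + tools + context prefix; if it stays at 0, the prefix is probably below the minimum cacheable length or is changing between calls.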
@@ -391,6 +411,7 @@ def _ensure_anthropic_client():
         creds = _load_credentials()
         _anthropic_client = anthropic.Anthropic(api_key=creds["anthropic"]["api_key"])
 
+
 def _send_anthropic(md_content: str, user_message: str, base_dir: str) -> str:
     global _anthropic_history
     import anthropic
@@ -398,19 +419,40 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str) -> str:
     try:
         _ensure_anthropic_client()
 
-        full_message = f"\n{md_content}\n\n\n{user_message}"
-        _anthropic_history.append({"role": "user", "content": full_message})
+        # Build the user content: context block (cached) + question (not cached).
+        # The cache anchor is placed on the context block so the entire prefix
+        # (system + tools + context) is eligible for caching.
+        user_content = [
+            {
+                "type": "text",
+                "text": f"\n{md_content}\n",
+                "cache_control": {"type": "ephemeral"},
+            },
+            {
+                "type": "text",
+                "text": user_message,
+            },
+        ]
+
+        _anthropic_history.append({"role": "user", "content": user_content})
 
         _append_comms("OUT", "request", {
-            "message": full_message,
+            "message": f"\n{md_content}\n\n\n{user_message}",
         })
 
         for round_idx in range(MAX_TOOL_ROUNDS):
             response = _anthropic_client.messages.create(
                 model=_model,
                 max_tokens=8096,
+                system=[
+                    {
+                        "type": "text",
+                        "text": _ANTHROPIC_SYSTEM,
+                        "cache_control": {"type": "ephemeral"},
+                    }
+                ],
                 tools=_ANTHROPIC_TOOLS,
-                messages=_anthropic_history
+                messages=_anthropic_history,
             )
 
             _anthropic_history.append({
@@ -418,22 +460,31 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str) -> str:
                 "content": response.content
             })
 
-            # Summarise the response content for the log
             text_blocks = [b.text for b in response.content if hasattr(b, "text") and b.text]
             tool_use_blocks = [
                 {"id": b.id, "name": b.name, "input": b.input}
                 for b in response.content if b.type == "tool_use"
             ]
+
+            # Collect usage; cache fields are present when caching is active
+            usage_dict: dict = {}
+            if response.usage:
+                usage_dict["input_tokens"] = response.usage.input_tokens
+                usage_dict["output_tokens"] = response.usage.output_tokens
+                cache_creation = getattr(response.usage, "cache_creation_input_tokens", None)
+                cache_read = getattr(response.usage, "cache_read_input_tokens", None)
+                if cache_creation is not None:
+                    usage_dict["cache_creation_input_tokens"] = cache_creation
+                if cache_read is not None:
+                    usage_dict["cache_read_input_tokens"] = cache_read
+
             _append_comms("IN", "response", {
                 "round": round_idx,
                 "stop_reason": response.stop_reason,
                 "text": "\n".join(text_blocks),
                 "tool_calls": tool_use_blocks,
-                "usage": {
-                    "input_tokens": response.usage.input_tokens,
-                    "output_tokens": response.usage.output_tokens,
-                } if response.usage else {},
+                "usage": usage_dict,
             })
 
             if response.stop_reason != "tool_use":
@@ -455,21 +506,24 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str) -> str:
                     "output": output,
                 })
                 tool_results.append({
-                    "type": "tool_result",
+                    "type": "tool_result",
                     "tool_use_id": block.id,
-                    "content": output
+                    "content": output,
                 })
 
             if not tool_results:
                 break
 
             _anthropic_history.append({
-                "role": "user",
-                "content": tool_results
+                "role": "user",
+                "content": tool_results,
             })
 
             _append_comms("OUT", "tool_result_send", {
-                "results": [{"tool_use_id": r["tool_use_id"], "content": r["content"]} for r in tool_results],
+                "results": [
+                    {"tool_use_id": r["tool_use_id"], "content": r["content"]}
+                    for r in tool_results
+                ],
             })
 
         text_parts = [
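For reference, once `stop_reason == "tool_use"` the loop above exchanges messages shaped like the following (IDs, text, and the script are illustrative, not captured traffic): the assistant turn is appended verbatim, then every tool output goes back in a single user turn keyed by `tool_use_id`.

```python
# Illustrative shapes of one tool round as the loop builds them.
assistant_turn = {
    "role": "assistant",
    "content": [
        {"type": "text", "text": "Let me check the file first."},
        {"type": "tool_use", "id": "toolu_01A...", "name": "run_powershell",
         "input": {"script": "Get-Content .\\gui.py | Select-Object -First 20"}},
    ],
}
tool_result_turn = {
    "role": "user",
    "content": [
        {"type": "tool_result", "tool_use_id": "toolu_01A...",
         "content": "# gui.py\nimport dearpygui.dearpygui as dpg\n..."},
    ],
}
```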
diff --git a/config.toml b/config.toml
index 84cde57..34b7019 100644
--- a/config.toml
+++ b/config.toml
@@ -25,8 +25,12 @@ paths = []
 
 [discussion]
 history = [
-    "Make sure we are optimially using the anthropic api for this. \nI want to fully utilize caching if possible and just reduce overall loss of limits. \nAdd a log for comms history thats saved in ./logs and a the same for tool calls (scripts in ./scripts/generated, and their call equence in ./logs) these logs are closed in the next runtime of this gui program. \nOn open they amke new file buffers, each file buffer has a timestamp of when it was first made.",
-    "Now finish the gui portion: in gui.py or anything left (last made seesion_logger.py it seems). Caching strategy also looks to be updated in ai_client.py but not complete.",
+    "[User]\nMake sure we are optimially using the anthropic api for this. \nI want to fully utilize caching if possible and just reduce overall loss of limits. \nAdd a log for comms history thats saved in ./logs and a the same for tool calls (scripts in ./scripts/generated, and their call equence in ./logs) these logs are closed in the next runtime of this gui program. \nOn open they amke new file buffers, each file buffer has a timestamp of when it was first made.",
+    "[Claude]\nNow finish the gui portion: in gui.py or anything left (last made seesion_logger.py it seems). Caching strategy also looks to be updated in ai_client.py but not complete had to be reverted.",
+    "[User]\ncontinue. I had to revert ai_client.py becuase it was causing reponse code failures from the vendor. I added docs for anthropic into your context as well. For lasts version before revert of ai_client.py see:\n\n```py\n[Omitted already updated in your context as you fixed it last]\n```",
+    "[Claude]\nNow update `gui.py` to wire up `session_logger` and `tool_log_callback`:",
+    "[Claude]\nThe PowerShell `-replace` with a multi-line string replacement needs to be done differently. I'll write the complete updated `gui.py` directly with all the changes applied. The changes are targeted and well-understood:",
+    "[Claude]\nGood. Now let's verify all hooks are correctly in place and clean up the temp script:\n```",
 ]
 
 [ai]
diff --git a/dpg_layout.ini b/dpg_layout.ini
index 07827c4..c943ed2 100644
--- a/dpg_layout.ini
+++ b/dpg_layout.ini
@@ -16,7 +16,7 @@ DockId=0x00000005,0
 
 [Window][###30]
 Pos=0,654
-Size=364,1342
+Size=364,766
 Collapsed=0
 DockId=0x0000001D,0
 
@@ -45,8 +45,8 @@ Collapsed=0
 DockId=0x00000018,0
 
 [Window][###103]
-Pos=1870,1330
-Size=1970,807
+Pos=2004,1330
+Size=1836,807
 Collapsed=0
 DockId=0x0000001C,0
 
@@ -144,37 +144,37 @@ Collapsed=0
 
 [Window][###118]
 Pos=366,0
-Size=841,2137
+Size=650,2137
 Collapsed=0
 DockId=0x00000018,0
 
 [Window][###78]
-Pos=0,1998
-Size=364,139
+Pos=0,1422
+Size=364,715
 Collapsed=0
 DockId=0x0000001E,0
 
 [Window][###88]
-Pos=1209,0
-Size=659,2137
+Pos=1018,0
+Size=984,2137
 Collapsed=0
 DockId=0x00000019,0
 
 [Window][###95]
 Pos=366,0
-Size=841,2137
+Size=650,2137
 Collapsed=0
 DockId=0x00000018,1
 
 [Window][###110]
-Pos=1870,0
-Size=1970,1328
+Pos=2004,0
+Size=1836,1328
 Collapsed=0
 DockId=0x0000001B,0
 
 [Window][###112]
 Pos=366,0
-Size=841,2137
+Size=650,2137
 Collapsed=0
 DockId=0x00000018,2
 
@@ -183,17 +183,37 @@ Pos=1578,868
 Size=700,440
 Collapsed=0
 
+[Window][###228]
+Pos=1578,868
+Size=700,440
+Collapsed=0
+
+[Window][###317]
+Pos=1578,868
+Size=700,440
+Collapsed=0
+
+[Window][###412]
+Pos=1578,868
+Size=700,440
+Collapsed=0
+
+[Window][###513]
+Pos=1578,868
+Size=700,440
+Collapsed=0
+
 [Docking][Data]
 DockSpace     ID=0x7C6B3D9B Window=0xA87D555D Pos=0,0 Size=3840,2137 Split=X Selected=0x40484D8F
   DockNode    ID=0x00000003 Parent=0x7C6B3D9B SizeRef=364,1161 Split=Y Selected=0xEE087978
     DockNode  ID=0x00000005 Parent=0x00000003 SizeRef=235,354 Selected=0xEE087978
     DockNode  ID=0x00000006 Parent=0x00000003 SizeRef=235,805 Split=Y Selected=0x5F94F9BD
       DockNode ID=0x00000009 Parent=0x00000006 SizeRef=235,453 Split=Y Selected=0x5F94F9BD
-        DockNode ID=0x0000001D Parent=0x00000009 SizeRef=364,1342 Selected=0x5F94F9BD
-        DockNode ID=0x0000001E Parent=0x00000009 SizeRef=364,139 Selected=0xF475F06A
+        DockNode ID=0x0000001D Parent=0x00000009 SizeRef=364,766 Selected=0x5F94F9BD
+        DockNode ID=0x0000001E Parent=0x00000009 SizeRef=364,715 Selected=0xF475F06A
       DockNode ID=0x0000000A Parent=0x00000006 SizeRef=235,350 Selected=0x80199DAE
   DockNode    ID=0x00000004 Parent=0x7C6B3D9B SizeRef=3474,1161 Split=X
-    DockNode  ID=0x00000001 Parent=0x00000004 SizeRef=829,1161 Split=Y Selected=0x40484D8F
+    DockNode  ID=0x00000001 Parent=0x00000004 SizeRef=650,1161 Split=Y Selected=0x40484D8F
      DockNode  ID=0x00000007 Parent=0x00000001 SizeRef=595,492 Selected=0xBA13FCDE
      DockNode  ID=0x00000008 Parent=0x00000001 SizeRef=595,1643 Split=X Selected=0x40484D8F
        DockNode ID=0x0000000F Parent=0x00000008 SizeRef=847,2137 Split=Y Selected=0x07E8375F
@@ -204,13 +224,13 @@ DockSpace ID=0x7C6B3D9B Window=0xA87D555D Pos=0,0 Size=3840,2137 Spl
        DockNode ID=0x00000017 Parent=0x00000013 SizeRef=1314,1749 Selected=0x4B454E0B
        DockNode ID=0x00000018 Parent=0x00000013 SizeRef=1309,1749 CentralNode=1 Selected=0x73845A9B
      DockNode  ID=0x00000014 Parent=0x00000010 SizeRef=1967,445 Selected=0xC36FF36B
-  DockNode    ID=0x00000002 Parent=0x00000004 SizeRef=2631,1161 Split=X Selected=0x714F2F7B
+  DockNode    ID=0x00000002 Parent=0x00000004 SizeRef=2822,1161 Split=X Selected=0x714F2F7B
     DockNode  ID=0x0000000B Parent=0x00000002 SizeRef=968,1161 Selected=0xC915D9DA
     DockNode  ID=0x0000000C Parent=0x00000002 SizeRef=1661,1161 Split=Y Selected=0x714F2F7B
       DockNode ID=0x0000000D Parent=0x0000000C SizeRef=396,342 Selected=0x714F2F7B
       DockNode ID=0x0000000E Parent=0x0000000C SizeRef=396,817 Split=X Selected=0xCF08B82F
-        DockNode ID=0x00000019 Parent=0x0000000E SizeRef=659,2137 Selected=0x052342BF
-        DockNode ID=0x0000001A Parent=0x0000000E SizeRef=1970,2137 Split=Y Selected=0xCF08B82F
+        DockNode ID=0x00000019 Parent=0x0000000E SizeRef=984,2137 Selected=0x052342BF
+        DockNode ID=0x0000001A Parent=0x0000000E SizeRef=1836,2137 Split=Y Selected=0xCF08B82F
          DockNode ID=0x0000001B Parent=0x0000001A SizeRef=2104,1328 Selected=0x43F4115A
          DockNode ID=0x0000001C Parent=0x0000001A SizeRef=2104,807 Selected=0xCF08B82F
diff --git a/gui.py b/gui.py
index 22808c2..35333ad 100644
--- a/gui.py
+++ b/gui.py
@@ -1,4 +1,4 @@
-# gui.py 
+# gui.py
 import dearpygui.dearpygui as dpg
 import tomllib
 import tomli_w
@@ -9,6 +9,7 @@ import aggregate
 import ai_client
 from ai_client import ProviderError
 import shell_runner
+import session_logger
 
 CONFIG_PATH = Path("config.toml")
 PROVIDERS = ["gemini", "anthropic"]
@@ -206,17 +207,24 @@ class App:
         self._pending_comms_lock = threading.Lock()
         self._comms_entry_count = 0
 
+        session_logger.open_session()
         ai_client.set_provider(self.current_provider, self.current_model)
         ai_client.confirm_and_run_callback = self._confirm_and_run
         ai_client.comms_log_callback = self._on_comms_entry
+        ai_client.tool_log_callback = self._on_tool_log
 
     # ---------------------------------------------------------------- comms log
     def _on_comms_entry(self, entry: dict):
         """Called from background thread; queue for main thread."""
+        session_logger.log_comms(entry)
         with self._pending_comms_lock:
             self._pending_comms.append(entry)
 
+    def _on_tool_log(self, script: str, result: str):
+        """Called from background thread when a tool call completes."""
+        session_logger.log_tool_call(script, result, None)
+
     def _flush_pending_comms(self):
         """Called every frame from the main render loop."""
         with self._pending_comms_lock:
@@ -772,6 +780,7 @@ class App:
             dpg.render_dearpygui_frame()
 
         dpg.save_init_file("dpg_layout.ini")
+        session_logger.close_session()
         dpg.destroy_context()
 
 
@@ -782,3 +791,4 @@ def main():
 
 if __name__ == "__main__":
     main()
+
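`session_logger.py` itself is not part of this diff; the four calls gui.py now makes (`open_session`, `log_comms`, `log_tool_call`, `close_session`) imply an interface roughly like the sketch below. File locations follow the discussion history (comms and call-sequence logs under ./logs, generated scripts under ./scripts/generated, one timestamped buffer per run); the JSON-lines format, file names, and the unused third argument of `log_tool_call` are assumptions, not the repo's actual implementation.

```python
# session_logger.py - minimal sketch consistent with how gui.py calls it.
# The real module may differ (and likely needs a lock, since gui.py calls
# log_comms/log_tool_call from background threads).
import json
import time
from pathlib import Path

_comms_file = None
_tools_file = None
_stamp = None

def open_session() -> None:
    """Open timestamped log buffers under ./logs and ensure ./scripts/generated exists."""
    global _comms_file, _tools_file, _stamp
    _stamp = time.strftime("%Y%m%d-%H%M%S")
    Path("logs").mkdir(exist_ok=True)
    Path("scripts/generated").mkdir(parents=True, exist_ok=True)
    _comms_file = open(f"logs/comms-{_stamp}.jsonl", "a", encoding="utf-8")
    _tools_file = open(f"logs/tools-{_stamp}.jsonl", "a", encoding="utf-8")

def log_comms(entry: dict) -> None:
    """Append one comms-log entry as a JSON line."""
    if _comms_file is not None:
        _comms_file.write(json.dumps(entry, ensure_ascii=False) + "\n")
        _comms_file.flush()

def log_tool_call(script: str, result: str, extra) -> None:
    """Persist the generated script and record its result in the call-sequence log."""
    if _tools_file is None:
        return
    script_path = Path("scripts/generated") / f"tool-{_stamp}-{int(time.time() * 1000)}.ps1"
    script_path.write_text(script, encoding="utf-8")
    _tools_file.write(json.dumps({
        "script_file": script_path.name,
        "result": result,
        "extra": extra,  # gui.py currently passes None here
    }, ensure_ascii=False) + "\n")
    _tools_file.flush()

def close_session() -> None:
    """Close the per-run buffers; called from the GUI shutdown path."""
    for f in (_comms_file, _tools_file):
        if f is not None:
            f.close()
```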