progress
ai_client.py (129 lines changed)
@@ -14,7 +14,7 @@ _anthropic_client = None
 _anthropic_history: list[dict] = []
 
 # Injected by gui.py - called when AI wants to run a command.
-# Signature: (script: str) -> str | None
+# Signature: (script: str, base_dir: str) -> str | None
 # Returns the output string if approved, None if rejected.
 confirm_and_run_callback = None
 
@@ -22,8 +22,20 @@ confirm_and_run_callback = None
 # Signature: (entry: dict) -> None
 comms_log_callback = None
 
+# Injected by gui.py - called whenever a tool call completes.
+# Signature: (script: str, result: str, script_path: str | None) -> None
+tool_log_callback = None
+
 MAX_TOOL_ROUNDS = 5
 
+# Anthropic system prompt - cached as the first turn so it counts toward
+# the prompt-cache prefix on every subsequent request.
+_ANTHROPIC_SYSTEM = (
+    "You are a helpful coding assistant with access to a PowerShell tool. "
+    "When asked to create or edit files, prefer targeted edits over full rewrites. "
+    "Always explain what you are doing before invoking the tool."
+)
+
 # ------------------------------------------------------------------ comms log
 
 _comms_log: list[dict] = []
@@ -142,8 +154,6 @@ def _classify_anthropic_error(exc: Exception) -> ProviderError:
 def _classify_gemini_error(exc: Exception) -> ProviderError:
     """Map a google-genai SDK exception to a ProviderError."""
     body = str(exc).lower()
-    # google-genai surfaces HTTP errors as google.api_core exceptions or
-    # google.genai exceptions; inspect the message text as a reliable fallback.
     try:
         from google.api_core import exceptions as gac
         if isinstance(exc, gac.ResourceExhausted):
@@ -156,7 +166,6 @@ def _classify_gemini_error(exc: Exception) -> ProviderError:
             return ProviderError("network", "gemini", exc)
     except ImportError:
         pass
-    # Fallback: parse status code / message string
     if "429" in body or "quota" in body or "resource exhausted" in body:
        return ProviderError("quota", "gemini", exc)
    if "rate" in body and "limit" in body:
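
Because the fallback path classifies on message text alone, it can be sanity-checked offline; a bare Exception falls through to the string checks whether or not google.api_core is installed. A minimal sketch (assuming ai_client is importable; the exact ProviderError attributes are not shown in this diff):

```python
# Offline smoke test for the string-matching fallback in _classify_gemini_error.
from ai_client import _classify_gemini_error

quota_err = _classify_gemini_error(Exception("429: Resource exhausted (quota)"))
print(type(quota_err).__name__, quota_err)  # expected: the "quota" classification

rate_err = _classify_gemini_error(Exception("Rate limit exceeded for model"))
print(type(rate_err).__name__, rate_err)    # expected: the rate-limit classification
```
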
@@ -279,12 +288,20 @@ def _run_script(script: str, base_dir: str) -> str:
     """
     Delegate to the GUI confirmation callback.
     Returns result string (stdout/stderr) or a rejection message.
+    Also fires tool_log_callback if registered.
     """
     if confirm_and_run_callback is None:
         return "ERROR: no confirmation handler registered"
-    result = confirm_and_run_callback(script, base_dir)
-    if result is None:
-        return "USER REJECTED: command was not executed"
+    # confirm_and_run_callback returns (result, script_path) or None
+    outcome = confirm_and_run_callback(script, base_dir)
+    if outcome is None:
+        result = "USER REJECTED: command was not executed"
+        if tool_log_callback is not None:
+            tool_log_callback(script, result, None)
+        return result
+    result, script_path = outcome
+    if tool_log_callback is not None:
+        tool_log_callback(script, result, script_path)
     return result
 
 # ------------------------------------------------------------------ gemini
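
Under the new contract, the GUI handler must return a (result, script_path) tuple on approval and None on rejection. A conforming handler might look like the following sketch (hypothetical names; the real implementation lives in gui.py and shell_runner.py, neither of which is in this diff):

```python
import subprocess

def ask_user(script: str) -> bool:
    # Stand-in for the GUI confirmation dialog.
    return input(f"Run this script?\n{script}\n[y/N] ").strip().lower() == "y"

def confirm_and_run(script: str, base_dir: str):
    # Contract per _run_script: (script, base_dir) -> (result, script_path) | None
    if not ask_user(script):
        return None  # _run_script turns this into "USER REJECTED ..."
    proc = subprocess.run(
        ["powershell", "-NoProfile", "-Command", script],
        cwd=base_dir, capture_output=True, text=True, timeout=120,
    )
    # script_path may be None here; session_logger.log_tool_call writes the
    # .ps1 and returns the real path once the logger is wired in.
    return proc.stdout + proc.stderr, None

# ai_client.confirm_and_run_callback = confirm_and_run
```
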
@@ -321,7 +338,6 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str) -> str:
         response = _gemini_chat.send_message(full_message)
-
         for round_idx in range(MAX_TOOL_ROUNDS):
             # Log the raw response candidates as text summary
             text_parts_raw = [
                 part.text
                 for candidate in response.candidates
@@ -383,6 +399,35 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str) -> str:
         raise _classify_gemini_error(exc) from exc
 
 # ------------------------------------------------------------------ anthropic
+#
+# Caching strategy (Anthropic prompt caching):
+#
+# The Anthropic API caches a prefix of the input tokens. To maximise hits:
+#
+# 1. A persistent system prompt is sent on every request with
+#    cache_control={"type":"ephemeral"} so it is cached after the first call
+#    and reused on subsequent calls within the 5-minute TTL window.
+#
+# 2. The context block (aggregated markdown) is placed as the FIRST user
+#    message in the history and also marked with cache_control. Because the
+#    system prompt and the context are stable across tool-use rounds within a
+#    single send() call, the cache hit rate is very high after round 0.
+#
+# 3. Tool definitions are passed with cache_control on the last tool so the
+#    entire tools array is also cached.
+#
+# Token accounting: the response payload contains cache_creation_input_tokens
+# and cache_read_input_tokens in addition to the regular input_tokens field.
+# These are included in the comms log under "usage".
+
+def _anthropic_tools_with_cache() -> list[dict]:
+    """Return the tools list with cache_control on the last entry."""
+    import copy
+    tools = copy.deepcopy(_ANTHROPIC_TOOLS)
+    # Mark the last tool so the entire prefix (system + tools) gets cached
+    tools[-1]["cache_control"] = {"type": "ephemeral"}
+    return tools
+
 
 def _ensure_anthropic_client():
     global _anthropic_client
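
As a self-contained illustration of the prefix rule described in the comment above (using the public anthropic Python SDK; the model id and tool definition here are placeholders, not this repo's):

```python
import anthropic

client = anthropic.Anthropic()  # reads ANTHROPIC_API_KEY from the environment

tools = [{
    "name": "run_powershell",                    # placeholder tool
    "description": "Run a PowerShell script and return its output.",
    "input_schema": {
        "type": "object",
        "properties": {"script": {"type": "string"}},
        "required": ["script"],
    },
    "cache_control": {"type": "ephemeral"},      # caches the whole tools array
}]

response = client.messages.create(
    model="claude-sonnet-4-20250514",            # placeholder model id
    max_tokens=1024,
    system=[{
        "type": "text",
        "text": "You are a helpful coding assistant.",
        "cache_control": {"type": "ephemeral"},  # extends the cached prefix
    }],
    tools=tools,
    messages=[{"role": "user", "content": "List the project files."}],
)

# First call: usage.cache_creation_input_tokens > 0 (prefix written).
# Repeat within the TTL: usage.cache_read_input_tokens > 0 (prefix reused).
print(response.usage)
```
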
@@ -391,6 +436,7 @@ def _ensure_anthropic_client():
         creds = _load_credentials()
         _anthropic_client = anthropic.Anthropic(api_key=creds["anthropic"]["api_key"])
 
 
 def _send_anthropic(md_content: str, user_message: str, base_dir: str) -> str:
+    global _anthropic_history
     import anthropic
@@ -398,19 +444,54 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str) -> str:
     try:
         _ensure_anthropic_client()
 
-        full_message = f"<context>\n{md_content}\n</context>\n\n{user_message}"
-        _anthropic_history.append({"role": "user", "content": full_message})
+        # ----------------------------------------------------------------
+        # Build the user turn.
+        #
+        # Structure the content as two blocks so the large context portion
+        # can be cached independently of the user question:
+        #
+        #   [0] context block   <- cache_control applied here
+        #   [1] user question   <- not cached (changes every turn)
+        #
+        # The Anthropic cache anchors at the LAST cache_control marker in
+        # the prefix, so everything up to and including the context block
+        # will be served from cache on subsequent rounds.
+        # ----------------------------------------------------------------
+        user_content = [
+            {
+                "type": "text",
+                "text": f"<context>\n{md_content}\n</context>",
+                "cache_control": {"type": "ephemeral"},
+            },
+            {
+                "type": "text",
+                "text": user_message,
+            },
+        ]
+
+        _anthropic_history.append({"role": "user", "content": user_content})
 
         _append_comms("OUT", "request", {
-            "message": full_message,
+            "message": f"<context>\n{md_content}\n</context>\n\n{user_message}",
         })
 
         for round_idx in range(MAX_TOOL_ROUNDS):
             response = _anthropic_client.messages.create(
                 model=_model,
                 max_tokens=8096,
-                tools=_ANTHROPIC_TOOLS,
-                messages=_anthropic_history
+                system=[
+                    {
+                        "type": "text",
+                        "text": _ANTHROPIC_SYSTEM,
+                        "cache_control": {"type": "ephemeral"},
+                    }
+                ],
+                tools=_anthropic_tools_with_cache(),
+                messages=_anthropic_history,
+                # Ask the API to return cache token counts
+                # betas=["prompt-caching-2024-07-31"],
+                # TODO(Claude): betas is not a valid field:
+                # ERROR: Messages.create() got an unexpected keyword argument 'betas'
             )
 
             _anthropic_history.append({
@@ -418,22 +499,34 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str) -> str:
                 "content": response.content
             })
 
             # Summarise the response content for the log
             text_blocks = [b.text for b in response.content if hasattr(b, "text") and b.text]
             tool_use_blocks = [
                 {"id": b.id, "name": b.name, "input": b.input}
                 for b in response.content
                 if b.type == "tool_use"
             ]
 
+            # Extended usage includes cache fields when the beta header is set
+            usage_dict: dict = {}
+            if response.usage:
+                usage_dict = {
+                    "input_tokens": response.usage.input_tokens,
+                    "output_tokens": response.usage.output_tokens,
+                }
+                # cache fields are present when the beta is active
+                cache_creation = getattr(response.usage, "cache_creation_input_tokens", None)
+                cache_read = getattr(response.usage, "cache_read_input_tokens", None)
+                if cache_creation is not None:
+                    usage_dict["cache_creation_input_tokens"] = cache_creation
+                if cache_read is not None:
+                    usage_dict["cache_read_input_tokens"] = cache_read
+
             _append_comms("IN", "response", {
                 "round": round_idx,
                 "stop_reason": response.stop_reason,
                 "text": "\n".join(text_blocks),
                 "tool_calls": tool_use_blocks,
-                "usage": {
-                    "input_tokens": response.usage.input_tokens,
-                    "output_tokens": response.usage.output_tokens,
-                } if response.usage else {},
+                "usage": usage_dict,
             })
 
             if response.stop_reason != "tool_use":
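
For orientation, the loop this hunk instruments follows the standard Anthropic tool-use shape: keep calling messages.create while stop_reason is "tool_use", feeding tool_result blocks back as a user turn. A condensed generic sketch (run_tool is a placeholder, not this repo's _run_script):

```python
def tool_loop(client, model, tools, history, run_tool, max_rounds=5):
    """Generic Anthropic tool-use round loop (sketch, not _send_anthropic)."""
    for _ in range(max_rounds):
        response = client.messages.create(
            model=model, max_tokens=8096, tools=tools, messages=history,
        )
        history.append({"role": "assistant", "content": response.content})
        if response.stop_reason != "tool_use":
            # Final answer: join the text blocks and stop looping.
            return "".join(b.text for b in response.content if b.type == "text")
        # Execute each requested tool; echo results back as tool_result blocks.
        results = [
            {
                "type": "tool_result",
                "tool_use_id": block.id,
                "content": run_tool(block.name, block.input),
            }
            for block in response.content
            if block.type == "tool_use"
        ]
        history.append({"role": "user", "content": results})
    return "(stopped: tool-round limit reached)"
```
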
@@ -13,6 +13,7 @@ paths = [
     "pyproject.toml",
     "MainContext.md",
     "C:/projects/manual_slop/shell_runner.py",
+    "C:/projects/manual_slop/session_logger.py",
 ]
 
 [screenshots]
@@ -20,7 +21,10 @@ base_dir = "C:/Users/Ed/scoop/apps/sharex/current/ShareX/Screenshots/2026-02"
 paths = []
 
 [discussion]
-history = []
+history = [
+    "Make sure we are optimially using the anthropic api for this. \nI want to fully utilize caching if possible and just reduce overall loss of limits. \nAdd a log for comms history thats saved in ./logs and a the same for tool calls (scripts in ./scripts/generated, and their call equence in ./logs) these logs are closed in the next runtime of this gui program. \nOn open they amke new file buffers, each file buffer has a timestamp of when it was first made.",
+    "Now finish the gui portion: in gui.py or anything left (last made seesion_logger.py it seems). Caching strategy also looks to be updated in ai_client.py",
+]
 
 [ai]
 provider = "anthropic"
 
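
These path lists feed the aggregated markdown that _send_anthropic wraps in <context> tags. A sketch of that aggregation step (the config filename and table name are assumptions; the diff shows only the paths list and the [screenshots]/[discussion]/[ai] tables):

```python
import tomllib          # Python 3.11+
from pathlib import Path

def build_context(config_file: str = "context.toml") -> str:
    # Hypothetical aggregator: concatenate each configured file into one
    # markdown blob; ai_client then wraps it in <context>...</context>.
    with open(config_file, "rb") as fh:
        cfg = tomllib.load(fh)
    chunks = []
    for p in cfg["files"]["paths"]:            # table name "files" assumed
        body = Path(p).read_text(encoding="utf-8", errors="replace")
        chunks.append(f"## {p}\n\n{body}")
    return "\n\n".join(chunks)                 # becomes md_content
```
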
docs/anthropic_api_ref_create_message.md (4691 lines, new file; diff suppressed because it is too large)
docs/anthropic_api_ref_create_message_beta.md (5906 lines, new file; diff suppressed because it is too large)
docs/anthropic_prompt_caching.md (0 lines, new file)
@@ -10,19 +10,19 @@ Collapsed=0
 
 [Window][###22]
 Pos=0,0
-Size=376,652
+Size=364,652
 Collapsed=0
 DockId=0x00000005,0
 
 [Window][###30]
 Pos=0,654
-Size=376,835
+Size=364,835
 Collapsed=0
 DockId=0x00000009,0
 
 [Window][###66]
 Pos=0,1491
-Size=376,646
+Size=364,646
 Collapsed=0
 DockId=0x0000000A,0
 
@@ -83,61 +83,84 @@ Collapsed=0
 
 [Window][###76]
 Pos=1215,0
-Size=868,1749
+Size=1314,1690
 Collapsed=0
 DockId=0x00000017,0
 
 [Window][###83]
-Pos=378,0
-Size=835,266
+Pos=366,0
+Size=847,425
 Collapsed=0
-DockId=0x00000015,0
+DockId=0x00000011,0
 
 [Window][###91]
-Pos=1215,1751
-Size=2625,386
+Pos=1215,1692
+Size=2625,445
 Collapsed=0
 DockId=0x00000014,0
 
 [Window][###98]
-Pos=2085,0
-Size=1755,1749
+Pos=2531,0
+Size=1309,1690
 Collapsed=0
 DockId=0x00000018,0
 
-[Window][###106]
-Pos=378,268
-Size=835,1068
-Collapsed=0
-DockId=0x00000016,0
-
 [Window][###100]
-Pos=378,1338
-Size=835,799
+Pos=366,427
+Size=847,1710
 Collapsed=0
 DockId=0x00000012,0
 
+[Window][###100]
+Pos=366,427
+Size=847,1710
+Collapsed=0
+DockId=0x00000012,1
+
+[Window][###133]
+Pos=1306,785
+Size=700,440
+Collapsed=0
+
+[Window][###216]
+Pos=1578,868
+Size=700,440
+Collapsed=0
+
+[Window][###305]
+Pos=1578,868
+Size=700,440
+Collapsed=0
+
+[Window][###400]
+Pos=1578,868
+Size=700,440
+Collapsed=0
+
+[Window][###501]
+Pos=1578,868
+Size=700,440
+Collapsed=0
+
 [Docking][Data]
 DockSpace ID=0x7C6B3D9B Window=0xA87D555D Pos=0,0 Size=3840,2137 Split=X Selected=0x40484D8F
-DockNode ID=0x00000003 Parent=0x7C6B3D9B SizeRef=376,1161 Split=Y Selected=0xEE087978
+DockNode ID=0x00000003 Parent=0x7C6B3D9B SizeRef=364,1161 Split=Y Selected=0xEE087978
 DockNode ID=0x00000005 Parent=0x00000003 SizeRef=235,354 Selected=0xEE087978
 DockNode ID=0x00000006 Parent=0x00000003 SizeRef=235,805 Split=Y Selected=0x5F94F9BD
 DockNode ID=0x00000009 Parent=0x00000006 SizeRef=235,453 Selected=0x5F94F9BD
 DockNode ID=0x0000000A Parent=0x00000006 SizeRef=235,350 Selected=0x80199DAE
-DockNode ID=0x00000004 Parent=0x7C6B3D9B SizeRef=1286,1161 Split=X
+DockNode ID=0x00000004 Parent=0x7C6B3D9B SizeRef=3474,1161 Split=X
 DockNode ID=0x00000001 Parent=0x00000004 SizeRef=829,1161 Split=Y Selected=0x40484D8F
 DockNode ID=0x00000007 Parent=0x00000001 SizeRef=595,492 Selected=0xBA13FCDE
 DockNode ID=0x00000008 Parent=0x00000001 SizeRef=595,1643 Split=X Selected=0x40484D8F
-DockNode ID=0x0000000F Parent=0x00000008 SizeRef=835,2137 Split=Y Selected=0x07E8375F
-DockNode ID=0x00000011 Parent=0x0000000F SizeRef=600,1336 Split=Y Selected=0x07E8375F
-DockNode ID=0x00000015 Parent=0x00000011 SizeRef=995,266 Selected=0x72F373AE
-DockNode ID=0x00000016 Parent=0x00000011 SizeRef=995,1068 Selected=0x07E8375F
-DockNode ID=0x00000012 Parent=0x0000000F SizeRef=600,799 Selected=0x88A8C2FF
+DockNode ID=0x0000000F Parent=0x00000008 SizeRef=847,2137 Split=Y Selected=0x07E8375F
+DockNode ID=0x00000011 Parent=0x0000000F SizeRef=835,425 Selected=0x72F373AE
+DockNode ID=0x00000012 Parent=0x0000000F SizeRef=835,1710 Selected=0x07E8375F
 DockNode ID=0x00000010 Parent=0x00000008 SizeRef=2625,2137 Split=Y Selected=0xCE7F911A
-DockNode ID=0x00000013 Parent=0x00000010 SizeRef=1967,1749 Split=X Selected=0xCE7F911A
-DockNode ID=0x00000017 Parent=0x00000013 SizeRef=868,1749 Selected=0x4B454E0B
-DockNode ID=0x00000018 Parent=0x00000013 SizeRef=1755,1749 CentralNode=1 Selected=0xCE7F911A
-DockNode ID=0x00000014 Parent=0x00000010 SizeRef=1967,386 Selected=0xC36FF36B
+DockNode ID=0x00000013 Parent=0x00000010 SizeRef=1967,1690 Split=X Selected=0xCE7F911A
+DockNode ID=0x00000017 Parent=0x00000013 SizeRef=1314,1749 Selected=0x4B454E0B
+DockNode ID=0x00000018 Parent=0x00000013 SizeRef=1309,1749 CentralNode=1 Selected=0xCE7F911A
+DockNode ID=0x00000014 Parent=0x00000010 SizeRef=1967,445 Selected=0xC36FF36B
 DockNode ID=0x00000002 Parent=0x00000004 SizeRef=2631,1161 Split=X Selected=0x714F2F7B
 DockNode ID=0x0000000B Parent=0x00000002 SizeRef=968,1161 Selected=0xC915D9DA
 DockNode ID=0x0000000C Parent=0x00000002 SizeRef=1661,1161 Split=Y Selected=0x714F2F7B
 
session_logger.py (125 lines, new file)
@@ -0,0 +1,125 @@
# session_logger.py
"""
Opens timestamped log/script files at startup and keeps them open for the
lifetime of the process. The next run of the GUI creates new files; the
previous run's files are simply closed when the process exits.

File layout
-----------
logs/
    comms_<ts>.log      - every comms entry (direction/kind/payload) as JSON-L
    toolcalls_<ts>.log  - sequential record of every tool invocation
scripts/generated/
    <ts>_<seq:04d>.ps1  - each PowerShell script the AI generated, in order

Where <ts> = YYYYMMDD_HHMMSS of when this session was started.
"""

import datetime
import json
import threading
from pathlib import Path

_LOG_DIR = Path("./logs")
_SCRIPTS_DIR = Path("./scripts/generated")

_ts: str = ""   # session timestamp string e.g. "20260301_142233"
_seq: int = 0   # monotonic counter for script files this session
_seq_lock = threading.Lock()

_comms_fh = None   # file handle: logs/comms_<ts>.log
_tool_fh = None    # file handle: logs/toolcalls_<ts>.log


def _now_ts() -> str:
    return datetime.datetime.now().strftime("%Y%m%d_%H%M%S")


def open_session():
    """
    Called once at GUI startup. Creates the log directories if needed and
    opens the two log files for this session. Idempotent - a second call is
    ignored.
    """
    global _ts, _comms_fh, _tool_fh, _seq

    if _comms_fh is not None:
        return  # already open

    _LOG_DIR.mkdir(parents=True, exist_ok=True)
    _SCRIPTS_DIR.mkdir(parents=True, exist_ok=True)

    _ts = _now_ts()
    _seq = 0

    _comms_fh = open(_LOG_DIR / f"comms_{_ts}.log", "w", encoding="utf-8", buffering=1)
    _tool_fh = open(_LOG_DIR / f"toolcalls_{_ts}.log", "w", encoding="utf-8", buffering=1)

    _tool_fh.write(f"# Tool-call log — session {_ts}\n\n")
    _tool_fh.flush()


def close_session():
    """Flush and close both log files. Called on clean exit (optional)."""
    global _comms_fh, _tool_fh
    if _comms_fh:
        _comms_fh.close()
        _comms_fh = None
    if _tool_fh:
        _tool_fh.close()
        _tool_fh = None


def log_comms(entry: dict):
    """
    Append one comms entry to the comms log file as a JSON-L line.
    Thread-safe (GIL + line-buffered file).
    """
    if _comms_fh is None:
        return
    try:
        _comms_fh.write(json.dumps(entry, ensure_ascii=False, default=str) + "\n")
    except Exception:
        pass


def log_tool_call(script: str, result: str, script_path: str | None):
    """
    Append a tool-call record to the toolcalls log and write the PS1 script to
    scripts/generated/. Returns the path of the written script file.
    """
    global _seq

    if _tool_fh is None:
        return script_path  # logger not open yet

    with _seq_lock:
        _seq += 1
        seq = _seq

    ts_entry = datetime.datetime.now().strftime("%H:%M:%S")

    # Write the .ps1 file
    ps1_name = f"{_ts}_{seq:04d}.ps1"
    ps1_path = _SCRIPTS_DIR / ps1_name
    try:
        ps1_path.write_text(script, encoding="utf-8")
    except Exception as exc:
        ps1_path = None
        ps1_name = f"(write error: {exc})"

    # Append to the tool-call sequence log; fall back to the error note when
    # the .ps1 write failed so the failure is visible in the log.
    try:
        _tool_fh.write(
            f"## Call #{seq} [{ts_entry}]\n"
            f"Script file: {ps1_path if ps1_path else ps1_name}\n\n"
            f"```powershell\n{script}\n```\n\n"
            f"### Result\n\n"
            f"```\n{result}\n```\n\n"
            f"---\n\n"
        )
        _tool_fh.flush()
    except Exception:
        pass

    return str(ps1_path) if ps1_path else None
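
Wiring the logger into the rest of the app then reduces to registering these module functions as the ai_client callbacks at startup; a sketch of the intended sequence (inferred from the signatures, since gui.py is not part of this diff):

```python
import ai_client
import session_logger

# Once at GUI startup: create this session's timestamped buffers.
session_logger.open_session()

# Signatures line up directly with the callbacks declared in ai_client:
#   log_comms(entry: dict)                      -> comms_log_callback
#   log_tool_call(script, result, script_path)  -> tool_log_callback
ai_client.comms_log_callback = session_logger.log_comms
ai_client.tool_log_callback = session_logger.log_tool_call

# ... run the GUI main loop ...

# Optional on clean exit; the next run opens fresh files either way.
session_logger.close_session()
```
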