progress

2026-02-21 16:51:00 -05:00
parent ee1ee1c77e
commit 330c8604c9
5 changed files with 438 additions and 141 deletions
--- a/aggregate.py
+++ b/aggregate.py
@@ -1,4 +1,4 @@
-# aggregate.py
+# aggregate.py
 import tomllib
 import re
 import glob
@@ -135,7 +135,7 @@ def main():
    with open("config.toml", "rb") as f:
        import tomllib
        config = tomllib.load(f)
-    markdown, output_file = run(config)
+    markdown, output_file, _ = run(config)
    print(f"Written: {output_file}")
 if __name__ == "__main__":
--- a/ai_client.py
+++ b/ai_client.py
@@ -1,4 +1,4 @@
-# ai_client.py
+# ai_client.py
 import tomllib
 import json
 import datetime
@@ -29,6 +29,12 @@ tool_log_callback = None
 MAX_TOOL_ROUNDS = 5
 # Maximum characters per text chunk sent to Anthropic.
 # Anthropic's limit is ~200k tokens; we use 180k chars as a safe ceiling
 # (1 token ~ 3-4 chars, so 180k chars ~ 45-60k tokens, well within limits
 # even for very large aggregated markdown files).
 _ANTHROPIC_CHUNK_SIZE = 180_000
 # Anthropic system prompt - sent with cache_control so it is cached after the
 # first request and reused on every subsequent call within the TTL window.
 _ANTHROPIC_SYSTEM = (
@@ -170,6 +176,7 @@ def reset_session():
    _gemini_chat = None
    _anthropic_client = None
    _anthropic_history = []
    file_cache.reset_client()
 # ------------------------------------------------------------------ model listing
@@ -379,31 +386,32 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str) -> str:
 # ------------------------------------------------------------------ anthropic
 #
-# Caching strategy (Anthropic prompt caching):
+# Sending strategy for Anthropic:
 #
-#   The Anthropic API caches a contiguous prefix of the input.  To maximise
+#   PRIMARY PATH  (_send_anthropic_files)  - used when file_items are provided
-#   cache hits we structure every request as follows:
+#   ============
 #   Each file from config is uploaded via the Anthropic Files API
 #   (file_cache.get_file_id handles upload + caching by mtime/size).
 #   Files are sent as individual document/image content blocks in the first
 #   user message.  The discussion history section of the markdown (which is
 #   small and changes each session) is still sent as a text block.
 #   This keeps the per-message payload lean and lets the Files API handle
 #   the heavy lifting of large source files.
 #
-#   system (array form):
+#   FALLBACK PATH  (_send_anthropic_chunked)  - used when no file_items, or if
-#       [0] _ANTHROPIC_SYSTEM text   <- cache_control: ephemeral
+#   ============   the Files API path fails
-#           Stable across the whole session; cached after the first request.
+#   The full aggregated markdown is split into <=_ANTHROPIC_CHUNK_SIZE char
 #   chunks and sent as separate text content blocks.  cache_control:ephemeral
 #   is placed on the LAST chunk so the whole context prefix is cached together.
 #
-#   tools:
+#   Caching strategy (Anthropic prompt caching):
-#       Last tool has cache_control: ephemeral.
+#   - System prompt: cache_control:ephemeral on the text block
-#       Stable across the whole session; cached together with the system prompt.
+#   - Last tool in _ANTHROPIC_TOOLS: cache_control:ephemeral
 #   - Context content blocks: cache_control:ephemeral on the last block
 #   These three form a stable cached prefix that survives across turns.
 #
-#   messages[0]  (first user turn ever, or re-sent each call):
+#   Token cost: cache creation ~25% more than normal input; cache reads ~10%
-#       content[0]: <context> block  <- cache_control: ephemeral
+#   of normal input.  Steady-state use is much cheaper after the first request.
 #           The aggregated markdown.  Changes only when the user regenerates.
 #           A new cache entry is created when it changes; otherwise it's a hit.
 #       content[1]: user question    <- no cache_control (varies every turn)
 #
 #   Subsequent turns (tool results, follow-up questions) are appended to
 #   _anthropic_history normally without extra cache markers.
 #
 #   Token cost of cache creation is ~25 % more than a normal input token, but
 #   cache reads cost ~10 % of a normal input token, so steady-state (many
 #   rounds / sends per session) is much cheaper.
 def _ensure_anthropic_client():
    global _anthropic_client
@@ -413,127 +421,337 @@ def _ensure_anthropic_client():
        _anthropic_client = anthropic.Anthropic(api_key=creds["anthropic"]["api_key"])
-def _send_anthropic(md_content: str, user_message: str, base_dir: str) -> str:
+def _chunk_text(text: str, chunk_size: int) -> list[str]:
-    global _anthropic_history
+    """Split text into chunks of at most chunk_size characters."""
-    import anthropic
+    return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
    try:
        _ensure_anthropic_client()
-        # Build the user content: context block (cached) + question (not cached).
+def _build_chunked_context_blocks(md_content: str) -> list[dict]:
-        # The cache anchor is placed on the context block so the entire prefix
+    """
-        # (system + tools + context) is eligible for caching.
+    Split md_content into <=_ANTHROPIC_CHUNK_SIZE char chunks and return
-        user_content = [
+    a list of Anthropic text content blocks.  cache_control:ephemeral is
-            {
+    placed only on the LAST block so the whole prefix is cached as one unit.
-                "type": "text",
+    """
-                "text": f"<context>\n{md_content}\n</context>",
+    chunks = _chunk_text(md_content, _ANTHROPIC_CHUNK_SIZE)
-                "cache_control": {"type": "ephemeral"},
+    blocks = []
-            },
+    for i, chunk in enumerate(chunks):
-            {
+        block: dict = {"type": "text", "text": chunk}
-                "type": "text",
+        if i == len(chunks) - 1:
-                "text": user_message,
+            block["cache_control"] = {"type": "ephemeral"}
-            },
+        blocks.append(block)
-        ]
+    return blocks
        _anthropic_history.append({"role": "user", "content": user_content})
-        _append_comms("OUT", "request", {
+def _build_files_context_blocks(
-            "message": f"<context>\n{md_content}\n</context>\n\n{user_message}",
+    md_header: str,
    file_items: list[dict],
    screenshot_items: list[dict] | None = None,
 ) -> list[dict]:
    """
    Build content blocks for the Files API path.
    - md_header  : the Discussion History section text (small, sent as text block)
    - file_items : list of dicts from aggregate.build_file_items()
                   each has: path (Path|None), entry (str), content (str), error (bool)
    - screenshot_items: list of dicts, each with a "path" key (Path); entries that resolve to image files are included as image blocks
    Returns a list of Anthropic content blocks.
    The last block gets cache_control:ephemeral.
    """
    blocks: list[dict] = []
    # Discussion history / header as a text block (small, always inline)
    if md_header.strip():
        blocks.append({
            "type": "text",
            "text": md_header,
        })
-        for round_idx in range(MAX_TOOL_ROUNDS):
+    # One document/image block per file
-            response = _anthropic_client.messages.create(
+    for item in file_items:
-                model=_model,
+        path: Path | None = item.get("path")
-                max_tokens=8096,
+        entry: str = item.get("entry", "")
-                system=[
+        error: bool = item.get("error", False)
                    {
                        "type": "text",
                        "text": _ANTHROPIC_SYSTEM,
                        "cache_control": {"type": "ephemeral"},
                    }
                ],
                tools=_ANTHROPIC_TOOLS,
                messages=_anthropic_history,
            )
-            _anthropic_history.append({
+        if error or path is None:
-                "role": "assistant",
+            # Fall back to inline text for error entries
-                "content": response.content
+            blocks.append({
                "type": "text",
                "text": f"### `{entry}`\n\nERROR: {item.get('content', 'unknown error')}",
            })
            continue
        block_type = file_cache.content_block_type(path)
        if block_type == "unsupported":
            # Inline as plain text
            blocks.append({
                "type": "text",
                "text": f"### `{entry}`\n\n```\n{item.get('content', '')}\n```",
            })
            continue
        # Try to get/upload via Files API
        file_id = file_cache.get_file_id(path)
        if file_id is None:
            # Unsupported or missing - inline fallback
            blocks.append({
                "type": "text",
                "text": f"### `{entry}`\n\n```\n{item.get('content', '')}\n```",
            })
            continue
        if block_type == "document":
            blocks.append({
                "type": "document",
                "source": {
                    "type": "file",
                    "file_id": file_id,
                },
                "title": path.name,
                "citations": {"enabled": False},
            })
        elif block_type == "image":
            blocks.append({
                "type": "image",
                "source": {
                    "type": "file",
                    "file_id": file_id,
                },
            })
-            text_blocks = [b.text for b in response.content if hasattr(b, "text") and b.text]
+    # Screenshots as image blocks
-            tool_use_blocks = [
+    for item in (screenshot_items or []):
-                {"id": b.id, "name": b.name, "input": b.input}
+        path = item.get("path")
-                for b in response.content
+        if path is None:
-                if b.type == "tool_use"
+            continue
-            ]
+        block_type = file_cache.content_block_type(path)
-
+        if block_type != "image":
-            # Collect usage; cache fields are present when caching is active
+            continue
-            usage_dict: dict = {}
+        file_id = file_cache.get_file_id(path)
-            if response.usage:
+        if file_id:
-                usage_dict["input_tokens"]  = response.usage.input_tokens
+            blocks.append({
-                usage_dict["output_tokens"] = response.usage.output_tokens
+                "type": "image",
-                cache_creation = getattr(response.usage, "cache_creation_input_tokens", None)
+                "source": {
-                cache_read     = getattr(response.usage, "cache_read_input_tokens",     None)
+                    "type": "file",
-                if cache_creation is not None:
+                    "file_id": file_id,
-                    usage_dict["cache_creation_input_tokens"] = cache_creation
+                },
                if cache_read is not None:
                    usage_dict["cache_read_input_tokens"] = cache_read
            _append_comms("IN", "response", {
                "round":       round_idx,
                "stop_reason": response.stop_reason,
                "text":        "\n".join(text_blocks),
                "tool_calls":  tool_use_blocks,
                "usage":       usage_dict,
            })
-            if response.stop_reason != "tool_use":
+    # Put cache_control on the last block
-                break
+    if blocks:
        blocks[-1]["cache_control"] = {"type": "ephemeral"}
-            tool_results = []
+    return blocks
            for block in response.content:
                if block.type == "tool_use" and block.name == TOOL_NAME:
                    script = block.input.get("script", "")
                    _append_comms("OUT", "tool_call", {
                        "name":   TOOL_NAME,
                        "id":     block.id,
                        "script": script,
                    })
                    output = _run_script(script, base_dir)
                    _append_comms("IN", "tool_result", {
                        "name":   TOOL_NAME,
                        "id":     block.id,
                        "output": output,
                    })
                    tool_results.append({
                        "type":        "tool_result",
                        "tool_use_id": block.id,
                        "content":     output,
                    })
            if not tool_results:
                break
-            _anthropic_history.append({
+def _run_anthropic_loop(
-                "role":    "user",
+    user_content: list[dict],
-                "content": tool_results,
+    user_message: str,
-            })
+    base_dir: str,
    log_summary: str,
 ) -> str:
    """
    Core Anthropic message loop shared by both send paths.
    Appends the user turn to _anthropic_history, runs the tool loop,
    and returns the final assistant text.
    """
    global _anthropic_history
-            _append_comms("OUT", "tool_result_send", {
+    _anthropic_history.append({"role": "user", "content": user_content})
                "results": [
                    {"tool_use_id": r["tool_use_id"], "content": r["content"]}
                    for r in tool_results
                ],
            })
-        text_parts = [
+    _append_comms("OUT", "request", {
-            block.text
+        "message": log_summary,
-            for block in response.content
+    })
-            if hasattr(block, "text") and block.text
+
    for round_idx in range(MAX_TOOL_ROUNDS):
        response = _anthropic_client.messages.create(
            model=_model,
            max_tokens=8096,
            system=[
                {
                    "type": "text",
                    "text": _ANTHROPIC_SYSTEM,
                    "cache_control": {"type": "ephemeral"},
                }
            ],
            tools=_ANTHROPIC_TOOLS,
            messages=_anthropic_history,
            extra_headers={"anthropic-beta": "files-api-2025-04-14"},
        )
        _anthropic_history.append({
            "role": "assistant",
            "content": response.content
        })
        text_blocks = [b.text for b in response.content if hasattr(b, "text") and b.text]
        tool_use_blocks = [
            {"id": b.id, "name": b.name, "input": b.input}
            for b in response.content
            if b.type == "tool_use"
        ]
        return "\n".join(text_parts)
        usage_dict: dict = {}
        if response.usage:
            usage_dict["input_tokens"]  = response.usage.input_tokens
            usage_dict["output_tokens"] = response.usage.output_tokens
            cache_creation = getattr(response.usage, "cache_creation_input_tokens", None)
            cache_read     = getattr(response.usage, "cache_read_input_tokens",     None)
            if cache_creation is not None:
                usage_dict["cache_creation_input_tokens"] = cache_creation
            if cache_read is not None:
                usage_dict["cache_read_input_tokens"] = cache_read
        _append_comms("IN", "response", {
            "round":       round_idx,
            "stop_reason": response.stop_reason,
            "text":        "\n".join(text_blocks),
            "tool_calls":  tool_use_blocks,
            "usage":       usage_dict,
        })
        if response.stop_reason != "tool_use":
            break
        tool_results = []
        for block in response.content:
            if block.type == "tool_use" and block.name == TOOL_NAME:
                script = block.input.get("script", "")
                _append_comms("OUT", "tool_call", {
                    "name":   TOOL_NAME,
                    "id":     block.id,
                    "script": script,
                })
                output = _run_script(script, base_dir)
                _append_comms("IN", "tool_result", {
                    "name":   TOOL_NAME,
                    "id":     block.id,
                    "output": output,
                })
                tool_results.append({
                    "type":        "tool_result",
                    "tool_use_id": block.id,
                    "content":     output,
                })
        if not tool_results:
            break
        _anthropic_history.append({
            "role":    "user",
            "content": tool_results,
        })
        _append_comms("OUT", "tool_result_send", {
            "results": [
                {"tool_use_id": r["tool_use_id"], "content": r["content"]}
                for r in tool_results
            ],
        })
    text_parts = [
        block.text
        for block in response.content
        if hasattr(block, "text") and block.text
    ]
    return "\n".join(text_parts)
 def _send_anthropic_files(
    md_content: str,
    user_message: str,
    base_dir: str,
    file_items: list[dict],
 ) -> str:
    """
    Files API send path for Anthropic.

    Sends the configured files as individual content blocks built by
    _build_files_context_blocks() instead of inlining the whole aggregated
    markdown as text.  Only the part of md_content that precedes the
    "## Files" section marker is sent inline as a text block; when the marker
    is absent, all of md_content is sent inline.

    md_content  : aggregated markdown string
    user_message: the user's question, appended as the final text block
    base_dir    : forwarded unchanged to the message loop / fallback path
    file_items  : file dicts handed to _build_files_context_blocks()

    Returns the text produced by _run_anthropic_loop().  If building the
    file blocks raises, the failure is recorded in the comms log and the
    result of _send_anthropic_chunked() is returned instead.
    """
    _ensure_anthropic_client()

    # Keep only what precedes the Files section for the inline text block;
    # the files themselves travel as separate content blocks.
    files_marker = "\n\n---\n\n## Files\n\n"
    split_idx = md_content.find(files_marker)
    discussion_section = md_content if split_idx == -1 else md_content[:split_idx]

    try:
        context_blocks = _build_files_context_blocks(discussion_section, file_items)
    except Exception as upload_err:
        # Deliberate best-effort: any failure while preparing file blocks
        # downgrades this send to the chunked-text path rather than aborting.
        _append_comms("OUT", "request", {
            "message": f"[Files API upload failed: {upload_err}] falling back to chunked text",
        })
        return _send_anthropic_chunked(md_content, user_message, base_dir)

    # The question goes last and carries no cache_control of its own.
    user_content = context_blocks + [
        {
            "type": "text",
            "text": user_message,
        }
    ]
    log_summary = (
        f"[Files API: {len(file_items)} file(s) as document/image blocks, "
        f"discussion section {len(discussion_section)} chars inline]\n\n{user_message}"
    )
    return _run_anthropic_loop(user_content, user_message, base_dir, log_summary)
 def _send_anthropic_chunked(md_content: str, user_message: str, base_dir: str) -> str:
    """
    Fallback send path: inline the aggregated markdown as text blocks.

    md_content is turned into text content blocks by
    _build_chunked_context_blocks(), the user question is appended as one
    more text block, and the combined content is handed to
    _run_anthropic_loop() together with a short summary for the comms log.
    Returns whatever _run_anthropic_loop() returns.
    """
    _ensure_anthropic_client()

    chunk_blocks = _build_chunked_context_blocks(md_content)
    question_block = {"type": "text", "text": user_message}

    # Summary logged in place of the full payload.
    summary = (
        f"[Chunked text: {len(chunk_blocks)} chunk(s), "
        f"{len(md_content)} chars total]\n\n{user_message}"
    )
    return _run_anthropic_loop(
        chunk_blocks + [question_block],
        user_message,
        base_dir,
        summary,
    )
 def _send_anthropic(
    md_content: str,
    user_message: str,
    base_dir: str,
    file_items: list[dict] | None = None,
 ) -> str:
    """
    Entry point for Anthropic sends.  Routes to the Files API path when
    file_items are provided, otherwise falls back to chunked text.
    """
    try:
        if file_items:
            return _send_anthropic_files(md_content, user_message, base_dir, file_items)
        else:
            return _send_anthropic_chunked(md_content, user_message, base_dir)
    except ProviderError:
        raise
    except Exception as exc:
@@ -541,10 +759,24 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str) -> str:
 # ------------------------------------------------------------------ unified send
-def send(md_content: str, user_message: str, base_dir: str = ".") -> str:
+def send(
    md_content: str,
    user_message: str,
    base_dir: str = ".",
    file_items: list[dict] | None = None,
 ) -> str:
    """
    Send a message to the active provider.
    md_content  : aggregated markdown string from aggregate.run()
    user_message: the user's question / instruction
    base_dir    : project base directory (for PowerShell tool calls)
    file_items  : optional list of file dicts from aggregate.build_file_items();
                  when provided and provider is anthropic, files are uploaded
                  via the Files API rather than inlined as text
    """
    if _provider == "gemini":
        return _send_gemini(md_content, user_message, base_dir)
    elif _provider == "anthropic":
-        return _send_anthropic(md_content, user_message, base_dir)
+        return _send_anthropic(md_content, user_message, base_dir, file_items)
    raise ValueError(f"unknown provider: {_provider}")
--- a/config.toml
+++ b/config.toml
@@ -19,15 +19,16 @@ paths = [
 [screenshots]
 base_dir = "C:/Users/Ed/scoop/apps/sharex/current/ShareX/Screenshots/2026-02"
-paths = [
+paths = []
    "C:/Users/Ed/scoop/apps/sharex/current/ShareX/Screenshots/2026-02/python_2026-02-21_16-38-51.png",
 ]
 [discussion]
 history = [
    "[User]\nAdd support for properly handling files that we have in config with anthropics api (your context will have documentation reguarding it). This will allow us to free up the main markdown file were sending as a message.\nWe also want to make sure the messaging of that giant markdown is chunked as anthropic does have an upper limit per messages.",
    "[AI]\nGood. Now the big rewrite of `ai_client.py`. The strategy:\n\n- Add a new `_send_anthropic_files` path that uses `file_cache.get_file_id()` to upload files individually as `document`/`image` content blocks, keeping the context message lean\n- The fallback (and also for oversized content) uses chunked text blocks — split the markdown into ≤180k-char chunks (well under the ~200k token limit with headroom for overhead), each sent as a separate `text` content block with `cache_control` on the last one\n- `send()` now accepts an optional `file_items` parameter from aggregate\n- `reset_session()` also resets the file_cache client",
-    "[User]\nI will check if this works now.... To test my next message will be a new task naturally...",
+    "[User]\ncontinue. Also got this error when trying to run latest version of this program. However I can use an old version for now.",
    "[Ai]\nNow fix `gui.py` — patch the three 2-tuple unpack sites and thread `file_items` through to `send()`:",
    "[User]\ncontinue. My bad the error in comms was shown int he screenshot I linked. in the md context. Make sure to update MainContext.md when done.",
    "[AI]\nfinished without sending a repose...",
 ]
 [ai]
--- a/dpg_layout.ini
+++ b/dpg_layout.ini
@@ -16,7 +16,7 @@ DockId=0x00000005,0
 [Window][###30]
 Pos=0,654
-Size=549,696
+Size=549,742
 Collapsed=0
 DockId=0x00000025,0
@@ -344,6 +344,66 @@ Size=1743,483
 Collapsed=0
 DockId=0x00000027,0
 [Window][###219]
 Pos=1578,868
 Size=700,440
 Collapsed=0
 [Window][###313]
 Pos=1578,868
 Size=700,440
 Collapsed=0
 [Window][###402]
 Pos=1578,868
 Size=700,440
 Collapsed=0
 [Window][###508]
 Pos=1578,868
 Size=700,440
 Collapsed=0
 [Window][###634]
 Pos=1578,868
 Size=700,440
 Collapsed=0
 [Window][###763]
 Pos=1578,868
 Size=700,440
 Collapsed=0
 [Window][###912]
 Pos=1578,868
 Size=700,440
 Collapsed=0
 [Window][###1031]
 Pos=1578,868
 Size=700,440
 Collapsed=0
 [Window][###1167]
 Pos=1578,868
 Size=700,440
 Collapsed=0
 [Window][###1323]
 Pos=1578,868
 Size=700,440
 Collapsed=0
 [Window][###1482]
 Pos=1578,868
 Size=700,440
 Collapsed=0
 [Window][###1650]
 Pos=1578,868
 Size=700,440
 Collapsed=0
 [Docking][Data]
 DockSpace                     ID=0x7C6B3D9B Window=0xA87D555D Pos=0,0 Size=3840,2137 Split=X Selected=0x40484D8F
  DockNode                    ID=0x00000003 Parent=0x7C6B3D9B SizeRef=549,1161 Split=Y Selected=0xEE087978
--- a/gui.py
+++ b/gui.py
@@ -303,6 +303,7 @@ class App:
        self.ai_response = ""
        self.last_md = ""
        self.last_md_path: Path | None = None
        self.last_file_items: list = []
        self.send_thread: threading.Thread | None = None
        self.models_thread: threading.Thread | None = None
@@ -349,7 +350,7 @@ class App:
        _render_comms_entry("comms_scroll", entry, idx)
    def _rebuild_comms_log(self):
-        """Full redraw from ai_client.get_comms_log() — used after clear/reset."""
+        """Full redraw from ai_client.get_comms_log() - used after clear/reset."""
        if not dpg.does_item_exist("comms_scroll"):
            return
        dpg.delete_item("comms_scroll", children_only=True)
@@ -425,7 +426,7 @@ class App:
            "model":    self.current_model,
        }
-    def _do_generate(self) -> tuple[str, Path]:
+    def _do_generate(self) -> tuple[str, Path, list]:
        self._flush_to_config()
        save_config(self.config)
        return aggregate.run(self.config)
@@ -576,7 +577,7 @@ class App:
    def cb_md_only(self):
        try:
-            md, path = self._do_generate()
+            md, path, _file_items = self._do_generate()
            self.last_md = md
            self.last_md_path = path
            self._update_status(f"md written: {path.name}")
@@ -601,9 +602,10 @@ class App:
        if self.send_thread and self.send_thread.is_alive():
            return
        try:
-            md, path = self._do_generate()
+            md, path, file_items = self._do_generate()
            self.last_md = md
            self.last_md_path = path
            self.last_file_items = file_items
        except Exception as e:
            self._update_status(f"generate error: {e}")
            return
@@ -612,9 +614,11 @@ class App:
        user_msg = dpg.get_value("ai_input")
        base_dir = dpg.get_value("files_base_dir")
        file_items_snap = self.last_file_items
        def do_send():
            try:
-                response = ai_client.send(self.last_md, user_msg, base_dir)
+                response = ai_client.send(self.last_md, user_msg, base_dir, file_items_snap)
                self._update_response(response)
                self._update_status("done")
            except Exception as e: