This commit is contained in:
2026-02-21 16:51:00 -05:00
parent ee1ee1c77e
commit 330c8604c9
5 changed files with 438 additions and 141 deletions

View File

@@ -1,4 +1,4 @@
# aggregate.py # aggregate.py
import tomllib import tomllib
import re import re
import glob import glob
@@ -135,7 +135,7 @@ def main():
with open("config.toml", "rb") as f: with open("config.toml", "rb") as f:
import tomllib import tomllib
config = tomllib.load(f) config = tomllib.load(f)
markdown, output_file = run(config) markdown, output_file, _ = run(config)
print(f"Written: {output_file}") print(f"Written: {output_file}")
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -1,4 +1,4 @@
# ai_client.py # ai_client.py
import tomllib import tomllib
import json import json
import datetime import datetime
@@ -29,6 +29,12 @@ tool_log_callback = None
MAX_TOOL_ROUNDS = 5 MAX_TOOL_ROUNDS = 5
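# Maximum characters per text chunk sent to Anthropic.
# Anthropic's limit is ~200k tokens; we use 180k chars as a safe per-chunk
# ceiling (1 token ~ 3-4 chars, so 180k chars ~ 45-60k tokens, well within
# limits for any single text block).
# NOTE: this bounds the size of each text block only. All chunks of a very
# large aggregated markdown file are still sent together in one user message,
# so their combined size must also fit the model's context window.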
_ANTHROPIC_CHUNK_SIZE = 180_000
# Anthropic system prompt - sent with cache_control so it is cached after the # Anthropic system prompt - sent with cache_control so it is cached after the
# first request and reused on every subsequent call within the TTL window. # first request and reused on every subsequent call within the TTL window.
_ANTHROPIC_SYSTEM = ( _ANTHROPIC_SYSTEM = (
@@ -170,6 +176,7 @@ def reset_session():
_gemini_chat = None _gemini_chat = None
_anthropic_client = None _anthropic_client = None
_anthropic_history = [] _anthropic_history = []
file_cache.reset_client()
# ------------------------------------------------------------------ model listing # ------------------------------------------------------------------ model listing
@@ -379,31 +386,32 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str) -> str:
# ------------------------------------------------------------------ anthropic # ------------------------------------------------------------------ anthropic
# #
# Caching strategy (Anthropic prompt caching): # Sending strategy for Anthropic:
# #
# The Anthropic API caches a contiguous prefix of the input. To maximise # PRIMARY PATH (_send_anthropic_files) - used when file_items are provided
# cache hits we structure every request as follows: # ============
# Each file from config is uploaded via the Anthropic Files API
# (file_cache.get_file_id handles upload + caching by mtime/size).
# Files are sent as individual document/image content blocks in the first
# user message. The discussion history section of the markdown (which is
# small and changes each session) is still sent as a text block.
# This keeps the per-message payload lean and lets the Files API handle
# the heavy lifting of large source files.
# #
# system (array form): # FALLBACK PATH (_send_anthropic_chunked) - used when no file_items, or if
# [0] _ANTHROPIC_SYSTEM text <- cache_control: ephemeral # ============ the Files API path fails
# Stable across the whole session; cached after the first request. # The full aggregated markdown is split into <=_ANTHROPIC_CHUNK_SIZE char
# chunks and sent as separate text content blocks. cache_control:ephemeral
# is placed on the LAST chunk so the whole context prefix is cached together.
# #
# tools: # Caching strategy (Anthropic prompt caching):
# Last tool has cache_control: ephemeral. # - System prompt: cache_control:ephemeral on the text block
# Stable across the whole session; cached together with the system prompt. # - Last tool in _ANTHROPIC_TOOLS: cache_control:ephemeral
# - Context content blocks: cache_control:ephemeral on the last block
# These three form a stable cached prefix that survives across turns.
# #
# messages[0] (first user turn ever, or re-sent each call): # Token cost: cache creation ~25% more than normal input; cache reads ~10%
# content[0]: <context> block <- cache_control: ephemeral # of normal input. Steady-state use is much cheaper after the first request.
# The aggregated markdown. Changes only when the user regenerates.
# A new cache entry is created when it changes; otherwise it's a hit.
# content[1]: user question <- no cache_control (varies every turn)
#
# Subsequent turns (tool results, follow-up questions) are appended to
# _anthropic_history normally without extra cache markers.
#
# Token cost of cache creation is ~25 % more than a normal input token, but
# cache reads cost ~10 % of a normal input token, so steady-state (many
# rounds / sends per session) is much cheaper.
def _ensure_anthropic_client(): def _ensure_anthropic_client():
global _anthropic_client global _anthropic_client
@@ -413,127 +421,337 @@ def _ensure_anthropic_client():
_anthropic_client = anthropic.Anthropic(api_key=creds["anthropic"]["api_key"]) _anthropic_client = anthropic.Anthropic(api_key=creds["anthropic"]["api_key"])
def _send_anthropic(md_content: str, user_message: str, base_dir: str) -> str: def _chunk_text(text: str, chunk_size: int) -> list[str]:
global _anthropic_history """Split text into chunks of at most chunk_size characters."""
import anthropic return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
try:
_ensure_anthropic_client()
# Build the user content: context block (cached) + question (not cached). def _build_chunked_context_blocks(md_content: str) -> list[dict]:
# The cache anchor is placed on the context block so the entire prefix """
# (system + tools + context) is eligible for caching. Split md_content into <=_ANTHROPIC_CHUNK_SIZE char chunks and return
user_content = [ a list of Anthropic text content blocks. cache_control:ephemeral is
{ placed only on the LAST block so the whole prefix is cached as one unit.
"type": "text", """
"text": f"<context>\n{md_content}\n</context>", chunks = _chunk_text(md_content, _ANTHROPIC_CHUNK_SIZE)
"cache_control": {"type": "ephemeral"}, blocks = []
}, for i, chunk in enumerate(chunks):
{ block: dict = {"type": "text", "text": chunk}
"type": "text", if i == len(chunks) - 1:
"text": user_message, block["cache_control"] = {"type": "ephemeral"}
}, blocks.append(block)
] return blocks
_anthropic_history.append({"role": "user", "content": user_content})
_append_comms("OUT", "request", { def _build_files_context_blocks(
"message": f"<context>\n{md_content}\n</context>\n\n{user_message}", md_header: str,
file_items: list[dict],
screenshot_items: list[dict] | None = None,
) -> list[dict]:
"""
Build content blocks for the Files API path.
- md_header : the Discussion History section text (small, sent as text block)
- file_items : list of dicts from aggregate.build_file_items()
each has: path (Path|None), entry (str), content (str), error (bool)
- screenshot_items: list of screenshot paths (Path) to include as image blocks
Returns a list of Anthropic content blocks.
The last block gets cache_control:ephemeral.
"""
blocks: list[dict] = []
# Discussion history / header as a text block (small, always inline)
if md_header.strip():
blocks.append({
"type": "text",
"text": md_header,
}) })
for round_idx in range(MAX_TOOL_ROUNDS): # One document/image block per file
response = _anthropic_client.messages.create( for item in file_items:
model=_model, path: Path | None = item.get("path")
max_tokens=8096, entry: str = item.get("entry", "")
system=[ error: bool = item.get("error", False)
{
"type": "text",
"text": _ANTHROPIC_SYSTEM,
"cache_control": {"type": "ephemeral"},
}
],
tools=_ANTHROPIC_TOOLS,
messages=_anthropic_history,
)
_anthropic_history.append({ if error or path is None:
"role": "assistant", # Fall back to inline text for error entries
"content": response.content blocks.append({
"type": "text",
"text": f"### `{entry}`\n\nERROR: {item.get('content', 'unknown error')}",
})
continue
block_type = file_cache.content_block_type(path)
if block_type == "unsupported":
# Inline as plain text
blocks.append({
"type": "text",
"text": f"### `{entry}`\n\n```\n{item.get('content', '')}\n```",
})
continue
# Try to get/upload via Files API
file_id = file_cache.get_file_id(path)
if file_id is None:
# Unsupported or missing - inline fallback
blocks.append({
"type": "text",
"text": f"### `{entry}`\n\n```\n{item.get('content', '')}\n```",
})
continue
if block_type == "document":
blocks.append({
"type": "document",
"source": {
"type": "file",
"file_id": file_id,
},
"title": path.name,
"citations": {"enabled": False},
})
elif block_type == "image":
blocks.append({
"type": "image",
"source": {
"type": "file",
"file_id": file_id,
},
}) })
text_blocks = [b.text for b in response.content if hasattr(b, "text") and b.text] # Screenshots as image blocks
tool_use_blocks = [ for item in (screenshot_items or []):
{"id": b.id, "name": b.name, "input": b.input} path = item.get("path")
for b in response.content if path is None:
if b.type == "tool_use" continue
] block_type = file_cache.content_block_type(path)
if block_type != "image":
# Collect usage; cache fields are present when caching is active continue
usage_dict: dict = {} file_id = file_cache.get_file_id(path)
if response.usage: if file_id:
usage_dict["input_tokens"] = response.usage.input_tokens blocks.append({
usage_dict["output_tokens"] = response.usage.output_tokens "type": "image",
cache_creation = getattr(response.usage, "cache_creation_input_tokens", None) "source": {
cache_read = getattr(response.usage, "cache_read_input_tokens", None) "type": "file",
if cache_creation is not None: "file_id": file_id,
usage_dict["cache_creation_input_tokens"] = cache_creation },
if cache_read is not None:
usage_dict["cache_read_input_tokens"] = cache_read
_append_comms("IN", "response", {
"round": round_idx,
"stop_reason": response.stop_reason,
"text": "\n".join(text_blocks),
"tool_calls": tool_use_blocks,
"usage": usage_dict,
}) })
if response.stop_reason != "tool_use": # Put cache_control on the last block
break if blocks:
blocks[-1]["cache_control"] = {"type": "ephemeral"}
tool_results = [] return blocks
for block in response.content:
if block.type == "tool_use" and block.name == TOOL_NAME:
script = block.input.get("script", "")
_append_comms("OUT", "tool_call", {
"name": TOOL_NAME,
"id": block.id,
"script": script,
})
output = _run_script(script, base_dir)
_append_comms("IN", "tool_result", {
"name": TOOL_NAME,
"id": block.id,
"output": output,
})
tool_results.append({
"type": "tool_result",
"tool_use_id": block.id,
"content": output,
})
if not tool_results:
break
_anthropic_history.append({ def _run_anthropic_loop(
"role": "user", user_content: list[dict],
"content": tool_results, user_message: str,
}) base_dir: str,
log_summary: str,
) -> str:
"""
Core Anthropic message loop shared by both send paths.
Appends the user turn to _anthropic_history, runs the tool loop,
and returns the final assistant text.
"""
global _anthropic_history
_append_comms("OUT", "tool_result_send", { _anthropic_history.append({"role": "user", "content": user_content})
"results": [
{"tool_use_id": r["tool_use_id"], "content": r["content"]}
for r in tool_results
],
})
text_parts = [ _append_comms("OUT", "request", {
block.text "message": log_summary,
for block in response.content })
if hasattr(block, "text") and block.text
for round_idx in range(MAX_TOOL_ROUNDS):
response = _anthropic_client.messages.create(
model=_model,
max_tokens=8096,
system=[
{
"type": "text",
"text": _ANTHROPIC_SYSTEM,
"cache_control": {"type": "ephemeral"},
}
],
tools=_ANTHROPIC_TOOLS,
messages=_anthropic_history,
extra_headers={"anthropic-beta": "files-api-2025-04-14"},
)
_anthropic_history.append({
"role": "assistant",
"content": response.content
})
text_blocks = [b.text for b in response.content if hasattr(b, "text") and b.text]
tool_use_blocks = [
{"id": b.id, "name": b.name, "input": b.input}
for b in response.content
if b.type == "tool_use"
] ]
return "\n".join(text_parts)
usage_dict: dict = {}
if response.usage:
usage_dict["input_tokens"] = response.usage.input_tokens
usage_dict["output_tokens"] = response.usage.output_tokens
cache_creation = getattr(response.usage, "cache_creation_input_tokens", None)
cache_read = getattr(response.usage, "cache_read_input_tokens", None)
if cache_creation is not None:
usage_dict["cache_creation_input_tokens"] = cache_creation
if cache_read is not None:
usage_dict["cache_read_input_tokens"] = cache_read
_append_comms("IN", "response", {
"round": round_idx,
"stop_reason": response.stop_reason,
"text": "\n".join(text_blocks),
"tool_calls": tool_use_blocks,
"usage": usage_dict,
})
if response.stop_reason != "tool_use":
break
tool_results = []
for block in response.content:
if block.type == "tool_use" and block.name == TOOL_NAME:
script = block.input.get("script", "")
_append_comms("OUT", "tool_call", {
"name": TOOL_NAME,
"id": block.id,
"script": script,
})
output = _run_script(script, base_dir)
_append_comms("IN", "tool_result", {
"name": TOOL_NAME,
"id": block.id,
"output": output,
})
tool_results.append({
"type": "tool_result",
"tool_use_id": block.id,
"content": output,
})
if not tool_results:
break
_anthropic_history.append({
"role": "user",
"content": tool_results,
})
_append_comms("OUT", "tool_result_send", {
"results": [
{"tool_use_id": r["tool_use_id"], "content": r["content"]}
for r in tool_results
],
})
text_parts = [
block.text
for block in response.content
if hasattr(block, "text") and block.text
]
return "\n".join(text_parts)
def _send_anthropic_files(
    md_content: str,
    user_message: str,
    base_dir: str,
    file_items: list[dict],
) -> str:
    """
    Send a request to Anthropic using per-file content blocks.

    The part of md_content that precedes the "## Files" marker (the
    discussion section) is passed inline as text; the files themselves are
    turned into content blocks by _build_files_context_blocks(). If building
    those blocks raises, the failure is recorded in the comms log and the
    whole request is re-routed through _send_anthropic_chunked() with the
    full markdown.

    md_content   : aggregated markdown string
    user_message : the user's question / instruction (sent as the final block)
    base_dir     : forwarded unchanged to the message loop
    file_items   : file dicts handed to _build_files_context_blocks()

    Returns whatever _run_anthropic_loop() (or the chunked fallback) returns.
    """
    # The name is not referenced below; the import still fails fast here if
    # the SDK is not installed.
    import anthropic

    _ensure_anthropic_client()

    # Keep only the text ahead of the Files section. partition() yields the
    # entire string as the head when the marker is absent, i.e. the whole
    # markdown is treated as discussion in that case.
    discussion_section, _sep, _rest = md_content.partition("\n\n---\n\n## Files\n\n")

    try:
        context_blocks = _build_files_context_blocks(discussion_section, file_items)
    except Exception as upload_err:
        # Any failure while preparing file blocks -> log it and fall back to
        # sending the full markdown as chunked text.
        _append_comms("OUT", "request", {
            "message": f"[Files API upload failed: {upload_err}] falling back to chunked text",
        })
        return _send_anthropic_chunked(md_content, user_message, base_dir)

    # Context blocks first, the (uncached) user question last.
    question_block = {
        "type": "text",
        "text": user_message,
    }
    user_content = [*context_blocks, question_block]

    log_summary = (
        f"[Files API: {len(file_items)} file(s) as document/image blocks, "
        f"discussion section {len(discussion_section)} chars inline]\n\n{user_message}"
    )
    return _run_anthropic_loop(user_content, user_message, base_dir, log_summary)
def _send_anthropic_chunked(md_content: str, user_message: str, base_dir: str) -> str:
    """
    Send a request to Anthropic with the markdown inlined as text blocks.

    md_content is converted to a list of text content blocks by
    _build_chunked_context_blocks(); the user question is appended as one
    more text block after them, and the combined list is sent as a single
    user turn through _run_anthropic_loop().

    md_content   : aggregated markdown string
    user_message : the user's question / instruction
    base_dir     : forwarded unchanged to the message loop

    Returns the value produced by _run_anthropic_loop().
    """
    _ensure_anthropic_client()

    chunk_blocks = _build_chunked_context_blocks(md_content)

    # Short description for the comms log instead of the full payload.
    summary = (
        f"[Chunked text: {len(chunk_blocks)} chunk(s), "
        f"{len(md_content)} chars total]\n\n{user_message}"
    )

    # Context chunks first, the user question last.
    payload = [
        *chunk_blocks,
        {
            "type": "text",
            "text": user_message,
        },
    ]
    return _run_anthropic_loop(payload, user_message, base_dir, summary)
def _send_anthropic(
md_content: str,
user_message: str,
base_dir: str,
file_items: list[dict] | None = None,
) -> str:
"""
Entry point for Anthropic sends. Routes to the Files API path when
file_items are provided, otherwise falls back to chunked text.
"""
try:
if file_items:
return _send_anthropic_files(md_content, user_message, base_dir, file_items)
else:
return _send_anthropic_chunked(md_content, user_message, base_dir)
except ProviderError: except ProviderError:
raise raise
except Exception as exc: except Exception as exc:
@@ -541,10 +759,24 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str) -> str:
# ------------------------------------------------------------------ unified send # ------------------------------------------------------------------ unified send
def send(md_content: str, user_message: str, base_dir: str = ".") -> str: def send(
md_content: str,
user_message: str,
base_dir: str = ".",
file_items: list[dict] | None = None,
) -> str:
"""
Send a message to the active provider.
md_content : aggregated markdown string from aggregate.run()
user_message: the user's question / instruction
base_dir : project base directory (for PowerShell tool calls)
file_items : optional list of file dicts from aggregate.build_file_items();
when provided and provider is anthropic, files are uploaded
via the Files API rather than inlined as text
"""
if _provider == "gemini": if _provider == "gemini":
return _send_gemini(md_content, user_message, base_dir) return _send_gemini(md_content, user_message, base_dir)
elif _provider == "anthropic": elif _provider == "anthropic":
return _send_anthropic(md_content, user_message, base_dir) return _send_anthropic(md_content, user_message, base_dir, file_items)
raise ValueError(f"unknown provider: {_provider}") raise ValueError(f"unknown provider: {_provider}")

View File

@@ -19,15 +19,16 @@ paths = [
[screenshots] [screenshots]
base_dir = "C:/Users/Ed/scoop/apps/sharex/current/ShareX/Screenshots/2026-02" base_dir = "C:/Users/Ed/scoop/apps/sharex/current/ShareX/Screenshots/2026-02"
paths = [ paths = []
"C:/Users/Ed/scoop/apps/sharex/current/ShareX/Screenshots/2026-02/python_2026-02-21_16-38-51.png",
]
[discussion] [discussion]
history = [ history = [
"[User]\nAdd support for properly handling files that we have in config with anthropics api (your context will have documentation reguarding it). This will allow us to free up the main markdown file were sending as a message.\nWe also want to make sure the messaging of that giant markdown is chunked as anthropic does have an upper limit per messages.", "[User]\nAdd support for properly handling files that we have in config with anthropics api (your context will have documentation reguarding it). This will allow us to free up the main markdown file were sending as a message.\nWe also want to make sure the messaging of that giant markdown is chunked as anthropic does have an upper limit per messages.",
"[AI]\nGood. Now the big rewrite of `ai_client.py`. The strategy:\n\n- Add a new `_send_anthropic_files` path that uses `file_cache.get_file_id()` to upload files individually as `document`/`image` content blocks, keeping the context message lean\n- The fallback (and also for oversized content) uses chunked text blocks — split the markdown into ≤180k-char chunks (well under the ~200k token limit with headroom for overhead), each sent as a separate `text` content block with `cache_control` on the last one\n- `send()` now accepts an optional `file_items` parameter from aggregate\n- `reset_session()` also resets the file_cache client", "[AI]\nGood. Now the big rewrite of `ai_client.py`. The strategy:\n\n- Add a new `_send_anthropic_files` path that uses `file_cache.get_file_id()` to upload files individually as `document`/`image` content blocks, keeping the context message lean\n- The fallback (and also for oversized content) uses chunked text blocks — split the markdown into ≤180k-char chunks (well under the ~200k token limit with headroom for overhead), each sent as a separate `text` content block with `cache_control` on the last one\n- `send()` now accepts an optional `file_items` parameter from aggregate\n- `reset_session()` also resets the file_cache client",
"[User]\nI will check if this works now.... To test my next message will be a new task naturally...", "[User]\ncontinue. Also got this error when trying to run latest version of this program. However I can use an old version for now.",
"[Ai]\nNow fix `gui.py` — patch the three 2-tuple unpack sites and thread `file_items` through to `send()`:",
"[User]\ncontinue. My bad the error in comms was shown int he screenshot I linked. in the md context. Make sure to update MainContext.md when done.",
"[AI]\nfinished without sending a repose...",
] ]
[ai] [ai]

View File

@@ -16,7 +16,7 @@ DockId=0x00000005,0
[Window][###30] [Window][###30]
Pos=0,654 Pos=0,654
Size=549,696 Size=549,742
Collapsed=0 Collapsed=0
DockId=0x00000025,0 DockId=0x00000025,0
@@ -344,6 +344,66 @@ Size=1743,483
Collapsed=0 Collapsed=0
DockId=0x00000027,0 DockId=0x00000027,0
[Window][###219]
Pos=1578,868
Size=700,440
Collapsed=0
[Window][###313]
Pos=1578,868
Size=700,440
Collapsed=0
[Window][###402]
Pos=1578,868
Size=700,440
Collapsed=0
[Window][###508]
Pos=1578,868
Size=700,440
Collapsed=0
[Window][###634]
Pos=1578,868
Size=700,440
Collapsed=0
[Window][###763]
Pos=1578,868
Size=700,440
Collapsed=0
[Window][###912]
Pos=1578,868
Size=700,440
Collapsed=0
[Window][###1031]
Pos=1578,868
Size=700,440
Collapsed=0
[Window][###1167]
Pos=1578,868
Size=700,440
Collapsed=0
[Window][###1323]
Pos=1578,868
Size=700,440
Collapsed=0
[Window][###1482]
Pos=1578,868
Size=700,440
Collapsed=0
[Window][###1650]
Pos=1578,868
Size=700,440
Collapsed=0
[Docking][Data] [Docking][Data]
DockSpace ID=0x7C6B3D9B Window=0xA87D555D Pos=0,0 Size=3840,2137 Split=X Selected=0x40484D8F DockSpace ID=0x7C6B3D9B Window=0xA87D555D Pos=0,0 Size=3840,2137 Split=X Selected=0x40484D8F
DockNode ID=0x00000003 Parent=0x7C6B3D9B SizeRef=549,1161 Split=Y Selected=0xEE087978 DockNode ID=0x00000003 Parent=0x7C6B3D9B SizeRef=549,1161 Split=Y Selected=0xEE087978

14
gui.py
View File

@@ -303,6 +303,7 @@ class App:
self.ai_response = "" self.ai_response = ""
self.last_md = "" self.last_md = ""
self.last_md_path: Path | None = None self.last_md_path: Path | None = None
self.last_file_items: list = []
self.send_thread: threading.Thread | None = None self.send_thread: threading.Thread | None = None
self.models_thread: threading.Thread | None = None self.models_thread: threading.Thread | None = None
@@ -349,7 +350,7 @@ class App:
_render_comms_entry("comms_scroll", entry, idx) _render_comms_entry("comms_scroll", entry, idx)
def _rebuild_comms_log(self): def _rebuild_comms_log(self):
"""Full redraw from ai_client.get_comms_log() used after clear/reset.""" """Full redraw from ai_client.get_comms_log() - used after clear/reset."""
if not dpg.does_item_exist("comms_scroll"): if not dpg.does_item_exist("comms_scroll"):
return return
dpg.delete_item("comms_scroll", children_only=True) dpg.delete_item("comms_scroll", children_only=True)
@@ -425,7 +426,7 @@ class App:
"model": self.current_model, "model": self.current_model,
} }
def _do_generate(self) -> tuple[str, Path]: def _do_generate(self) -> tuple[str, Path, list]:
self._flush_to_config() self._flush_to_config()
save_config(self.config) save_config(self.config)
return aggregate.run(self.config) return aggregate.run(self.config)
@@ -576,7 +577,7 @@ class App:
def cb_md_only(self): def cb_md_only(self):
try: try:
md, path = self._do_generate() md, path, _file_items = self._do_generate()
self.last_md = md self.last_md = md
self.last_md_path = path self.last_md_path = path
self._update_status(f"md written: {path.name}") self._update_status(f"md written: {path.name}")
@@ -601,9 +602,10 @@ class App:
if self.send_thread and self.send_thread.is_alive(): if self.send_thread and self.send_thread.is_alive():
return return
try: try:
md, path = self._do_generate() md, path, file_items = self._do_generate()
self.last_md = md self.last_md = md
self.last_md_path = path self.last_md_path = path
self.last_file_items = file_items
except Exception as e: except Exception as e:
self._update_status(f"generate error: {e}") self._update_status(f"generate error: {e}")
return return
@@ -612,9 +614,11 @@ class App:
user_msg = dpg.get_value("ai_input") user_msg = dpg.get_value("ai_input")
base_dir = dpg.get_value("files_base_dir") base_dir = dpg.get_value("files_base_dir")
file_items_snap = self.last_file_items
def do_send(): def do_send():
try: try:
response = ai_client.send(self.last_md, user_msg, base_dir) response = ai_client.send(self.last_md, user_msg, base_dir, file_items_snap)
self._update_response(response) self._update_response(response)
self._update_status("done") self._update_status("done")
except Exception as e: except Exception as e: