This commit is contained in:
2026-02-21 16:51:00 -05:00
parent ee1ee1c77e
commit 330c8604c9
5 changed files with 438 additions and 141 deletions

View File

@@ -1,4 +1,4 @@
# aggregate.py
# aggregate.py
import tomllib
import re
import glob
@@ -135,7 +135,7 @@ def main():
with open("config.toml", "rb") as f:
import tomllib
config = tomllib.load(f)
markdown, output_file = run(config)
markdown, output_file, _ = run(config)
print(f"Written: {output_file}")
if __name__ == "__main__":

View File

@@ -1,4 +1,4 @@
# ai_client.py
# ai_client.py
import tomllib
import json
import datetime
@@ -29,6 +29,12 @@ tool_log_callback = None
MAX_TOOL_ROUNDS = 5
# Maximum characters per text chunk sent to Anthropic.
# Anthropic's limit is ~200k tokens; we use 180k chars as a safe ceiling
# (1 token ~ 3-4 chars, so 180k chars ~ 45-60k tokens, well within limits
# even for very large aggregated markdown files).
_ANTHROPIC_CHUNK_SIZE = 180_000
# Anthropic system prompt - sent with cache_control so it is cached after the
# first request and reused on every subsequent call within the TTL window.
_ANTHROPIC_SYSTEM = (
@@ -170,6 +176,7 @@ def reset_session():
_gemini_chat = None
_anthropic_client = None
_anthropic_history = []
file_cache.reset_client()
# ------------------------------------------------------------------ model listing
@@ -379,31 +386,32 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str) -> str:
# ------------------------------------------------------------------ anthropic
#
# Caching strategy (Anthropic prompt caching):
# Sending strategy for Anthropic:
#
# The Anthropic API caches a contiguous prefix of the input. To maximise
# cache hits we structure every request as follows:
# PRIMARY PATH (_send_anthropic_files) - used when file_items are provided
# ============
# Each file from config is uploaded via the Anthropic Files API
# (file_cache.get_file_id handles upload + caching by mtime/size).
# Files are sent as individual document/image content blocks in the first
# user message. The discussion history section of the markdown (which is
# small and changes each session) is still sent as a text block.
# This keeps the per-message payload lean and lets the Files API handle
# the heavy lifting of large source files.
#
# system (array form):
# [0] _ANTHROPIC_SYSTEM text <- cache_control: ephemeral
# Stable across the whole session; cached after the first request.
# FALLBACK PATH (_send_anthropic_chunked) - used when no file_items, or if
# ============ the Files API path fails
# The full aggregated markdown is split into <=_ANTHROPIC_CHUNK_SIZE char
# chunks and sent as separate text content blocks. cache_control:ephemeral
# is placed on the LAST chunk so the whole context prefix is cached together.
#
# tools:
# Last tool has cache_control: ephemeral.
# Stable across the whole session; cached together with the system prompt.
# Caching strategy (Anthropic prompt caching):
# - System prompt: cache_control:ephemeral on the text block
# - Last tool in _ANTHROPIC_TOOLS: cache_control:ephemeral
# - Context content blocks: cache_control:ephemeral on the last block
# These three form a stable cached prefix that survives across turns.
#
# messages[0] (first user turn ever, or re-sent each call):
# content[0]: <context> block <- cache_control: ephemeral
# The aggregated markdown. Changes only when the user regenerates.
# A new cache entry is created when it changes; otherwise it's a hit.
# content[1]: user question <- no cache_control (varies every turn)
#
# Subsequent turns (tool results, follow-up questions) are appended to
# _anthropic_history normally without extra cache markers.
#
# Token cost of cache creation is ~25 % more than a normal input token, but
# cache reads cost ~10 % of a normal input token, so steady-state (many
# rounds / sends per session) is much cheaper.
# Token cost: cache creation ~25% more than normal input; cache reads ~10%
# of normal input. Steady-state use is much cheaper after the first request.
def _ensure_anthropic_client():
global _anthropic_client
@@ -413,127 +421,337 @@ def _ensure_anthropic_client():
_anthropic_client = anthropic.Anthropic(api_key=creds["anthropic"]["api_key"])
def _send_anthropic(md_content: str, user_message: str, base_dir: str) -> str:
global _anthropic_history
import anthropic
def _chunk_text(text: str, chunk_size: int) -> list[str]:
"""Split text into chunks of at most chunk_size characters."""
return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
try:
_ensure_anthropic_client()
# Build the user content: context block (cached) + question (not cached).
# The cache anchor is placed on the context block so the entire prefix
# (system + tools + context) is eligible for caching.
user_content = [
{
"type": "text",
"text": f"<context>\n{md_content}\n</context>",
"cache_control": {"type": "ephemeral"},
},
{
"type": "text",
"text": user_message,
},
]
def _build_chunked_context_blocks(md_content: str) -> list[dict]:
    """
    Turn md_content into a list of Anthropic text content blocks, one per
    chunk of at most _ANTHROPIC_CHUNK_SIZE characters.

    Only the final block carries cache_control:ephemeral, so the whole
    context prefix is cached as one unit. When md_content produces no
    chunks the returned list is empty.
    """
    blocks: list[dict] = [
        {"type": "text", "text": piece}
        for piece in _chunk_text(md_content, _ANTHROPIC_CHUNK_SIZE)
    ]
    # Tag only the last block with the cache marker.
    if blocks:
        blocks[-1]["cache_control"] = {"type": "ephemeral"}
    return blocks
_anthropic_history.append({"role": "user", "content": user_content})
_append_comms("OUT", "request", {
"message": f"<context>\n{md_content}\n</context>\n\n{user_message}",
def _build_files_context_blocks(
md_header: str,
file_items: list[dict],
screenshot_items: list[dict] | None = None,
) -> list[dict]:
"""
Build content blocks for the Files API path.
- md_header : the Discussion History section text (small, sent as text block)
- file_items : list of dicts from aggregate.build_file_items()
each has: path (Path|None), entry (str), content (str), error (bool)
- screenshot_items: optional list of dicts, each with a path (Path|None) key; entries whose path is an image are included as image blocks
Returns a list of Anthropic content blocks.
The last block gets cache_control:ephemeral.
"""
blocks: list[dict] = []
# Discussion history / header as a text block (small, always inline)
if md_header.strip():
blocks.append({
"type": "text",
"text": md_header,
})
for round_idx in range(MAX_TOOL_ROUNDS):
response = _anthropic_client.messages.create(
model=_model,
max_tokens=8096,
system=[
{
"type": "text",
"text": _ANTHROPIC_SYSTEM,
"cache_control": {"type": "ephemeral"},
}
],
tools=_ANTHROPIC_TOOLS,
messages=_anthropic_history,
)
# One document/image block per file
for item in file_items:
path: Path | None = item.get("path")
entry: str = item.get("entry", "")
error: bool = item.get("error", False)
_anthropic_history.append({
"role": "assistant",
"content": response.content
if error or path is None:
# Fall back to inline text for error entries
blocks.append({
"type": "text",
"text": f"### `{entry}`\n\nERROR: {item.get('content', 'unknown error')}",
})
continue
block_type = file_cache.content_block_type(path)
if block_type == "unsupported":
# Inline as plain text
blocks.append({
"type": "text",
"text": f"### `{entry}`\n\n```\n{item.get('content', '')}\n```",
})
continue
# Try to get/upload via Files API
file_id = file_cache.get_file_id(path)
if file_id is None:
# Unsupported or missing - inline fallback
blocks.append({
"type": "text",
"text": f"### `{entry}`\n\n```\n{item.get('content', '')}\n```",
})
continue
if block_type == "document":
blocks.append({
"type": "document",
"source": {
"type": "file",
"file_id": file_id,
},
"title": path.name,
"citations": {"enabled": False},
})
elif block_type == "image":
blocks.append({
"type": "image",
"source": {
"type": "file",
"file_id": file_id,
},
})
text_blocks = [b.text for b in response.content if hasattr(b, "text") and b.text]
tool_use_blocks = [
{"id": b.id, "name": b.name, "input": b.input}
for b in response.content
if b.type == "tool_use"
]
# Collect usage; cache fields are present when caching is active
usage_dict: dict = {}
if response.usage:
usage_dict["input_tokens"] = response.usage.input_tokens
usage_dict["output_tokens"] = response.usage.output_tokens
cache_creation = getattr(response.usage, "cache_creation_input_tokens", None)
cache_read = getattr(response.usage, "cache_read_input_tokens", None)
if cache_creation is not None:
usage_dict["cache_creation_input_tokens"] = cache_creation
if cache_read is not None:
usage_dict["cache_read_input_tokens"] = cache_read
_append_comms("IN", "response", {
"round": round_idx,
"stop_reason": response.stop_reason,
"text": "\n".join(text_blocks),
"tool_calls": tool_use_blocks,
"usage": usage_dict,
# Screenshots as image blocks
for item in (screenshot_items or []):
path = item.get("path")
if path is None:
continue
block_type = file_cache.content_block_type(path)
if block_type != "image":
continue
file_id = file_cache.get_file_id(path)
if file_id:
blocks.append({
"type": "image",
"source": {
"type": "file",
"file_id": file_id,
},
})
if response.stop_reason != "tool_use":
break
# Put cache_control on the last block
if blocks:
blocks[-1]["cache_control"] = {"type": "ephemeral"}
tool_results = []
for block in response.content:
if block.type == "tool_use" and block.name == TOOL_NAME:
script = block.input.get("script", "")
_append_comms("OUT", "tool_call", {
"name": TOOL_NAME,
"id": block.id,
"script": script,
})
output = _run_script(script, base_dir)
_append_comms("IN", "tool_result", {
"name": TOOL_NAME,
"id": block.id,
"output": output,
})
tool_results.append({
"type": "tool_result",
"tool_use_id": block.id,
"content": output,
})
return blocks
if not tool_results:
break
_anthropic_history.append({
"role": "user",
"content": tool_results,
})
def _run_anthropic_loop(
user_content: list[dict],
user_message: str,
base_dir: str,
log_summary: str,
) -> str:
"""
Core Anthropic message loop shared by both send paths.
Appends the user turn to _anthropic_history, runs the tool loop,
and returns the final assistant text.
"""
global _anthropic_history
_append_comms("OUT", "tool_result_send", {
"results": [
{"tool_use_id": r["tool_use_id"], "content": r["content"]}
for r in tool_results
],
})
_anthropic_history.append({"role": "user", "content": user_content})
text_parts = [
block.text
for block in response.content
if hasattr(block, "text") and block.text
_append_comms("OUT", "request", {
"message": log_summary,
})
for round_idx in range(MAX_TOOL_ROUNDS):
response = _anthropic_client.messages.create(
model=_model,
max_tokens=8096,
system=[
{
"type": "text",
"text": _ANTHROPIC_SYSTEM,
"cache_control": {"type": "ephemeral"},
}
],
tools=_ANTHROPIC_TOOLS,
messages=_anthropic_history,
extra_headers={"anthropic-beta": "files-api-2025-04-14"},
)
_anthropic_history.append({
"role": "assistant",
"content": response.content
})
text_blocks = [b.text for b in response.content if hasattr(b, "text") and b.text]
tool_use_blocks = [
{"id": b.id, "name": b.name, "input": b.input}
for b in response.content
if b.type == "tool_use"
]
return "\n".join(text_parts)
usage_dict: dict = {}
if response.usage:
usage_dict["input_tokens"] = response.usage.input_tokens
usage_dict["output_tokens"] = response.usage.output_tokens
cache_creation = getattr(response.usage, "cache_creation_input_tokens", None)
cache_read = getattr(response.usage, "cache_read_input_tokens", None)
if cache_creation is not None:
usage_dict["cache_creation_input_tokens"] = cache_creation
if cache_read is not None:
usage_dict["cache_read_input_tokens"] = cache_read
_append_comms("IN", "response", {
"round": round_idx,
"stop_reason": response.stop_reason,
"text": "\n".join(text_blocks),
"tool_calls": tool_use_blocks,
"usage": usage_dict,
})
if response.stop_reason != "tool_use":
break
tool_results = []
for block in response.content:
if block.type == "tool_use" and block.name == TOOL_NAME:
script = block.input.get("script", "")
_append_comms("OUT", "tool_call", {
"name": TOOL_NAME,
"id": block.id,
"script": script,
})
output = _run_script(script, base_dir)
_append_comms("IN", "tool_result", {
"name": TOOL_NAME,
"id": block.id,
"output": output,
})
tool_results.append({
"type": "tool_result",
"tool_use_id": block.id,
"content": output,
})
if not tool_results:
break
_anthropic_history.append({
"role": "user",
"content": tool_results,
})
_append_comms("OUT", "tool_result_send", {
"results": [
{"tool_use_id": r["tool_use_id"], "content": r["content"]}
for r in tool_results
],
})
text_parts = [
block.text
for block in response.content
if hasattr(block, "text") and block.text
]
return "\n".join(text_parts)
def _send_anthropic_files(
    md_content: str,
    user_message: str,
    base_dir: str,
    file_items: list[dict],
) -> str:
    """
    Files API send path.

    md_content   : full aggregated markdown; only the part before the
                   "## Files" section is sent inline as text
    user_message : the user's question, appended as the final text block
    base_dir     : passed through to the message loop for tool calls
    file_items   : file dicts handed to _build_files_context_blocks, which
                   turns them into content blocks

    If building the file blocks raises, the failure is logged to comms and
    the request is sent through _send_anthropic_chunked instead.
    Returns the final assistant text from _run_anthropic_loop.
    """
    import anthropic

    _ensure_anthropic_client()

    # Everything before the files marker is the inline section. partition()
    # returns the whole string as the head when the marker is absent, i.e.
    # the entire markdown is treated as discussion/screenshots.
    files_marker = "\n\n---\n\n## Files\n\n"
    discussion_section, _, _ = md_content.partition(files_marker)

    try:
        context_blocks = _build_files_context_blocks(discussion_section, file_items)
    except Exception as upload_err:
        fallback_note = (
            f"[Files API upload failed: {upload_err}] falling back to chunked text"
        )
        _append_comms("OUT", "request", {"message": fallback_note})
        return _send_anthropic_chunked(md_content, user_message, base_dir)

    # Context blocks first, the user's question last.
    question_block = {"type": "text", "text": user_message}
    user_content = [*context_blocks, question_block]

    log_summary = (
        f"[Files API: {len(file_items)} file(s) as document/image blocks, "
        f"discussion section {len(discussion_section)} chars inline]\n\n{user_message}"
    )
    return _run_anthropic_loop(user_content, user_message, base_dir, log_summary)
def _send_anthropic_chunked(md_content: str, user_message: str, base_dir: str) -> str:
    """
    Chunked text fallback path.

    md_content is converted into text content blocks by
    _build_chunked_context_blocks (chunks of at most _ANTHROPIC_CHUNK_SIZE
    characters, cache_control on the last one). The user question is
    appended as a final text block and the result is handed to
    _run_anthropic_loop, whose return value is passed back unchanged.
    """
    _ensure_anthropic_client()

    context_blocks = _build_chunked_context_blocks(md_content)
    question_block = {"type": "text", "text": user_message}
    user_content = [*context_blocks, question_block]

    # Short description of the payload for the comms log.
    log_summary = (
        f"[Chunked text: {len(context_blocks)} chunk(s), "
        f"{len(md_content)} chars total]\n\n{user_message}"
    )
    return _run_anthropic_loop(user_content, user_message, base_dir, log_summary)
def _send_anthropic(
md_content: str,
user_message: str,
base_dir: str,
file_items: list[dict] | None = None,
) -> str:
"""
Entry point for Anthropic sends. Routes to the Files API path when
file_items are provided, otherwise falls back to chunked text.
"""
try:
if file_items:
return _send_anthropic_files(md_content, user_message, base_dir, file_items)
else:
return _send_anthropic_chunked(md_content, user_message, base_dir)
except ProviderError:
raise
except Exception as exc:
@@ -541,10 +759,24 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str) -> str:
# ------------------------------------------------------------------ unified send
def send(md_content: str, user_message: str, base_dir: str = ".") -> str:
def send(
md_content: str,
user_message: str,
base_dir: str = ".",
file_items: list[dict] | None = None,
) -> str:
"""
Send a message to the active provider.
md_content : aggregated markdown string from aggregate.run()
user_message: the user's question / instruction
base_dir : project base directory (for PowerShell tool calls)
file_items : optional list of file dicts from aggregate.build_file_items();
when provided and provider is anthropic, files are uploaded
via the Files API rather than inlined as text
"""
if _provider == "gemini":
return _send_gemini(md_content, user_message, base_dir)
elif _provider == "anthropic":
return _send_anthropic(md_content, user_message, base_dir)
raise ValueError(f"unknown provider: {_provider}")
return _send_anthropic(md_content, user_message, base_dir, file_items)
raise ValueError(f"unknown provider: {_provider}")

View File

@@ -19,15 +19,16 @@ paths = [
[screenshots]
base_dir = "C:/Users/Ed/scoop/apps/sharex/current/ShareX/Screenshots/2026-02"
paths = [
"C:/Users/Ed/scoop/apps/sharex/current/ShareX/Screenshots/2026-02/python_2026-02-21_16-38-51.png",
]
paths = []
[discussion]
history = [
"[User]\nAdd support for properly handling files that we have in config with anthropics api (your context will have documentation reguarding it). This will allow us to free up the main markdown file were sending as a message.\nWe also want to make sure the messaging of that giant markdown is chunked as anthropic does have an upper limit per messages.",
"[AI]\nGood. Now the big rewrite of `ai_client.py`. The strategy:\n\n- Add a new `_send_anthropic_files` path that uses `file_cache.get_file_id()` to upload files individually as `document`/`image` content blocks, keeping the context message lean\n- The fallback (and also for oversized content) uses chunked text blocks — split the markdown into ≤180k-char chunks (well under the ~200k token limit with headroom for overhead), each sent as a separate `text` content block with `cache_control` on the last one\n- `send()` now accepts an optional `file_items` parameter from aggregate\n- `reset_session()` also resets the file_cache client",
"[User]\nI will check if this works now.... To test my next message will be a new task naturally...",
"[User]\ncontinue. Also got this error when trying to run latest version of this program. However I can use an old version for now.",
"[Ai]\nNow fix `gui.py` — patch the three 2-tuple unpack sites and thread `file_items` through to `send()`:",
"[User]\ncontinue. My bad the error in comms was shown int he screenshot I linked. in the md context. Make sure to update MainContext.md when done.",
"[AI]\nfinished without sending a repose...",
]
[ai]

View File

@@ -16,7 +16,7 @@ DockId=0x00000005,0
[Window][###30]
Pos=0,654
Size=549,696
Size=549,742
Collapsed=0
DockId=0x00000025,0
@@ -344,6 +344,66 @@ Size=1743,483
Collapsed=0
DockId=0x00000027,0
[Window][###219]
Pos=1578,868
Size=700,440
Collapsed=0
[Window][###313]
Pos=1578,868
Size=700,440
Collapsed=0
[Window][###402]
Pos=1578,868
Size=700,440
Collapsed=0
[Window][###508]
Pos=1578,868
Size=700,440
Collapsed=0
[Window][###634]
Pos=1578,868
Size=700,440
Collapsed=0
[Window][###763]
Pos=1578,868
Size=700,440
Collapsed=0
[Window][###912]
Pos=1578,868
Size=700,440
Collapsed=0
[Window][###1031]
Pos=1578,868
Size=700,440
Collapsed=0
[Window][###1167]
Pos=1578,868
Size=700,440
Collapsed=0
[Window][###1323]
Pos=1578,868
Size=700,440
Collapsed=0
[Window][###1482]
Pos=1578,868
Size=700,440
Collapsed=0
[Window][###1650]
Pos=1578,868
Size=700,440
Collapsed=0
[Docking][Data]
DockSpace ID=0x7C6B3D9B Window=0xA87D555D Pos=0,0 Size=3840,2137 Split=X Selected=0x40484D8F
DockNode ID=0x00000003 Parent=0x7C6B3D9B SizeRef=549,1161 Split=Y Selected=0xEE087978

14
gui.py
View File

@@ -303,6 +303,7 @@ class App:
self.ai_response = ""
self.last_md = ""
self.last_md_path: Path | None = None
self.last_file_items: list = []
self.send_thread: threading.Thread | None = None
self.models_thread: threading.Thread | None = None
@@ -349,7 +350,7 @@ class App:
_render_comms_entry("comms_scroll", entry, idx)
def _rebuild_comms_log(self):
"""Full redraw from ai_client.get_comms_log() used after clear/reset."""
"""Full redraw from ai_client.get_comms_log() - used after clear/reset."""
if not dpg.does_item_exist("comms_scroll"):
return
dpg.delete_item("comms_scroll", children_only=True)
@@ -425,7 +426,7 @@ class App:
"model": self.current_model,
}
def _do_generate(self) -> tuple[str, Path]:
def _do_generate(self) -> tuple[str, Path, list]:
self._flush_to_config()
save_config(self.config)
return aggregate.run(self.config)
@@ -576,7 +577,7 @@ class App:
def cb_md_only(self):
try:
md, path = self._do_generate()
md, path, _file_items = self._do_generate()
self.last_md = md
self.last_md_path = path
self._update_status(f"md written: {path.name}")
@@ -601,9 +602,10 @@ class App:
if self.send_thread and self.send_thread.is_alive():
return
try:
md, path = self._do_generate()
md, path, file_items = self._do_generate()
self.last_md = md
self.last_md_path = path
self.last_file_items = file_items
except Exception as e:
self._update_status(f"generate error: {e}")
return
@@ -612,9 +614,11 @@ class App:
user_msg = dpg.get_value("ai_input")
base_dir = dpg.get_value("files_base_dir")
file_items_snap = self.last_file_items
def do_send():
try:
response = ai_client.send(self.last_md, user_msg, base_dir)
response = ai_client.send(self.last_md, user_msg, base_dir, file_items_snap)
self._update_response(response)
self._update_status("done")
except Exception as e: