add file cache
This commit is contained in:
37
aggregate.py
37
aggregate.py
@@ -1,4 +1,4 @@
|
||||
# aggregate.py
|
||||
# aggregate.py
|
||||
import tomllib
|
||||
import re
|
||||
import glob
|
||||
@@ -74,6 +74,36 @@ def build_screenshots_section(base_dir: Path, screenshots: list[str]) -> str:
|
||||
sections.append(f"### `{original}`\n\n})")
|
||||
return "\n\n---\n\n".join(sections)
|
||||
|
||||
|
||||
def build_file_items(base_dir: Path, files: list[str]) -> list[dict]:
    """
    Describe every configured file as a dict, so ai_client can upload files
    individually instead of inlining everything into the markdown.

    Each config entry is expanded with resolve_paths(); one dict is produced
    per resolved path, or a single error dict when the entry matched nothing.

    Each dict has:
      path    : the path yielded by resolve_paths(), or None when nothing matched
      entry   : str  (original config entry string)
      content : str  (file text decoded as UTF-8, or an "ERROR: ..." string)
      error   : bool (True when content is an error string)
    """
    def describe(entry: str, path) -> dict:
        # Reading never raises out of here: any failure becomes an error item
        # so one bad file cannot abort the whole aggregation.
        try:
            text, failed = path.read_text(encoding="utf-8"), False
        except FileNotFoundError:
            text, failed = f"ERROR: file not found: {path}", True
        except Exception as exc:
            text, failed = f"ERROR: {exc}", True
        return {"path": path, "entry": entry, "content": text, "error": failed}

    collected = []
    for entry in files:
        matched = resolve_paths(base_dir, entry)
        if matched:
            collected.extend(describe(entry, found) for found in matched)
        else:
            # Keep a placeholder so the caller can see which entry was empty.
            collected.append({"path": None, "entry": entry, "content": f"ERROR: no files matched: {entry}", "error": True})
    return collected
|
||||
def build_markdown(base_dir: Path, files: list[str], screenshot_base_dir: Path, screenshots: list[str], history: list[str]) -> str:
|
||||
parts = []
|
||||
if history:
|
||||
@@ -98,7 +128,8 @@ def run(config: dict) -> tuple[str, Path]:
|
||||
output_file = output_dir / f"{namespace}_{increment:03d}.md"
|
||||
markdown = build_markdown(base_dir, files, screenshot_base_dir, screenshots, history)
|
||||
output_file.write_text(markdown, encoding="utf-8")
|
||||
return markdown, output_file
|
||||
file_items = build_file_items(base_dir, files)
|
||||
return markdown, output_file, file_items
|
||||
|
||||
def main():
|
||||
with open("config.toml", "rb") as f:
|
||||
@@ -109,3 +140,5 @@ def main():
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@ import tomllib
|
||||
import json
|
||||
import datetime
|
||||
from pathlib import Path
|
||||
import file_cache
|
||||
|
||||
_provider: str = "gemini"
|
||||
_model: str = "gemini-2.0-flash"
|
||||
@@ -546,3 +547,4 @@ def send(md_content: str, user_message: str, base_dir: str = ".") -> str:
|
||||
elif _provider == "anthropic":
|
||||
return _send_anthropic(md_content, user_message, base_dir)
|
||||
raise ValueError(f"unknown provider: {_provider}")
|
||||
|
||||
|
||||
10
config.toml
10
config.toml
@@ -14,6 +14,7 @@ paths = [
|
||||
"MainContext.md",
|
||||
"C:/projects/manual_slop/shell_runner.py",
|
||||
"C:/projects/manual_slop/session_logger.py",
|
||||
"C:/projects/manual_slop/docs/anthropic_api_files.md",
|
||||
]
|
||||
|
||||
[screenshots]
|
||||
@@ -22,12 +23,9 @@ paths = []
|
||||
|
||||
[discussion]
|
||||
history = [
|
||||
"[User]\nAdd rich visualization for the comms history's json, we shouldn't see the raw text unelss we don't have the json object type detected. Also update MainContext.md with latest state of this codebase.",
|
||||
"[Claude]\nThe file uses CRLF. I'll use a Python script to do the targeted replacement reliably:",
|
||||
"[API]\nERROR: Error code: 400 - {'type': 'error', 'error': {'type': 'invalid_request_error', 'message': 'prompt is too long: 201675 tokens > 200000 maximum'}, 'request_id': 'req_011CYMuPtmZC7akLfcaT7b7r'}",
|
||||
"[User]\nA _patch_gui.py was left over as an artifact. You were in the middle of making edits before a error reponse occured.\nTo save space on the message I removed the prompt caching documentation from the context.\nWe may have to change how messages are sent or discussion so that its chunked appropriately.",
|
||||
"[Claude]\n`gui.py` is complete with all the rich rendering code. Now let me delete the artifact and update `MainContext.md`:",
|
||||
"[User]\nartifact deleted MainContext.md has not been updated though. (Removed anthropic documetantion from context, if you want any docs back you can look up docs in ./docs)",
|
||||
"[User]\nAdd support for properly handling files that we have in config with anthropics api (your context will have documentation reguarding it). This will allow us to free up the main markdown file were sending as a message.\nWe also want to make sure the messaging of that giant markdown is chunked as anthropic does have an upper limit per messages.",
|
||||
"[Claude]\nGood. Now the big rewrite of `ai_client.py`. The strategy:\n\n- Add a new `_send_anthropic_files` path that uses `file_cache.get_file_id()` to upload files individually as `document`/`image` content blocks, keeping the context message lean\n- The fallback (and also for oversized content) uses chunked text blocks — split the markdown into ≤180k-char chunks (well under the ~200k token limit with headroom for overhead), each sent as a separate `text` content block with `cache_control` on the last one\n- `send()` now accepts an optional `file_items` parameter from aggregate\n- `reset_session()` also resets the file_cache client",
|
||||
"[User]\nI will check if this works now....",
|
||||
]
|
||||
|
||||
[ai]
|
||||
|
||||
1149
docs/anthropic_api_files.md
Normal file
1149
docs/anthropic_api_files.md
Normal file
File diff suppressed because it is too large
Load Diff
@@ -16,9 +16,9 @@ DockId=0x00000005,0
|
||||
|
||||
[Window][###30]
|
||||
Pos=0,654
|
||||
Size=549,696
|
||||
Size=549,742
|
||||
Collapsed=0
|
||||
DockId=0x00000021,0
|
||||
DockId=0x00000025,0
|
||||
|
||||
[Window][###66]
|
||||
Pos=0,1491
|
||||
@@ -42,7 +42,7 @@ DockId=0x0000000D,0
|
||||
Pos=378,494
|
||||
Size=829,1643
|
||||
Collapsed=0
|
||||
DockId=0x00000023,0
|
||||
DockId=0x0000001F,0
|
||||
|
||||
[Window][###103]
|
||||
Pos=1613,1330
|
||||
@@ -103,13 +103,13 @@ DockId=0x00000014,0
|
||||
Pos=2531,0
|
||||
Size=1309,1690
|
||||
Collapsed=0
|
||||
DockId=0x00000023,0
|
||||
DockId=0x0000001F,0
|
||||
|
||||
[Window][###106]
|
||||
Pos=366,427
|
||||
Size=847,1710
|
||||
Pos=551,0
|
||||
Size=1544,2137
|
||||
Collapsed=0
|
||||
DockId=0x00000012,0
|
||||
DockId=0x00000012,2
|
||||
|
||||
[Window][###100]
|
||||
Pos=2622,0
|
||||
@@ -146,7 +146,7 @@ Collapsed=0
|
||||
Pos=551,0
|
||||
Size=1060,2137
|
||||
Collapsed=0
|
||||
DockId=0x00000023,0
|
||||
DockId=0x0000001F,0
|
||||
|
||||
[Window][###78]
|
||||
Pos=0,1422
|
||||
@@ -164,7 +164,7 @@ DockId=0x00000015,0
|
||||
Pos=551,0
|
||||
Size=1060,2137
|
||||
Collapsed=0
|
||||
DockId=0x00000023,1
|
||||
DockId=0x0000001F,1
|
||||
|
||||
[Window][###110]
|
||||
Pos=2438,0
|
||||
@@ -174,9 +174,9 @@ DockId=0x00000016,0
|
||||
|
||||
[Window][###112]
|
||||
Pos=551,0
|
||||
Size=1060,2137
|
||||
Size=1544,2137
|
||||
Collapsed=0
|
||||
DockId=0x00000023,2
|
||||
DockId=0x00000012,1
|
||||
|
||||
[Window][###145]
|
||||
Pos=1578,868
|
||||
@@ -224,7 +224,7 @@ DockId=0x00000022,0
|
||||
Pos=1400,0
|
||||
Size=1220,1412
|
||||
Collapsed=0
|
||||
DockId=0x00000023,0
|
||||
DockId=0x0000001F,0
|
||||
|
||||
[Window][###92]
|
||||
Pos=551,0
|
||||
@@ -284,6 +284,66 @@ Pos=1578,868
|
||||
Size=700,440
|
||||
Collapsed=0
|
||||
|
||||
[Window][###596]
|
||||
Pos=1578,868
|
||||
Size=700,440
|
||||
Collapsed=0
|
||||
|
||||
[Window][###691]
|
||||
Pos=1578,868
|
||||
Size=700,440
|
||||
Collapsed=0
|
||||
|
||||
[Window][###803]
|
||||
Pos=1578,868
|
||||
Size=700,440
|
||||
Collapsed=0
|
||||
|
||||
[Window][###935]
|
||||
Pos=1578,868
|
||||
Size=700,440
|
||||
Collapsed=0
|
||||
|
||||
[Window][###1073]
|
||||
Pos=1578,868
|
||||
Size=700,440
|
||||
Collapsed=0
|
||||
|
||||
[Window][###1217]
|
||||
Pos=1578,868
|
||||
Size=700,440
|
||||
Collapsed=0
|
||||
|
||||
[Window][###72]
|
||||
Pos=0,1398
|
||||
Size=549,739
|
||||
Collapsed=0
|
||||
DockId=0x00000026,0
|
||||
|
||||
[Window][###82]
|
||||
Pos=2097,0
|
||||
Size=1743,1787
|
||||
Collapsed=0
|
||||
DockId=0x0000001F,1
|
||||
|
||||
[Window][###89]
|
||||
Pos=551,0
|
||||
Size=1544,2137
|
||||
Collapsed=0
|
||||
DockId=0x00000012,0
|
||||
|
||||
[Window][###97]
|
||||
Pos=2097,1789
|
||||
Size=1743,348
|
||||
Collapsed=0
|
||||
DockId=0x00000020,0
|
||||
|
||||
[Window][###104]
|
||||
Pos=2097,0
|
||||
Size=1743,1787
|
||||
Collapsed=0
|
||||
DockId=0x0000001F,0
|
||||
|
||||
[Docking][Data]
|
||||
DockSpace ID=0x7C6B3D9B Window=0xA87D555D Pos=0,0 Size=3840,2137 Split=X Selected=0x40484D8F
|
||||
DockNode ID=0x00000003 Parent=0x7C6B3D9B SizeRef=549,1161 Split=Y Selected=0xEE087978
|
||||
@@ -291,7 +351,9 @@ DockSpace ID=0x7C6B3D9B Window=0xA87D555D Pos=0,0 Size=3840,2137
|
||||
DockNode ID=0x00000006 Parent=0x00000003 SizeRef=235,805 Split=Y Selected=0x5F94F9BD
|
||||
DockNode ID=0x00000009 Parent=0x00000006 SizeRef=235,453 Split=Y Selected=0x5F94F9BD
|
||||
DockNode ID=0x0000001D Parent=0x00000009 SizeRef=364,766 Split=Y Selected=0x5F94F9BD
|
||||
DockNode ID=0x00000021 Parent=0x0000001D SizeRef=549,696 Selected=0x5F94F9BD
|
||||
DockNode ID=0x00000021 Parent=0x0000001D SizeRef=549,696 Split=Y Selected=0x5F94F9BD
|
||||
DockNode ID=0x00000025 Parent=0x00000021 SizeRef=549,742 Selected=0x5F94F9BD
|
||||
DockNode ID=0x00000026 Parent=0x00000021 SizeRef=549,739 Selected=0xBEC5E8CB
|
||||
DockNode ID=0x00000022 Parent=0x0000001D SizeRef=549,785 Selected=0x0CE534DB
|
||||
DockNode ID=0x0000001E Parent=0x00000009 SizeRef=364,715 Selected=0xF475F06A
|
||||
DockNode ID=0x0000000A Parent=0x00000006 SizeRef=235,350 Selected=0x80199DAE
|
||||
@@ -299,15 +361,17 @@ DockSpace ID=0x7C6B3D9B Window=0xA87D555D Pos=0,0 Size=3840,2137
|
||||
DockNode ID=0x00000001 Parent=0x00000004 SizeRef=1060,1161 Split=Y Selected=0x40484D8F
|
||||
DockNode ID=0x00000007 Parent=0x00000001 SizeRef=595,492 Selected=0xBA13FCDE
|
||||
DockNode ID=0x00000008 Parent=0x00000001 SizeRef=595,1643 Split=X Selected=0x40484D8F
|
||||
DockNode ID=0x0000000F Parent=0x00000008 SizeRef=847,2137 Split=Y Selected=0x07E8375F
|
||||
DockNode ID=0x0000000F Parent=0x00000008 SizeRef=1544,2137 Split=Y Selected=0x07E8375F
|
||||
DockNode ID=0x00000011 Parent=0x0000000F SizeRef=835,425 Selected=0x72F373AE
|
||||
DockNode ID=0x00000012 Parent=0x0000000F SizeRef=835,1710 Selected=0x8B149E2A
|
||||
DockNode ID=0x00000010 Parent=0x00000008 SizeRef=2625,2137 Split=Y Selected=0xCE7F911A
|
||||
DockNode ID=0x00000012 Parent=0x0000000F SizeRef=835,1710 Selected=0x07E8375F
|
||||
DockNode ID=0x00000010 Parent=0x00000008 SizeRef=1743,2137 Split=Y Selected=0xCE7F911A
|
||||
DockNode ID=0x00000013 Parent=0x00000010 SizeRef=1967,1690 Split=X Selected=0xCE7F911A
|
||||
DockNode ID=0x00000017 Parent=0x00000013 SizeRef=1314,1749 Selected=0x4B454E0B
|
||||
DockNode ID=0x00000018 Parent=0x00000013 SizeRef=1309,1749 Split=Y Selected=0x88A8C2FF
|
||||
DockNode ID=0x00000019 Parent=0x00000018 SizeRef=2440,1412 Split=X Selected=0x88A8C2FF
|
||||
DockNode ID=0x00000023 Parent=0x00000019 SizeRef=1220,737 CentralNode=1 Selected=0xFDB3860E
|
||||
DockNode ID=0x00000023 Parent=0x00000019 SizeRef=1220,737 Split=Y Selected=0x7D28643F
|
||||
DockNode ID=0x0000001F Parent=0x00000023 SizeRef=2440,1787 CentralNode=1 Selected=0x4F935A1E
|
||||
DockNode ID=0x00000020 Parent=0x00000023 SizeRef=2440,348 Selected=0x4C2F06CB
|
||||
DockNode ID=0x00000024 Parent=0x00000019 SizeRef=1218,737 Selected=0x88A8C2FF
|
||||
DockNode ID=0x0000001A Parent=0x00000018 SizeRef=2440,723 Selected=0x3A881EEF
|
||||
DockNode ID=0x00000014 Parent=0x00000010 SizeRef=1967,445 Selected=0xC36FF36B
|
||||
|
||||
193
file_cache.py
Normal file
193
file_cache.py
Normal file
@@ -0,0 +1,193 @@
|
||||
# file_cache.py
|
||||
"""
|
||||
Persistent cache of Anthropic Files API uploads.
|
||||
|
||||
Maps (absolute_path, mtime_ns, size_bytes) -> file_id so we only upload each
|
||||
file once and reuse the file_id on subsequent sends. If the file has changed
|
||||
on disk the old file_id is deleted from the API and a new one is uploaded.
|
||||
|
||||
Cache is stored as JSON at ./logs/file_cache.json so it survives restarts.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import datetime
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
# Location of the persisted cache, relative to the current working directory.
_CACHE_PATH: Path = Path("logs") / "file_cache.json"

# In-memory cache, keyed by absolute path string:
#   abs_path_str -> {"file_id": str, "mtime_ns": int, "size": int}
_cache: dict[str, dict] = {}

# Flipped to True by _load_cache() so the JSON file is read at most once.
_cache_loaded: bool = False

# Lazily created Anthropic client; populated by _ensure_client().
_anthropic_client = None
|
||||
|
||||
|
||||
# ------------------------------------------------------------------ persistence
|
||||
|
||||
def _load_cache():
    """
    Populate the in-memory cache from _CACHE_PATH, at most once per process.

    Any problem with the cache file (missing, unreadable, not UTF-8, malformed
    JSON, or JSON that is not an object of well-formed entries) degrades to an
    empty or partial cache instead of raising: the cache is only an
    optimisation, and a lost entry merely causes a re-upload.
    """
    global _cache, _cache_loaded
    if _cache_loaded:
        return
    # Set the flag before reading so a broken file is not re-parsed on every call.
    _cache_loaded = True

    try:
        loaded = json.loads(_CACHE_PATH.read_text(encoding="utf-8"))
    except (OSError, ValueError, RecursionError):
        # OSError: file missing/unreadable. ValueError: bad UTF-8 or bad JSON
        # (UnicodeDecodeError and JSONDecodeError both derive from it).
        loaded = None

    if not isinstance(loaded, dict):
        # Valid JSON such as [] or null would break the later _cache.get() calls.
        _cache = {}
        return

    # Drop entries that cannot be used: callers index entry["file_id"] and
    # unpack the entry as a mapping.
    _cache = {
        key: value
        for key, value in loaded.items()
        if isinstance(value, dict) and "file_id" in value
    }
|
||||
|
||||
|
||||
def _save_cache():
    """
    Persist the in-memory cache to _CACHE_PATH as indented UTF-8 JSON,
    creating the parent directory first when it does not exist yet.
    """
    target_dir = _CACHE_PATH.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    # ensure_ascii=False keeps non-ASCII path characters readable in the file.
    serialized = json.dumps(_cache, indent=2, ensure_ascii=False)
    _CACHE_PATH.write_text(serialized, encoding="utf-8")
|
||||
|
||||
|
||||
# ------------------------------------------------------------------ anthropic client
|
||||
|
||||
def _ensure_client():
    """
    Return the shared Anthropic client, building it on first use.

    The API key is read from ["anthropic"]["api_key"] in ./credentials.toml.
    Errors from opening or parsing that file, or from a missing key, propagate.
    """
    global _anthropic_client
    if _anthropic_client is None:
        # Imported lazily so importing this module does not require the SDK.
        import tomllib
        import anthropic

        with open("credentials.toml", "rb") as cred_file:
            api_key = tomllib.load(cred_file)["anthropic"]["api_key"]
        _anthropic_client = anthropic.Anthropic(api_key=api_key)
    return _anthropic_client
|
||||
|
||||
|
||||
def reset_client():
    """
    Forget the cached Anthropic client.

    Meant to be called when the main ai_client resets its session; the next
    _ensure_client() call then builds a fresh client from credentials.toml.
    The file_id cache itself is left untouched.
    """
    global _anthropic_client
    _anthropic_client = None
|
||||
|
||||
|
||||
# ------------------------------------------------------------------ MIME helpers
|
||||
|
||||
# Lower-cased file extension -> MIME type sent with the upload.
_MIME_BY_EXT = {
    ".pdf": "application/pdf",
    # Plain-text formats all travel as text/plain.
    **dict.fromkeys(
        (
            ".txt", ".md", ".py", ".toml", ".json", ".yaml",
            ".yml", ".ini", ".ps1", ".csv", ".log",
        ),
        "text/plain",
    ),
    # Raster images.
    ".png": "image/png",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".gif": "image/gif",
    ".webp": "image/webp",
}

# MIME types that can be sent as 'document' content blocks.
_DOCUMENT_MIMES = {"application/pdf", "text/plain"}
# MIME types that can be sent as 'image' content blocks.
_IMAGE_MIMES = {"image/png", "image/jpeg", "image/gif", "image/webp"}


def _mime_for(path: Path) -> str:
    """Look up the MIME type by case-insensitive suffix; unknown suffixes are treated as text/plain."""
    extension = path.suffix.lower()
    return _MIME_BY_EXT.get(extension, "text/plain")


def content_block_type(path: Path) -> str:
    """
    Classify a path as 'document', 'image', or 'unsupported'.

    NOTE: because _mime_for() falls back to text/plain for unknown suffixes,
    every path currently maps to 'document' or 'image'; 'unsupported' is only
    reachable if a MIME type outside both sets is added to _MIME_BY_EXT.
    """
    mime = _mime_for(path)
    for block_kind, accepted in (("document", _DOCUMENT_MIMES), ("image", _IMAGE_MIMES)):
        if mime in accepted:
            return block_kind
    return "unsupported"
|
||||
|
||||
|
||||
# ------------------------------------------------------------------ upload / cache logic
|
||||
|
||||
def _stat(path: Path) -> tuple[int, int]:
    """
    Return the (mtime_ns, size_bytes) pair used to detect on-disk changes.

    Raises whatever path.stat() raises, e.g. FileNotFoundError for a missing file.
    """
    info = path.stat()
    modified_ns = int(info.st_mtime_ns)
    size_bytes = int(info.st_size)
    return modified_ns, size_bytes
|
||||
|
||||
|
||||
def _delete_remote(file_id: str):
    """
    Best-effort removal of a stale file_id from the Anthropic Files API.

    Never raises: failures to build the client or to delete the file are
    ignored, because a leftover remote file must not block a new upload.
    """
    beta_headers = {"anthropic-beta": "files-api-2025-04-14"}
    try:
        _ensure_client().beta.files.delete(file_id, extra_headers=beta_headers)
    except Exception:
        # Deliberately swallowed: stale deletes are non-fatal.
        return
|
||||
|
||||
|
||||
def _upload(path: Path) -> str:
    """
    Upload the file at `path` through the Files API beta and return the new file_id.

    The file is sent under its base name with the MIME type from _mime_for().
    Client, file-open and API errors propagate to the caller.
    """
    api = _ensure_client()
    content_type = _mime_for(path)
    with open(path, "rb") as stream:
        uploaded = api.beta.files.upload(
            file=(path.name, stream, content_type),
            extra_headers={"anthropic-beta": "files-api-2025-04-14"},
        )
    return uploaded.id
|
||||
|
||||
|
||||
def get_file_id(path: Path) -> Optional[str]:
    """
    Return a usable Anthropic file_id for `path`, uploading the file if needed.

    A cached id is reused while the file's (mtime_ns, size) still match the
    cached values; otherwise the old remote file is deleted (best effort) and
    the file is uploaded again.

    Returns None when the file type is unsupported or the file no longer
    exists (its cache entry, if any, is evicted in that case).
    Upload errors (network / API) propagate to the caller.
    """
    _load_cache()

    if content_block_type(path) == "unsupported":
        return None

    cache_key = str(path.resolve())

    try:
        fingerprint = _stat(path)
    except FileNotFoundError:
        # The file vanished: drop both the remote copy and the cache entry.
        if cache_key in _cache:
            stale_id = _cache[cache_key]["file_id"]
            _delete_remote(stale_id)
            del _cache[cache_key]
            _save_cache()
        return None

    mtime_ns, size = fingerprint
    cached = _cache.get(cache_key)
    if cached:
        unchanged = cached.get("mtime_ns") == mtime_ns and cached.get("size") == size
        if unchanged:
            # Cache hit: same file as last upload.
            return cached["file_id"]
        # The file changed on disk, so the remote copy is stale.
        _delete_remote(cached["file_id"])

    new_id = _upload(path)
    _cache[cache_key] = {"file_id": new_id, "mtime_ns": mtime_ns, "size": size}
    _save_cache()
    return new_id
|
||||
|
||||
|
||||
def evict(path: Path):
    """
    Manually drop `path` from the cache (e.g. after a tool-call write).

    When an entry existed, its remote file is deleted (best effort) and the
    cache file is rewritten; otherwise nothing is persisted.
    """
    _load_cache()
    removed = _cache.pop(str(path.resolve()), None)
    if not removed:
        return
    _delete_remote(removed["file_id"])
    _save_cache()
|
||||
|
||||
|
||||
def list_cached() -> list[dict]:
    """
    Return a display snapshot of the cache: one new dict per entry, holding
    the cached fields plus a "path" key with the absolute path string.
    """
    _load_cache()
    rows = []
    for cached_path, details in _cache.items():
        # Fresh dict per row so callers cannot mutate the live cache entries.
        rows.append({"path": cached_path, **details})
    return rows
|
||||
Reference in New Issue
Block a user