This commit is contained in:
2026-02-21 18:33:55 -05:00
parent c4a3034093
commit 383a3f3971
6 changed files with 174 additions and 564 deletions

View File

@@ -1,193 +1,29 @@
# file_cache.py
"""
Persistent cache of Anthropic Files API uploads.
Maps (absolute_path, mtime_ns, size_bytes) -> file_id so we only upload each
file once and reuse the file_id on subsequent sends. If the file has changed
on disk the old file_id is deleted from the API and a new one is uploaded.
Cache is stored as JSON at ./logs/file_cache.json so it survives restarts.
Stub — the Anthropic Files API path has been removed.
All context is now sent as inline chunked text via _send_anthropic_chunked.
This file is kept so that any stale imports do not break.
"""
import json
import os
import datetime
from pathlib import Path
from typing import Optional
# Location of the persisted cache file; a relative path, so it is resolved
# against the current working directory when it is read or written.
_CACHE_PATH = Path("./logs/file_cache.json")
# in-memory dict: abs_path_str -> {"file_id": str, "mtime_ns": int, "size": int}
_cache: dict[str, dict] = {}
# Set to True by _load_cache() on its first call; later calls return immediately.
_cache_loaded = False
# Shared API client; cleared again by reset_client().
_anthropic_client = None # set by _ensure_client()
# ------------------------------------------------------------------ persistence
def _load_cache():
    """
    Populate the in-memory cache from _CACHE_PATH the first time it is called.

    Later calls return immediately. A missing, unreadable, or malformed cache
    file leaves the cache empty instead of raising.
    """
    global _cache, _cache_loaded
    if _cache_loaded:
        return
    # Flag first, so a bad cache file is not re-read on every call.
    _cache_loaded = True
    _cache = {}
    if not _CACHE_PATH.exists():
        return
    try:
        loaded = json.loads(_CACHE_PATH.read_text(encoding="utf-8"))
    except Exception:
        # Deliberately best-effort: any read/parse failure means "start empty".
        return
    # Valid JSON that is not an object (e.g. a list or a string) would break
    # every later .get()/.items() call on the cache, so only accept a dict.
    if isinstance(loaded, dict):
        _cache = loaded
def _save_cache():
    """
    Write the in-memory cache to _CACHE_PATH as indented UTF-8 JSON.

    The parent directory is created first if it does not exist yet.
    """
    target = _CACHE_PATH
    target.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(_cache, indent=2, ensure_ascii=False)
    target.write_text(serialized, encoding="utf-8")
# ------------------------------------------------------------------ anthropic client
def _ensure_client():
    """
    Return the shared anthropic.Anthropic client, building it on first use.

    The API key is read from the "api_key" entry of the [anthropic] table in
    credentials.toml, which is opened relative to the current working directory.
    """
    global _anthropic_client
    if _anthropic_client is None:
        # Function-local imports: only loaded when a client is actually built.
        import tomllib
        import anthropic
        with open("credentials.toml", "rb") as creds_file:
            api_key = tomllib.load(creds_file)["anthropic"]["api_key"]
        _anthropic_client = anthropic.Anthropic(api_key=api_key)
    return _anthropic_client
def reset_client():
    """
    Forget the cached API client.

    Meant to be called when the main ai_client resets its session; the next
    _ensure_client() call then builds a fresh client.
    """
    global _anthropic_client
    _anthropic_client = None
# ------------------------------------------------------------------ MIME helpers
# File extension (lower-case, including the leading dot) -> MIME type.
# Consulted by _mime_for(), which lower-cases the suffix before the lookup.
_MIME_BY_EXT = {
".pdf": "application/pdf",
# Text-like formats all map to the same plain-text MIME type.
".txt": "text/plain",
".md": "text/plain",
".py": "text/plain",
".toml": "text/plain",
".json": "text/plain",
".yaml": "text/plain",
".yml": "text/plain",
".ini": "text/plain",
".ps1": "text/plain",
".csv": "text/plain",
".log": "text/plain",
# Image formats.
".png": "image/png",
".jpg": "image/jpeg",
".jpeg": "image/jpeg",
".gif": "image/gif",
".webp": "image/webp",
}
# File types that can be sent as 'document' blocks
_DOCUMENT_MIMES = {"application/pdf", "text/plain"}
# File types that can be sent as 'image' blocks
_IMAGE_MIMES = {"image/png", "image/jpeg", "image/gif", "image/webp"}
def _mime_for(path: Path) -> str:
    """
    Look up the MIME type for *path* from its file extension.

    The suffix is lower-cased before it is matched against _MIME_BY_EXT. Any
    suffix that is not in the table (including no suffix at all) is reported
    as "text/plain".
    """
    return _MIME_BY_EXT.get(path.suffix.lower(), "text/plain")
def content_block_type(path: Path) -> str:
    """
    Classify *path* by the kind of content block it can be sent as.

    Returns "document" or "image" according to the MIME type reported by
    _mime_for(), and "unsupported" when that MIME type is in neither
    _DOCUMENT_MIMES nor _IMAGE_MIMES.
    """
    detected = _mime_for(path)
    if detected in _DOCUMENT_MIMES:
        return "document"
    return "image" if detected in _IMAGE_MIMES else "unsupported"
# ------------------------------------------------------------------ upload / cache logic
def _stat(path: Path) -> tuple[int, int]:
    """
    Read the metadata used to detect whether a file has changed.

    Returns (mtime_ns, size_bytes) taken from a single path.stat() call.
    Any error from path.stat(), e.g. FileNotFoundError for a missing file,
    propagates to the caller.
    """
    info = path.stat()
    mtime_ns = int(info.st_mtime_ns)
    size_bytes = int(info.st_size)
    return mtime_ns, size_bytes
def _delete_remote(file_id: str):
    """
    Ask the API to delete *file_id*, ignoring any failure.

    Deletion is best effort: every exception, including one raised while
    creating the client, is swallowed.
    """
    try:
        api = _ensure_client()
        beta_headers = {"anthropic-beta": "files-api-2025-04-14"}
        api.beta.files.delete(file_id, extra_headers=beta_headers)
    except Exception:
        # A stale remote file that cannot be removed must not fail the caller.
        return
def _upload(path: Path) -> str:
    """
    Send the file at *path* to the Files API and return the id of the result.

    The upload carries the file's base name and the MIME type reported by
    _mime_for(). Client, file-open, and API errors propagate to the caller.
    """
    api = _ensure_client()
    mime_type = _mime_for(path)
    base_name = path.name
    beta_headers = {"anthropic-beta": "files-api-2025-04-14"}
    with open(path, "rb") as stream:
        uploaded = api.beta.files.upload(
            file=(base_name, stream, mime_type),
            extra_headers=beta_headers,
        )
    return uploaded.id
def get_file_id(path: Path) -> Optional[str]:
    """
    Return a valid Anthropic file_id for the given path, uploading if needed.

    A cached file_id is reused while the file's (mtime_ns, size) still match
    the cached values; otherwise the old remote file is deleted (best effort)
    and the file is uploaded again.

    Returns None if the file type is unsupported, or if the file no longer
    exists on disk (any cache entry for it is evicted in that case).
    Raises on network / API errors during upload.
    """
    _load_cache()
    if content_block_type(path) == "unsupported":
        return None
    abs_str = str(path.resolve())
    try:
        mtime_ns, size = _stat(path)
    except FileNotFoundError:
        # File gone - evict from cache and drop its remote copy
        if abs_str in _cache:
            _delete_remote(_cache[abs_str]["file_id"])
            del _cache[abs_str]
            _save_cache()
        return None
    entry = _cache.get(abs_str)
    if entry and entry.get("mtime_ns") == mtime_ns and entry.get("size") == size:
        # Cache hit - file unchanged
        return entry["file_id"]
    # Cache miss or stale - delete old remote if we had one
    if entry:
        _delete_remote(entry["file_id"])
    file_id = _upload(path)
    _cache[abs_str] = {"file_id": file_id, "mtime_ns": mtime_ns, "size": size}
    _save_cache()
    return file_id
def evict(path: Path):
    """
    Drop *path* from the cache by hand (e.g. after a tool-call write).

    When an entry existed, its remote file is deleted (best effort) and the
    cache is saved; otherwise nothing else happens.
    """
    _load_cache()
    removed = _cache.pop(str(path.resolve()), None)
    if not removed:
        return
    _delete_remote(removed["file_id"])
    _save_cache()
def list_cached() -> list[dict]:
    """
    Return a snapshot of the current cache for display.

    Each item is a new dict holding the cached path under "path" plus the
    fields stored for that path, so the list can be changed without
    affecting the cache itself.
    """
    _load_cache()
    return [{"path": cached_path, **meta} for cached_path, meta in _cache.items()]