# file_cache.py
"""
Persistent cache of Anthropic Files API uploads.

Maps (absolute_path, mtime_ns, size_bytes) -> file_id so that each file is
uploaded only once and its file_id is reused on subsequent sends. If the file
has changed on disk, the old file_id is deleted from the API and a new one is
uploaded.

The cache is stored as JSON at ./logs/file_cache.json so it survives restarts.
"""
|
|
|
|
import json
|
|
import os
|
|
import datetime
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
# Where the cache is persisted. The path is relative, so it resolves against
# the working directory of the running process.
_CACHE_PATH = Path("logs") / "file_cache.json"

# In-memory cache: abs_path_str -> {"file_id": str, "mtime_ns": int, "size": int}
_cache: dict[str, dict] = dict()
# Becomes True on the first _load_cache() call so the file is read only once.
_cache_loaded: bool = False

# Shared Anthropic client; stays None until _ensure_client() builds it.
_anthropic_client = None
|
|
|
|
|
|
# ------------------------------------------------------------------ persistence
|
|
|
|
def _load_cache():
    """
    Populate the in-memory cache from _CACHE_PATH, at most once per process.

    The first call marks the cache as loaded and reads the JSON file; later
    calls return immediately. A missing, unreadable, or malformed file yields
    an empty cache instead of an error.

    Only well-formed data is kept: the top-level JSON value must be an object,
    and each entry must itself be an object carrying a string "file_id".
    Anything else is dropped, so the rest of the module can rely on _cache
    being a dict of dicts.
    """
    global _cache, _cache_loaded
    if _cache_loaded:
        return
    # Set before reading so a broken file is not re-read on every call.
    _cache_loaded = True

    try:
        loaded = json.loads(_CACHE_PATH.read_text(encoding="utf-8"))
    except (OSError, ValueError, RecursionError):
        # OSError: file missing or unreadable.
        # ValueError: invalid JSON or invalid UTF-8.
        # RecursionError: pathologically nested JSON.
        loaded = None

    if not isinstance(loaded, dict):
        # Valid JSON that is not an object (e.g. [] or null) is unusable here.
        _cache = {}
        return

    _cache = {
        key: entry
        for key, entry in loaded.items()
        if isinstance(entry, dict) and isinstance(entry.get("file_id"), str)
    }
|
|
|
|
|
|
def _save_cache():
    """
    Write the in-memory cache to _CACHE_PATH as UTF-8 JSON.

    The parent directory is created if needed. The data is first written to a
    sibling "<name>.tmp" file and then moved over the real file with
    os.replace(), so an interrupted write cannot leave a truncated cache file
    behind (a truncated file would later be read back as an empty cache).

    Raises OSError if the directory or file cannot be written.
    """
    _CACHE_PATH.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(_cache, indent=2, ensure_ascii=False)
    tmp_path = _CACHE_PATH.with_name(_CACHE_PATH.name + ".tmp")
    try:
        tmp_path.write_text(payload, encoding="utf-8")
        os.replace(tmp_path, _CACHE_PATH)
    except Exception:
        # Do not leave a half-written temp file around; the original error
        # is what the caller needs to see, so cleanup failures are ignored.
        try:
            tmp_path.unlink(missing_ok=True)
        except OSError:
            pass
        raise
|
|
|
|
|
|
# ------------------------------------------------------------------ anthropic client
|
|
|
|
def _ensure_client():
    """
    Return the shared Anthropic client, building it on first use.

    The first call reads the API key from ["anthropic"]["api_key"] in
    credentials.toml (opened relative to the current working directory),
    creates the client and stores it in the module-level _anthropic_client.
    Later calls return that same object until reset_client() clears it.
    """
    global _anthropic_client
    if _anthropic_client is None:
        # Imported here rather than at module top, so they are only needed
        # once a client is actually requested.
        import tomllib
        import anthropic

        with open("credentials.toml", "rb") as creds_file:
            api_key = tomllib.load(creds_file)["anthropic"]["api_key"]
        _anthropic_client = anthropic.Anthropic(api_key=api_key)
    return _anthropic_client
|
|
|
|
|
|
def reset_client():
    """
    Drop the cached Anthropic client.

    Called when the main ai_client resets its session; the next
    _ensure_client() call builds a fresh client.
    """
    global _anthropic_client
    _anthropic_client = None
|
|
|
|
|
|
# ------------------------------------------------------------------ MIME helpers
|
|
|
|
# Lower-cased file extension -> MIME type sent with the upload.
_MIME_BY_EXT = {
    ".pdf": "application/pdf",
    # Everything text-like is uploaded as plain text.
    **dict.fromkeys(
        (
            ".txt",
            ".md",
            ".py",
            ".toml",
            ".json",
            ".yaml",
            ".yml",
            ".ini",
            ".ps1",
            ".csv",
            ".log",
        ),
        "text/plain",
    ),
    ".png": "image/png",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".gif": "image/gif",
    ".webp": "image/webp",
}

# MIME types that can be sent as 'document' blocks
_DOCUMENT_MIMES = {
    "application/pdf",
    "text/plain",
}
# MIME types that can be sent as 'image' blocks
_IMAGE_MIMES = {
    "image/png",
    "image/jpeg",
    "image/gif",
    "image/webp",
}
|
|
|
|
|
|
def _mime_for(path: Path) -> str:
    """
    Return the MIME type for a path, chosen by its lower-cased extension.

    Extensions not listed in _MIME_BY_EXT (including no extension at all)
    fall back to "text/plain".
    """
    extension = path.suffix.lower()
    try:
        return _MIME_BY_EXT[extension]
    except KeyError:
        return "text/plain"
|
|
|
|
|
|
def content_block_type(path: Path) -> str:
    """
    Classify a path by the kind of content block it can be sent as.

    Returns 'document' when the path's MIME type is in _DOCUMENT_MIMES,
    'image' when it is in _IMAGE_MIMES, and 'unsupported' otherwise.

    NOTE(review): _mime_for() falls back to "text/plain" for unknown
    extensions, so with the current tables 'unsupported' is never returned -
    confirm that treating every unknown file as a text document is intended.
    """
    mime = _mime_for(path)
    # Checked in order: document types first, then image types.
    for block_type, accepted in (
        ("document", _DOCUMENT_MIMES),
        ("image", _IMAGE_MIMES),
    ):
        if mime in accepted:
            return block_type
    return "unsupported"
|
|
|
|
|
|
# ------------------------------------------------------------------ upload / cache logic
|
|
|
|
def _stat(path: Path) -> tuple[int, int]:
    """
    Return (mtime_ns, size_bytes) for the file at path.

    Raises FileNotFoundError (from Path.stat) when the file does not exist.
    """
    info = path.stat()
    modified_ns = int(info.st_mtime_ns)
    size_bytes = int(info.st_size)
    return modified_ns, size_bytes
|
|
|
|
|
|
def _delete_remote(file_id: str):
    """
    Best-effort delete of a stale file_id from the API.

    Never raises: any failure (client construction, network, API error) is
    logged as a warning and otherwise ignored, because a leftover remote file
    must not stop the caller from continuing.
    """
    try:
        client = _ensure_client()
        client.beta.files.delete(file_id, extra_headers={"anthropic-beta": "files-api-2025-04-14"})
    except Exception as exc:
        # Stale deletes are non-fatal, but leave a trace so orphaned remote
        # files and credential/config problems can be diagnosed.
        import logging

        logging.getLogger(__name__).warning(
            "Could not delete remote file %s: %s", file_id, exc
        )
|
|
|
|
|
|
def _upload(path: Path) -> str:
    """
    Upload the file at path through the Files API and return its new file_id.

    The file is sent under its base name with the MIME type chosen by
    _mime_for(). Errors from opening the file or from the API propagate to
    the caller.
    """
    client = _ensure_client()
    upload_name = path.name
    content_type = _mime_for(path)
    beta_headers = {"anthropic-beta": "files-api-2025-04-14"}
    with open(path, "rb") as stream:
        uploaded = client.beta.files.upload(
            file=(upload_name, stream, content_type),
            extra_headers=beta_headers,
        )
        return uploaded.id
|
|
|
|
|
|
def get_file_id(path: Path) -> Optional[str]:
    """
    Return a valid Anthropic file_id for the given path, uploading if needed.

    A cached file_id is reused while the file's mtime_ns and size match the
    cached values. Otherwise the file is uploaded again, the cache is updated
    and saved, and the previous remote file (if any) is deleted best-effort.

    Returns None if the file type is unsupported or the file no longer
    exists; in the latter case any cached entry for it is evicted.
    Raises on network / API errors during upload.
    """
    _load_cache()

    if content_block_type(path) == "unsupported":
        return None

    abs_str = str(path.resolve())
    try:
        mtime_ns, size = _stat(path)
    except FileNotFoundError:
        # File gone - evict from cache
        gone = _cache.pop(abs_str, None)
        if gone is not None:
            gone_id = gone.get("file_id")
            if gone_id:
                _delete_remote(gone_id)
            _save_cache()
        return None

    entry = _cache.get(abs_str)
    if (
        entry
        and entry.get("file_id")
        and entry.get("mtime_ns") == mtime_ns
        and entry.get("size") == size
    ):
        # Cache hit - file unchanged
        return entry["file_id"]

    # Cache miss or stale. Upload the replacement BEFORE retiring the old
    # remote file: if the upload raises, the cache must not be left pointing
    # at a file_id that has already been deleted.
    file_id = _upload(path)
    _cache[abs_str] = {"file_id": file_id, "mtime_ns": mtime_ns, "size": size}

    stale_id = entry.get("file_id") if entry else None
    if stale_id:
        _delete_remote(stale_id)

    _save_cache()
    return file_id
|
|
|
|
|
|
def evict(path: Path):
    """
    Manually evict a path from the cache (e.g. after a tool-call write).

    When the resolved path has a cache entry, the entry is removed, its
    remote file is deleted best-effort and the cache is saved. Paths that
    are not cached are left alone and nothing is written.
    """
    _load_cache()
    removed = _cache.pop(str(path.resolve()), None)
    if not removed:
        return
    _delete_remote(removed["file_id"])
    _save_cache()
|
|
|
|
|
|
def list_cached() -> list[dict]:
    """
    Return a snapshot of the current cache for display.

    Each item is a new dict holding the cached path under "path" followed by
    the fields of its cache entry.
    """
    _load_cache()
    snapshot = []
    for abs_path, entry in _cache.items():
        row = {"path": abs_path, **entry}
        snapshot.append(row)
    return snapshot
|