# file_cache.py
"""
Persistent cache of Anthropic Files API uploads.

Maps absolute_path -> {"file_id", "mtime_ns", "size"} so each file is uploaded
only once and its file_id is reused on subsequent sends. If the file has
changed on disk (different mtime_ns or size) a new copy is uploaded and the
old file_id is deleted from the API.

Cache is stored as JSON at ./logs/file_cache.json so it survives restarts.
"""

import datetime
import json
import logging
import os
from pathlib import Path
from typing import Optional

_logger = logging.getLogger(__name__)

_CACHE_PATH = Path("./logs/file_cache.json")

# Value sent in the "anthropic-beta" header on every Files API call.
_FILES_BETA = "files-api-2025-04-14"

# in-memory dict: abs_path_str -> {"file_id": str, "mtime_ns": int, "size": int}
_cache: dict[str, dict] = {}
_cache_loaded = False
_anthropic_client = None  # set by _ensure_client()


# ------------------------------------------------------------------ persistence

def _is_valid_entry(entry: object) -> bool:
    """Return True if *entry* is a dict carrying a string "file_id"."""
    return isinstance(entry, dict) and isinstance(entry.get("file_id"), str)


def _load_cache() -> None:
    """Populate the in-memory cache from _CACHE_PATH, at most once per process.

    A missing, unreadable, or non-JSON cache file yields an empty cache.
    JSON that is not an object, and individual entries that are not dicts
    with a string "file_id", are discarded so later lookups cannot crash on
    a hand-edited or corrupted file.
    """
    global _cache, _cache_loaded
    if _cache_loaded:
        return
    # Mark as loaded up front so a bad file is not re-read on every call.
    _cache_loaded = True
    try:
        raw = json.loads(_CACHE_PATH.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError: bad JSON or bad UTF-8.
        raw = {}
    if not isinstance(raw, dict):
        raw = {}
    _cache = {key: val for key, val in raw.items() if _is_valid_entry(val)}


def _save_cache() -> None:
    """Write the in-memory cache to _CACHE_PATH, creating parent directories.

    The JSON is written to a sibling temp file and then moved into place with
    os.replace, so an interrupted write does not truncate the existing cache.
    """
    _CACHE_PATH.parent.mkdir(parents=True, exist_ok=True)
    tmp_path = _CACHE_PATH.with_name(_CACHE_PATH.name + ".tmp")
    tmp_path.write_text(
        json.dumps(_cache, indent=2, ensure_ascii=False),
        encoding="utf-8",
    )
    os.replace(tmp_path, _CACHE_PATH)


# ------------------------------------------------------------------ anthropic client

def _ensure_client():
    """Return the shared Anthropic client, creating it on first use.

    The API key is read from ["anthropic"]["api_key"] in ./credentials.toml.
    tomllib and anthropic are imported lazily so importing this module does
    not require either of them.
    """
    global _anthropic_client
    if _anthropic_client is not None:
        return _anthropic_client
    import tomllib
    import anthropic
    with open("credentials.toml", "rb") as f:
        creds = tomllib.load(f)
    _anthropic_client = anthropic.Anthropic(api_key=creds["anthropic"]["api_key"])
    return _anthropic_client


def reset_client() -> None:
    """Called when the main ai_client resets its session."""
    global _anthropic_client
    _anthropic_client = None


# ------------------------------------------------------------------ MIME helpers

_MIME_BY_EXT = {
    ".pdf": "application/pdf",
    ".txt": "text/plain",
    ".md": "text/plain",
    ".py": "text/plain",
    ".toml": "text/plain",
    ".json": "text/plain",
    ".yaml": "text/plain",
    ".yml": "text/plain",
    ".ini": "text/plain",
    ".ps1": "text/plain",
    ".csv": "text/plain",
    ".log": "text/plain",
    ".png": "image/png",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".gif": "image/gif",
    ".webp": "image/webp",
}

# File types that can be sent as 'document' blocks
_DOCUMENT_MIMES = {"application/pdf", "text/plain"}
# File types that can be sent as 'image' blocks
_IMAGE_MIMES = {"image/png", "image/jpeg", "image/gif", "image/webp"}


def _mime_for(path: Path) -> str:
    """Return the MIME type for *path* by its lower-cased suffix.

    Suffixes not listed in _MIME_BY_EXT fall back to "text/plain".
    """
    return _MIME_BY_EXT.get(path.suffix.lower(), "text/plain")


def content_block_type(path: Path) -> str:
    """Returns 'document', 'image', or 'unsupported'.

    NOTE: _mime_for() falls back to "text/plain" for unknown suffixes and every
    value in _MIME_BY_EXT is a document or image MIME, so with the current
    table 'unsupported' is never actually returned.
    """
    mime = _mime_for(path)
    if mime in _DOCUMENT_MIMES:
        return "document"
    if mime in _IMAGE_MIMES:
        return "image"
    return "unsupported"


# ------------------------------------------------------------------ upload / cache logic

def _stat(path: Path) -> tuple[int, int]:
    """Returns (mtime_ns, size_bytes). Raises if file missing."""
    st = path.stat()
    return int(st.st_mtime_ns), int(st.st_size)


def _delete_remote(file_id: str) -> None:
    """Best-effort delete of a stale file_id from the API.

    Never raises: any failure is logged at debug level and otherwise ignored,
    because a leftover remote file is non-fatal.
    """
    try:
        client = _ensure_client()
        client.beta.files.delete(
            file_id,
            extra_headers={"anthropic-beta": _FILES_BETA},
        )
    except Exception:
        # Deliberately broad: stale deletes must not break the caller.
        _logger.debug("Failed to delete remote file %s", file_id, exc_info=True)


def _upload(path: Path) -> str:
    """Upload the file and return its new file_id."""
    client = _ensure_client()
    mime = _mime_for(path)
    filename = path.name
    with open(path, "rb") as fh:
        result = client.beta.files.upload(
            file=(filename, fh, mime),
            extra_headers={"anthropic-beta": _FILES_BETA},
        )
    return result.id


def get_file_id(path: Path) -> Optional[str]:
    """
    Return a valid Anthropic file_id for the given path, uploading if needed.

    Returns None if the file type is unsupported or the file does not exist
    (in which case any cached entry for it is evicted).
    Raises on network / API errors during upload.
    """
    _load_cache()
    if content_block_type(path) == "unsupported":
        return None

    abs_str = str(path.resolve())
    try:
        mtime_ns, size = _stat(path)
    except FileNotFoundError:
        # File gone - evict from cache
        gone = _cache.pop(abs_str, None)
        if gone is not None:
            _delete_remote(gone["file_id"])
            _save_cache()
        return None

    entry = _cache.get(abs_str)
    if entry and entry.get("mtime_ns") == mtime_ns and entry.get("size") == size:
        # Cache hit - file unchanged
        return entry["file_id"]

    # Cache miss or stale. Upload first: if the upload raises, the previous
    # entry (and its still-existing remote file) is left untouched.
    file_id = _upload(path)
    _cache[abs_str] = {"file_id": file_id, "mtime_ns": mtime_ns, "size": size}
    _save_cache()

    # Only now drop the superseded remote copy, if there was one.
    if entry:
        _delete_remote(entry["file_id"])
    return file_id


def evict(path: Path) -> None:
    """Manually evict a path from the cache (e.g. after a tool-call write).

    If the path was cached, its remote file is deleted (best-effort) and the
    cache file is rewritten; otherwise nothing happens.
    """
    _load_cache()
    abs_str = str(path.resolve())
    entry = _cache.pop(abs_str, None)
    if entry:
        _delete_remote(entry["file_id"])
        _save_cache()


def list_cached() -> list[dict]:
    """Return a snapshot of the current cache for display.

    Each item is the cached entry's fields plus a "path" key.
    """
    _load_cache()
    return [{"path": k, **v} for k, v in _cache.items()]