feat(aggregation): Implement hash-based summary cache
This commit is contained in:
+11
-1
@@ -27,6 +27,9 @@ import ast
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Callable, Any
|
||||
from src.summary_cache import SummaryCache, get_file_hash
|
||||
|
||||
# Process-wide summary cache shared by summarise_file; persisted to
# .slop_cache/summary_cache.json (SummaryCache's default) in the CWD.
_summary_cache = SummaryCache()
|
||||
|
||||
# ------------------------------------------------------------------ per-type extractors
|
||||
|
||||
def summarise_file(path: Path, content: str) -> str:
    """
    Return a compact markdown summary string for a single file.

    `content` is the already-read file text (or an error string).

    Results are memoised in the module-level ``_summary_cache`` keyed by
    file path and content hash, so unchanged files are not re-summarised.
    """
    content_hash = get_file_hash(content)
    cached = _summary_cache.get_summary(str(path), content_hash)
    if cached:
        return cached

    # Dispatch on file extension; fall back to the generic summariser.
    suffix = path.suffix.lower() if hasattr(path, "suffix") else ""
    fn = _SUMMARISERS.get(suffix, _summarise_generic)
    try:
        # BUG FIX: a stale `return fn(path, content)` left over from the
        # pre-cache version made the two lines below unreachable, so the
        # cache was never populated. Compute once, store, then return.
        summary = fn(path, content)
        _summary_cache.set_summary(str(path), content_hash, summary)
        return summary
    except Exception as e:
        # Summarisers are best-effort: surface the failure inline in the
        # output instead of aborting the whole aggregation run.
        return f"_Summariser error: {e}_"
||||
@@ -0,0 +1,55 @@
|
||||
import hashlib
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Optional, Dict
|
||||
|
||||
def get_file_hash(content: str) -> str:
    """Return the hex SHA-256 digest of *content*, encoded as UTF-8."""
    digest = hashlib.sha256()
    digest.update(content.encode("utf-8"))
    return digest.hexdigest()
|
||||
|
||||
class SummaryCache:
    """
    A hash-based cache for file summaries to avoid redundant processing.

    Entries map ``file_path -> {"hash": content_hash, "summary": text}``
    and are invalidated when the content hash changes. The cache is
    persisted to a JSON file; all disk I/O is best-effort and never
    raises to callers.
    """

    def __init__(self, cache_file: Optional[str] = None):
        """
        Args:
            cache_file: Path of the JSON backing file. Defaults to
                ``.slop_cache/summary_cache.json`` relative to the
                current working directory.
        """
        if cache_file:
            self.cache_file = Path(cache_file)
        else:
            # Default relative to current working directory
            self.cache_file = Path(".slop_cache/summary_cache.json")
        # In-memory store: path -> {"hash": ..., "summary": ...}
        self.cache: Dict[str, Dict[str, str]] = {}
        self.load()

    def load(self) -> None:
        """Load the cache from disk; fall back to empty on any problem."""
        if not self.cache_file.exists():
            return
        try:
            with open(self.cache_file, "r", encoding="utf-8") as f:
                loaded = json.load(f)
        except (OSError, ValueError):
            # Unreadable or corrupt cache file: start fresh rather than crash.
            self.cache = {}
            return
        # BUG FIX: previously any JSON value was accepted, so a backing
        # file holding e.g. a list was assigned to self.cache and later
        # self.cache.get(...) raised AttributeError. Require a dict.
        self.cache = loaded if isinstance(loaded, dict) else {}

    def save(self) -> None:
        """Persist the cache to disk (best-effort; failures are ignored)."""
        try:
            self.cache_file.parent.mkdir(parents=True, exist_ok=True)
            with open(self.cache_file, "w", encoding="utf-8") as f:
                json.dump(self.cache, f, indent=1)
        except (OSError, TypeError, ValueError):
            # Caching is only an optimisation: a full disk, bad permissions,
            # or an unserialisable value must never break the caller.
            pass

    def get_summary(self, file_path: str, content_hash: str) -> Optional[str]:
        """Return the cached summary if the stored hash matches, else None."""
        entry = self.cache.get(file_path)
        if entry and entry.get("hash") == content_hash:
            return entry.get("summary")
        return None

    def set_summary(self, file_path: str, content_hash: str, summary: str) -> None:
        """Store *summary* for *file_path* and persist immediately."""
        self.cache[file_path] = {
            "hash": content_hash,
            "summary": summary,
        }
        self.save()
|
||||
Reference in New Issue
Block a user