feat(aggregation): Implement hash-based summary cache
This commit is contained in:
+11
-1
@@ -27,6 +27,9 @@ import ast
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Callable, Any
|
||||
from src.summary_cache import SummaryCache, get_file_hash
|
||||
|
||||
# Module-level cache instance used by summarise_file below; constructing it
# loads any previously saved cache file from disk.
_summary_cache = SummaryCache()
|
||||
|
||||
# ------------------------------------------------------------------ per-type extractors
|
||||
|
||||
@@ -153,10 +156,17 @@ def summarise_file(path: Path, content: str) -> str:
|
||||
Return a compact markdown summary string for a single file.
|
||||
`content` is the already-read file text (or an error string).
|
||||
"""
|
||||
content_hash = get_file_hash(content)
|
||||
cached = _summary_cache.get_summary(str(path), content_hash)
|
||||
if cached:
|
||||
return cached
|
||||
|
||||
suffix = path.suffix.lower() if hasattr(path, "suffix") else ""
|
||||
fn = _SUMMARISERS.get(suffix, _summarise_generic)
|
||||
try:
|
||||
return fn(path, content)
|
||||
summary = fn(path, content)
|
||||
_summary_cache.set_summary(str(path), content_hash, summary)
|
||||
return summary
|
||||
except Exception as e:
|
||||
return f"_Summariser error: {e}_"
|
||||
|
||||
|
||||
@@ -0,0 +1,55 @@
|
||||
import hashlib
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Optional, Dict
|
||||
|
||||
def get_file_hash(content: str) -> str:
    """Return the SHA-256 hex digest of ``content``.

    The text is encoded as UTF-8 before hashing. Lone surrogate code points
    (for example from text decoded with ``errors="surrogateescape"``) are
    encoded with the ``surrogatepass`` handler so that such text still hashes
    instead of raising ``UnicodeEncodeError``. For every string that plain
    UTF-8 can encode, the digest is unchanged.

    Args:
        content: The text to hash.

    Returns:
        The 64-character lowercase hexadecimal SHA-256 digest.
    """
    return hashlib.sha256(content.encode("utf-8", "surrogatepass")).hexdigest()
|
||||
|
||||
class SummaryCache:
    """
    A hash-based cache for file summaries to avoid redundant processing.

    Entries are keyed by file path and hold the content hash together with
    the summary. A lookup only hits when the stored hash equals the requested
    one, so an entry is invalidated when the content hash changes. The whole
    cache is persisted as a JSON object in ``cache_file``.
    """

    def __init__(self, cache_file: Optional[str] = None):
        """Create the cache and load any existing entries from disk.

        Args:
            cache_file: Path of the JSON cache file. When omitted or empty,
                ``.slop_cache/summary_cache.json`` relative to the current
                working directory is used.
        """
        if cache_file:
            self.cache_file = Path(cache_file)
        else:
            # Default relative to current working directory
            self.cache_file = Path(".slop_cache/summary_cache.json")
        self.cache: Dict[str, Dict[str, str]] = {}
        self.load()

    def load(self) -> None:
        """Load the cache from disk.

        A missing file leaves the in-memory cache untouched. An unreadable
        file, invalid JSON, or JSON whose top level is not an object resets
        the cache to empty rather than raising.
        """
        if not self.cache_file.exists():
            return
        try:
            with open(self.cache_file, "r", encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError, RecursionError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            data = None
        # Valid JSON may still be a list/str/number; only a dict is usable.
        self.cache = data if isinstance(data, dict) else {}

    def save(self) -> None:
        """Save the cache to disk (best effort; failures are ignored).

        The data is written to a sibling ``.tmp`` file first and then moved
        over the real file, so an interrupted write cannot leave a truncated
        cache file behind.
        """
        tmp_file = self.cache_file.with_name(self.cache_file.name + ".tmp")
        try:
            self.cache_file.parent.mkdir(parents=True, exist_ok=True)
            with open(tmp_file, "w", encoding="utf-8") as f:
                json.dump(self.cache, f, indent=1)
            tmp_file.replace(self.cache_file)
        except (OSError, TypeError, ValueError, RecursionError):
            # Persisting is an optimisation only: never fail the caller
            # because the cache could not be written. Drop any partial file.
            try:
                tmp_file.unlink()
            except OSError:
                pass

    def get_summary(self, file_path: str, content_hash: str) -> Optional[str]:
        """Return the cached summary if the hash matches, otherwise None.

        Args:
            file_path: Key of the entry to look up.
            content_hash: Hash the stored entry must carry to count as a hit.

        Returns:
            The stored summary, or ``None`` when there is no entry, the entry
            is malformed, or its hash differs from ``content_hash``.
        """
        entry = self.cache.get(file_path)
        # Entries come from a JSON file on disk, so do not trust their shape.
        if isinstance(entry, dict) and entry.get("hash") == content_hash:
            return entry.get("summary")
        return None

    def set_summary(self, file_path: str, content_hash: str, summary: str) -> None:
        """Store the summary for ``file_path`` and save the cache to disk.

        Args:
            file_path: Key under which the entry is stored.
            content_hash: Hash of the content the summary was built from.
            summary: The summary text to cache.
        """
        self.cache[file_path] = {
            "hash": content_hash,
            "summary": summary,
        }
        self.save()
|
||||
@@ -0,0 +1,46 @@
|
||||
import os
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from src.summary_cache import SummaryCache, get_file_hash
|
||||
|
||||
def test_get_file_hash():
    """get_file_hash must return the SHA-256 hex digest of its input."""
    # Known SHA-256 digest of the text "hello world".
    known_digest = "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9"
    assert get_file_hash("hello world") == known_digest
|
||||
|
||||
def test_summary_cache():
    """Exercise SummaryCache: miss, hit, hash invalidation and persistence.

    The cache file lives inside a fresh temporary directory, so the test
    neither touches the current working directory nor leaves files behind
    when an assertion fails.
    """
    # Imported locally so this function does not rely on a new file-level import.
    import tempfile

    file_path = "test.py"
    content = "print('hello')"
    content_hash = get_file_hash(content)
    summary = "**Python** - 1 lines"

    with tempfile.TemporaryDirectory() as tmp_dir:
        # The parent directory does not exist yet, so saving must create it.
        cache_file = Path(tmp_dir) / ".test_cache" / "cache.json"

        cache = SummaryCache(str(cache_file))

        # Test empty cache
        assert cache.get_summary(file_path, content_hash) is None

        # Test set and get
        cache.set_summary(file_path, content_hash, summary)
        assert cache.get_summary(file_path, content_hash) == summary

        # Test cache invalidation
        assert cache.get_summary(file_path, "different_hash") is None

        # Test persistence
        cache2 = SummaryCache(str(cache_file))
        assert cache2.get_summary(file_path, content_hash) == summary
|
||||
|
||||
if __name__ == "__main__":
    # Allow running this module directly as a script: run each test in order,
    # then report success.
    for run_test in (test_get_file_hash, test_summary_cache):
        run_test()
    print("Tests passed!")
|
||||
Reference in New Issue
Block a user