feat(aggregation): Implement hash-based summary cache
This commit is contained in:
+11
-1
@@ -27,6 +27,9 @@ import ast
|
|||||||
import re
|
import re
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Callable, Any
|
from typing import Callable, Any
|
||||||
|
from src.summary_cache import SummaryCache, get_file_hash
|
||||||
|
|
||||||
|
# Module-level singleton used by summarise_file(); persists to
# .slop_cache/summary_cache.json by default (see SummaryCache.__init__).
_summary_cache = SummaryCache()
|
||||||
|
|
||||||
# ------------------------------------------------------------------ per-type extractors
|
# ------------------------------------------------------------------ per-type extractors
|
||||||
|
|
||||||
def summarise_file(path: Path, content: str) -> str:
    """
    Return a compact markdown summary string for a single file.

    `content` is the already-read file text (or an error string).
    Results are memoised in the module-level ``_summary_cache`` keyed by
    path and content hash, so unchanged files are not re-summarised.
    """
    content_hash = get_file_hash(content)
    cached = _summary_cache.get_summary(str(path), content_hash)
    # Compare against None explicitly: an empty-string summary is a valid
    # cache hit and must not force recomputation on every call.
    if cached is not None:
        return cached

    # `path` may not always be a Path in every call site — TODO confirm;
    # the hasattr guard preserves that tolerance.
    suffix = path.suffix.lower() if hasattr(path, "suffix") else ""
    fn = _SUMMARISERS.get(suffix, _summarise_generic)
    try:
        summary = fn(path, content)
        # Only successful summaries are cached; the error string below is
        # recomputed on the next call rather than being persisted.
        _summary_cache.set_summary(str(path), content_hash, summary)
        return summary
    except Exception as e:
        return f"_Summariser error: {e}_"
|
|||||||
@@ -0,0 +1,55 @@
|
|||||||
|
import hashlib
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional, Dict
|
||||||
|
|
||||||
|
def get_file_hash(content: str) -> str:
    """Return the hex-encoded SHA-256 digest of *content* (UTF-8 encoded)."""
    digest = hashlib.sha256()
    digest.update(content.encode("utf-8"))
    return digest.hexdigest()
||||||
|
|
||||||
|
class SummaryCache:
    """
    A hash-based cache for file summaries to avoid redundant processing.

    Entries map file path -> {"hash": content_hash, "summary": summary};
    a lookup only hits when the stored hash matches, so the cache
    self-invalidates whenever a file's content changes.  The mapping is
    persisted to disk as JSON after every write (best effort).
    """

    def __init__(self, cache_file: Optional[str] = None):
        """Create a cache backed by *cache_file* (default: .slop_cache/summary_cache.json)."""
        if cache_file:
            self.cache_file = Path(cache_file)
        else:
            # Default relative to current working directory.
            self.cache_file = Path(".slop_cache/summary_cache.json")
        # path -> {"hash": str, "summary": str}
        self.cache: Dict[str, Dict[str, str]] = {}
        self.load()

    def load(self) -> None:
        """Load the cache from disk; start empty on any read/parse problem."""
        if not self.cache_file.exists():
            return
        try:
            with open(self.cache_file, "r", encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, json.JSONDecodeError):
            # A missing, unreadable, or corrupt cache file is not fatal —
            # the cache is an optimisation, so just start fresh.
            self.cache = {}
            return
        # Guard against a valid-JSON file that is not the expected mapping
        # (e.g. a hand-edited list), which would break .get() lookups later.
        self.cache = data if isinstance(data, dict) else {}

    def save(self) -> None:
        """Persist the cache to disk; I/O failures are deliberately ignored."""
        try:
            self.cache_file.parent.mkdir(parents=True, exist_ok=True)
            with open(self.cache_file, "w", encoding="utf-8") as f:
                json.dump(self.cache, f, indent=1)
        except OSError:
            # Best effort: never let persistence errors propagate into the
            # summarisation path.
            pass

    def get_summary(self, file_path: str, content_hash: str) -> Optional[str]:
        """Return the cached summary if the stored hash matches, otherwise None."""
        entry = self.cache.get(file_path)
        if entry and entry.get("hash") == content_hash:
            return entry.get("summary")
        return None

    def set_summary(self, file_path: str, content_hash: str, summary: str) -> None:
        """Store *summary* under *file_path*/*content_hash* and persist to disk."""
        self.cache[file_path] = {
            "hash": content_hash,
            "summary": summary,
        }
        self.save()
||||||
@@ -0,0 +1,46 @@
|
|||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
from pathlib import Path
|
||||||
|
from src.summary_cache import SummaryCache, get_file_hash
|
||||||
|
|
||||||
|
def test_get_file_hash():
    """get_file_hash must produce the standard SHA-256 hex digest."""
    # Known SHA-256 digest of the string "hello world".
    expected_digest = (
        "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9"
    )
    assert get_file_hash("hello world") == expected_digest
||||||
|
|
||||||
|
def test_summary_cache():
    """End-to-end check of SummaryCache: miss, hit, invalidation, persistence."""
    cache_dir = Path(".test_cache")
    if cache_dir.exists():
        shutil.rmtree(cache_dir)
    cache_file = cache_dir / "cache.json"

    # try/finally so the on-disk fixture is removed even when an assertion
    # fails mid-test; otherwise a stale .test_cache poisons the next run.
    try:
        cache = SummaryCache(str(cache_file))

        file_path = "test.py"
        content = "print('hello')"
        content_hash = get_file_hash(content)
        summary = "**Python** - 1 lines"

        # Empty cache -> miss.
        assert cache.get_summary(file_path, content_hash) is None

        # Set then get -> hit.
        cache.set_summary(file_path, content_hash, summary)
        assert cache.get_summary(file_path, content_hash) == summary

        # Changed content hash -> invalidated.
        assert cache.get_summary(file_path, "different_hash") is None

        # A fresh instance reloads the persisted data.
        cache2 = SummaryCache(str(cache_file))
        assert cache2.get_summary(file_path, content_hash) == summary
    finally:
        if cache_dir.exists():
            shutil.rmtree(cache_dir)
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Minimal stand-alone runner so the checks also work without pytest.
    for check in (test_get_file_hash, test_summary_cache):
        check()
    print("Tests passed!")
|
||||||
Reference in New Issue
Block a user