feat(aggregation): Add persistent cache storage and LRU management

This commit is contained in:
2026-05-04 05:20:03 -04:00
parent 56c752d070
commit fb2df2a758
2 changed files with 64 additions and 2 deletions
+35 -2
View File
@@ -12,12 +12,13 @@ class SummaryCache:
A hash-based cache for file summaries to avoid redundant processing. A hash-based cache for file summaries to avoid redundant processing.
Invalidates when content hash changes. Invalidates when content hash changes.
""" """
def __init__(self, cache_file: Optional[str] = None, max_entries: int = 1000):
    """Initialize the cache and load any persisted entries from disk.

    Args:
        cache_file: Path of the JSON file backing the cache; when omitted,
            defaults to ``.slop_cache/summary_cache.json`` relative to the
            current working directory.
        max_entries: Upper bound on cached entries before LRU eviction.
    """
    default_location = Path(".slop_cache/summary_cache.json")
    self.cache_file = Path(cache_file) if cache_file else default_location
    self.max_entries = max_entries
    # Dict insertion order doubles as recency order (guaranteed since 3.7),
    # so the front of the dict is always the least-recently-used entry.
    self.cache: Dict[str, Dict[str, str]] = {}
    self.load()
@@ -43,13 +44,45 @@ class SummaryCache:
"""Returns cached summary if hash matches, otherwise None.""" """Returns cached summary if hash matches, otherwise None."""
entry = self.cache.get(file_path) entry = self.cache.get(file_path)
if entry and entry.get("hash") == content_hash: if entry and entry.get("hash") == content_hash:
return entry.get("summary") # LRU: move to end
val = self.cache.pop(file_path)
self.cache[file_path] = val
return val.get("summary")
return None return None
def set_summary(self, file_path: str, content_hash: str, summary: str) -> None:
    """Stores summary in cache and saves to disk."""
    # Drop any existing entry first so the re-insert below lands at the
    # end of the dict, marking this path as most recently used.
    self.cache.pop(file_path, None)
    self.cache[file_path] = {"hash": content_hash, "summary": summary}
    # Enforce the LRU capacity: evict from the front (oldest) until we fit.
    while len(self.cache) > self.max_entries:
        oldest = next(iter(self.cache))
        del self.cache[oldest]
    self.save()
def clear(self) -> None:
    """Clears the cache both in-memory and on disk.

    Removal of the backing file is best-effort: filesystem errors
    (permissions, concurrent deletion) are ignored rather than raised.
    """
    self.cache.clear()
    # missing_ok=True (3.8+) removes the exists()/unlink() race; catching
    # OSError instead of Exception keeps this best-effort without hiding
    # unrelated programming errors.
    try:
        self.cache_file.unlink(missing_ok=True)
    except OSError:
        pass
def get_stats(self) -> dict:
    """Returns dictionary of cache statistics.

    Returns:
        dict with keys:
            entries: number of in-memory cache entries.
            size_bytes: on-disk size of the cache file, or 0 if the file
                is absent or unreadable.
    """
    size_bytes = 0
    # EAFP: stat() directly rather than exists()+stat(), which races if the
    # file vanishes between the two calls. OSError covers both the missing
    # file and other filesystem failures; Exception was overly broad.
    try:
        size_bytes = self.cache_file.stat().st_size
    except OSError:
        pass
    return {
        "entries": len(self.cache),
        "size_bytes": size_bytes
    }
+29
View File
@@ -40,7 +40,36 @@ def test_summary_cache():
if cache_dir.exists(): if cache_dir.exists():
shutil.rmtree(cache_dir) shutil.rmtree(cache_dir)
def test_summary_cache_lru():
    """Exercise LRU eviction and recency refresh in SummaryCache."""
    work_dir = Path(".test_cache_lru")
    if work_dir.exists():
        shutil.rmtree(work_dir)
    # Capacity of two so the third insert forces an eviction.
    cache = SummaryCache(str(work_dir / "cache.json"), max_entries=2)

    cache.set_summary("file1.py", "hash1", "summary1")
    cache.set_summary("file2.py", "hash2", "summary2")
    cache.set_summary("file3.py", "hash3", "summary3")  # This should evict file1.py
    assert cache.get_summary("file1.py", "hash1") is None
    assert cache.get_summary("file2.py", "hash2") == "summary2"
    assert cache.get_summary("file3.py", "hash3") == "summary3"

    # Touching file2.py refreshes its recency, so the next insert
    # evicts file3.py instead.
    cache.get_summary("file2.py", "hash2")
    cache.set_summary("file4.py", "hash4", "summary4")
    assert cache.get_summary("file3.py", "hash3") is None
    assert cache.get_summary("file2.py", "hash2") == "summary2"
    assert cache.get_summary("file4.py", "hash4") == "summary4"

    if work_dir.exists():
        shutil.rmtree(work_dir)
if __name__ == "__main__":
    # Run every test in order when executed as a script; any assertion
    # failure aborts before the success message prints.
    for check in (test_get_file_hash, test_summary_cache, test_summary_cache_lru):
        check()
    print("Tests passed!")