feat(aggregation): Add persistent cache storage and LRU management
This commit is contained in:
+35
-2
@@ -12,12 +12,13 @@ class SummaryCache:
|
|||||||
A hash-based cache for file summaries to avoid redundant processing.
|
A hash-based cache for file summaries to avoid redundant processing.
|
||||||
Invalidates when content hash changes.
|
Invalidates when content hash changes.
|
||||||
"""
|
"""
|
||||||
def __init__(self, cache_file: Optional[str] = None, max_entries: int = 1000):
    """Set up the cache location and size limit, then load stored entries.

    Args:
        cache_file: Path of the cache file. When omitted (or empty),
            ``.slop_cache/summary_cache.json`` is used, resolved relative
            to the current working directory.
        max_entries: Upper bound on the number of cached entries; stored
            on the instance as ``self.max_entries``.
    """
    # A falsy cache_file (None or "") selects the default location.
    location = cache_file or ".slop_cache/summary_cache.json"
    self.cache_file = Path(location)
    self.max_entries = max_entries
    # Maps file path -> {"hash": ..., "summary": ...}.
    self.cache: Dict[str, Dict[str, str]] = {}
    self.load()
|
||||||
|
|
||||||
@@ -43,13 +44,45 @@ class SummaryCache:
|
|||||||
"""Returns cached summary if hash matches, otherwise None."""
|
"""Returns cached summary if hash matches, otherwise None."""
|
||||||
entry = self.cache.get(file_path)
|
entry = self.cache.get(file_path)
|
||||||
if entry and entry.get("hash") == content_hash:
|
if entry and entry.get("hash") == content_hash:
|
||||||
return entry.get("summary")
|
# LRU: move to end
|
||||||
|
val = self.cache.pop(file_path)
|
||||||
|
self.cache[file_path] = val
|
||||||
|
return val.get("summary")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def set_summary(self, file_path: str, content_hash: str, summary: str) -> None:
    """Store a summary in the cache and save the cache to disk.

    The entry is (re)inserted at the end of the dict so it counts as the
    most recently used one. The oldest entries are then evicted until the
    cache holds at most ``self.max_entries`` items.

    Args:
        file_path: Key under which the summary is stored.
        content_hash: Hash recorded alongside the summary.
        summary: The summary text to cache.
    """
    # Remove any existing entry first: assigning to an existing key would
    # keep its old position in the dict's insertion order.
    self.cache.pop(file_path, None)
    self.cache[file_path] = {
        "hash": content_hash,
        "summary": summary,
    }
    # Enforce the LRU size limit. The emptiness check stops a negative
    # max_entries from calling next() on an empty dict (StopIteration).
    while self.cache and len(self.cache) > self.max_entries:
        oldest_key = next(iter(self.cache))
        del self.cache[oldest_key]
    self.save()
|
||||||
|
|
||||||
|
def clear(self) -> None:
    """Clear the cache both in memory and on disk.

    Deleting the cache file is best-effort: a missing file or any other
    filesystem error is ignored, and the in-memory cache is emptied
    regardless.
    """
    self.cache.clear()
    try:
        # Attempt the delete directly instead of checking exists() first,
        # which would leave a window for the file to vanish in between.
        self.cache_file.unlink()
    except OSError:
        # Covers FileNotFoundError (nothing to delete) as well as
        # permission and other filesystem failures.
        pass
|
||||||
|
|
||||||
|
def get_stats(self) -> dict:
    """Return a dictionary of cache statistics.

    Returns:
        A dict with two keys: ``"entries"``, the number of entries held in
        memory, and ``"size_bytes"``, the size of the cache file on disk
        (0 when the file is missing or cannot be inspected).
    """
    try:
        # Ask for the size directly rather than checking exists() first,
        # so a file removed in between cannot cause an error.
        size_bytes = self.cache_file.stat().st_size
    except OSError:
        size_bytes = 0
    return {
        "entries": len(self.cache),
        "size_bytes": size_bytes,
    }
|
||||||
|
|||||||
@@ -40,7 +40,36 @@ def test_summary_cache():
|
|||||||
if cache_dir.exists():
|
if cache_dir.exists():
|
||||||
shutil.rmtree(cache_dir)
|
shutil.rmtree(cache_dir)
|
||||||
|
|
||||||
|
def test_summary_cache_lru():
    """Check that SummaryCache evicts the least recently used entry.

    Uses a cache limited to two entries and verifies both insertion-order
    eviction and that reading an entry protects it from the next eviction.
    """
    cache_dir = Path(".test_cache_lru")
    # Start from a clean slate in case an earlier run left files behind.
    if cache_dir.exists():
        shutil.rmtree(cache_dir)
    cache_file = cache_dir / "cache.json"

    try:
        # Create cache with max 2 entries
        cache = SummaryCache(str(cache_file), max_entries=2)

        cache.set_summary("file1.py", "hash1", "summary1")
        cache.set_summary("file2.py", "hash2", "summary2")
        cache.set_summary("file3.py", "hash3", "summary3")  # This should evict file1.py

        assert cache.get_summary("file1.py", "hash1") is None
        assert cache.get_summary("file2.py", "hash2") == "summary2"
        assert cache.get_summary("file3.py", "hash3") == "summary3"

        # Access file2.py, then add file4.py. file3.py should be evicted
        cache.get_summary("file2.py", "hash2")
        cache.set_summary("file4.py", "hash4", "summary4")

        assert cache.get_summary("file3.py", "hash3") is None
        assert cache.get_summary("file2.py", "hash2") == "summary2"
        assert cache.get_summary("file4.py", "hash4") == "summary4"
    finally:
        # Remove the scratch directory even when an assertion above fails.
        if cache_dir.exists():
            shutil.rmtree(cache_dir)
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Run every test in order when the file is executed as a script; a
    # failing assertion aborts before the success message is printed.
    for run_test in (test_get_file_hash, test_summary_cache, test_summary_cache_lru):
        run_test()
    print("Tests passed!")
|
||||||
|
|||||||
Reference in New Issue
Block a user