feat(rag): implement incremental and parallel indexing performance optimizations

This commit is contained in:
2026-05-04 21:47:54 -04:00
parent a3d7376535
commit 095368bca2
3 changed files with 47 additions and 5 deletions
+17 -3
View File
@@ -658,9 +658,23 @@ class AppController:
def _run():
    """Index every tracked file in parallel, then drop stale index entries.

    Status is reported through ``self._set_rag_status``: "indexing..." while
    work is in progress, "ready" on success, and "error: <exception>" if any
    step raises. A failure while indexing any single file is treated as a
    failure of the whole run, so stale-entry cleanup is skipped in that case.
    """
    import concurrent.futures

    def _path_of(entry):
        # Entries may be objects exposing ``.path`` or plain path-like values.
        return entry.path if hasattr(entry, "path") else str(entry)

    try:
        self._set_rag_status("indexing...")

        # 1. Index the current files in parallel (one task per tracked file).
        #    NOTE(review): assumes rag_engine.index_file is safe to call from
        #    several threads at once -- confirm against the engine.
        with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
            futures = [
                executor.submit(self.rag_engine.index_file, _path_of(f))
                for f in self.files
            ]
            # result() re-raises any exception from the worker. A bare
            # wait() would discard it and the run would wrongly end "ready".
            for future in futures:
                future.result()

        # 2. Clean up stale entries (indexed paths that are no longer tracked).
        #    self.files is read again here, so the comparison uses the list
        #    as it stands after indexing has finished.
        indexed_paths = self.rag_engine.get_all_indexed_paths()
        current_paths = {_path_of(f) for f in self.files}
        stale_paths = [p for p in indexed_paths if p not in current_paths]
        if stale_paths:
            self.rag_engine.delete_documents_by_path(stale_paths)

        self._set_rag_status("ready")
    except Exception as e:
        # Top-level boundary for this job: surface the failure via status.
        self._set_rag_status(f"error: {e}")