feat(rag): implement incremental and parallel indexing performance optimizations
This commit is contained in:
+17
-3
@@ -658,9 +658,23 @@ class AppController:
|
||||
def _run():
    """Index every tracked file in parallel, then prune stale index entries.

    Progress is reported through ``self._set_rag_status``: "indexing..." at
    the start, "ready" on success, or "error: <exception>" when any step
    (including an individual worker) raises.
    """
    try:
        self._set_rag_status("indexing...")
        import concurrent.futures

        # Snapshot the tracked paths once so the indexing pass and the
        # stale-entry check below operate on the same list.
        tracked_paths = [
            f.path if hasattr(f, "path") else str(f) for f in self.files
        ]

        # 1. Incremental indexing of current files in parallel
        with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
            futures = [
                executor.submit(self.rag_engine.index_file, path)
                for path in tracked_paths
            ]
            # wait() alone never re-raises worker exceptions, which would
            # let a failed index run report "ready". result() re-raises the
            # first failure; the executor's exit still joins the remaining
            # workers before the exception reaches the handler below.
            for future in futures:
                future.result()

        # 2. Cleanup stale entries (files no longer tracked)
        current_paths = set(tracked_paths)
        indexed_paths = self.rag_engine.get_all_indexed_paths()
        stale_paths = [p for p in indexed_paths if p not in current_paths]
        if stale_paths:
            self.rag_engine.delete_documents_by_path(stale_paths)

        self._set_rag_status("ready")
    except Exception as e:
        self._set_rag_status(f"error: {e}")
|
||||
|
||||
Reference in New Issue
Block a user