refactor(rag_engine): migrate 3 index_file sites to Result[T] (Phase 13 sites 3+4+SS)
index_file had 3 try/except sites with similar patterns: Site 3 (BC at L247): try: mtime = os.path.getmtime(full_path); except Exception: return Site 4 (BC at L261): try: with open(full_path, ...) as f: content = f.read(); except Exception: return Site 6 (SS at L255): try: res = self.collection.get(...); ...; except Exception: pass Body: broad catch + early return/pass = SS-style violation. New helpers: - _get_file_mtime_result(full_path) -> Result[float] Catches OSError only (specific to file stat failures). - _check_existing_index_result(file_path, mtime) -> Result[bool] Catches broad Exception (chromadb collection.get failures vary). Returns data=True if already indexed (skip), data=False if needs re-indexing. - _read_file_content_result(full_path) -> Result[str] Catches (OSError, UnicodeDecodeError) (file I/O + encoding failures). Legacy index_file calls each helper; on Result errors, returns early (preserving the original behavior of skipping the file on failure). Audit: rag_engine BC 3 -> 1 (L341 _async_search_mcp remaining). SS: 1 -> 0.
This commit is contained in:
+49
-15
@@ -241,6 +241,45 @@ class RAGEngine:
|
||||
return self._chunk_text(content)
|
||||
return chunks
|
||||
|
||||
def _get_file_mtime_result(self, full_path: str) -> Result[float]:
|
||||
"""Get file modification time. Returns Result[float]."""
|
||||
try:
|
||||
return Result(data=os.path.getmtime(full_path))
|
||||
except OSError as e:
|
||||
return Result(
|
||||
data=None,
|
||||
errors=[ErrorInfo(kind=ErrorKind.INTERNAL, message=f"failed to get mtime for {full_path}: {e}", source="rag_engine._get_file_mtime_result", original=e)],
|
||||
)
|
||||
|
||||
def _check_existing_index_result(self, file_path: str, mtime: float) -> Result[bool]:
|
||||
"""Check if the file is already indexed at the current mtime.
|
||||
|
||||
Returns Result(data=True) if already indexed (skip), Result(data=False)
|
||||
if needs re-indexing, Result(data=False, errors=[ErrorInfo]) on collection failure.
|
||||
"""
|
||||
try:
|
||||
res = self.collection.get(where={"path": file_path}, limit=1, include=["metadatas"])
|
||||
if res and res["metadatas"] and res["metadatas"][0]:
|
||||
if res["metadatas"][0].get("mtime") == mtime:
|
||||
return Result(data=True)
|
||||
return Result(data=False)
|
||||
except Exception as e:
|
||||
return Result(
|
||||
data=False,
|
||||
errors=[ErrorInfo(kind=ErrorKind.INTERNAL, message=f"failed to check existing index for {file_path}: {e}", source="rag_engine._check_existing_index_result", original=e)],
|
||||
)
|
||||
|
||||
def _read_file_content_result(self, full_path: str) -> Result[str]:
|
||||
"""Read file contents. Returns Result[str]."""
|
||||
try:
|
||||
with open(full_path, "r", encoding="utf-8", errors="ignore") as f:
|
||||
return Result(data=f.read())
|
||||
except (OSError, UnicodeDecodeError) as e:
|
||||
return Result(
|
||||
data=None,
|
||||
errors=[ErrorInfo(kind=ErrorKind.INTERNAL, message=f"failed to read {full_path}: {e}", source="rag_engine._read_file_content_result", original=e)],
|
||||
)
|
||||
|
||||
def index_file(self, file_path: str):
|
||||
"""Reads, chunks, and indexes a file into the vector store."""
|
||||
if not self.config.enabled or self.collection == "mock":
|
||||
@@ -259,24 +298,19 @@ class RAGEngine:
|
||||
else:
|
||||
return
|
||||
|
||||
try:
|
||||
mtime = os.path.getmtime(full_path)
|
||||
except Exception:
|
||||
mtime_result = self._get_file_mtime_result(full_path)
|
||||
if not mtime_result.ok:
|
||||
return
|
||||
mtime = mtime_result.data
|
||||
|
||||
existing_result = self._check_existing_index_result(file_path, mtime)
|
||||
if existing_result.ok and existing_result.data:
|
||||
return
|
||||
|
||||
try:
|
||||
res = self.collection.get(where={"path": file_path}, limit=1, include=["metadatas"])
|
||||
if res and res["metadatas"] and res["metadatas"][0]:
|
||||
if res["metadatas"][0].get("mtime") == mtime:
|
||||
return
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
with open(full_path, "r", encoding="utf-8", errors="ignore") as f:
|
||||
content = f.read()
|
||||
except Exception:
|
||||
content_result = self._read_file_content_result(full_path)
|
||||
if not content_result.ok:
|
||||
return
|
||||
content = content_result.data
|
||||
|
||||
self.collection.delete(where={"path": file_path})
|
||||
|
||||
|
||||
@@ -0,0 +1,29 @@
|
||||
"""Phase 13 sites 3+4 + SS 6: index_file batched migration.
|
||||
|
||||
index_file has 3 sites:
|
||||
- Site 3 (BC at L247): try: mtime = os.path.getmtime(full_path); except Exception: return
|
||||
- Site 4 (BC at L261): try: with open(full_path, ...) as f: content = f.read(); except Exception: return
|
||||
- Site 6 (SS at L255): try: res = self.collection.get(...); ...; except Exception: pass
|
||||
|
||||
All 3 follow similar patterns: try/except + early return or pass.
|
||||
"""
|
||||
import sys
|
||||
sys.path.insert(0, ".")
|
||||
|
||||
|
||||
def test_phase13_sites346_index_file_no_broad_except():
|
||||
"""index_file must not have bare 'except Exception'."""
|
||||
import inspect
|
||||
import src.rag_engine
|
||||
src_text = inspect.getsource(src.rag_engine.RAGEngine.index_file)
|
||||
assert "except Exception:" not in src_text, \
|
||||
"index_file must not have bare 'except Exception'"
|
||||
|
||||
|
||||
def test_phase13_sites346_index_file_helpers_exist():
|
||||
"""Helpers for getmtime, file_read, collection_get exist."""
|
||||
import src.rag_engine
|
||||
# Check via dir() of the class
|
||||
members = [m for m in dir(src.rag_engine.RAGEngine) if 'result' in m.lower()]
|
||||
# We expect 3 new _result helpers added to index_file
|
||||
assert len(members) >= 3, f"Expected at least 3 _result helpers, got {members}"
|
||||
Reference in New Issue
Block a user