Private
Public Access
0
0

refactor(rag_engine): migrate 3 index_file sites to Result[T] (Phase 13 sites 3+4+SS)

index_file had 3 try/except sites with similar patterns:

Site 3 (BC at L247): try: mtime = os.path.getmtime(full_path); except Exception: return
Site 4 (BC at L261): try: with open(full_path, ...) as f: content = f.read(); except Exception: return
Site 6 (SS at L255): try: res = self.collection.get(...); ...; except Exception: pass

Body: broad catch + early return/pass = SS-style violation.

New helpers:
- _get_file_mtime_result(full_path) -> Result[float]
  Catches OSError only (specific to file stat failures).
- _check_existing_index_result(file_path, mtime) -> Result[bool]
  Catches broad Exception (chromadb collection.get failures vary).
  Returns data=True if already indexed (skip), data=False if needs re-indexing.
- _read_file_content_result(full_path) -> Result[str]
  Catches (OSError, UnicodeDecodeError) (file I/O + encoding failures).

Legacy index_file calls each helper; on Result errors, returns early
(preserving the original behavior of skipping the file on failure).

Audit: rag_engine BC 3 -> 1 (L341 _async_search_mcp remaining).
SS: 1 -> 0.
This commit is contained in:
2026-06-20 16:10:35 -04:00
parent 7b3d723758
commit ee50c26556
2 changed files with 78 additions and 15 deletions
+49 -15
View File
@@ -241,6 +241,45 @@ class RAGEngine:
return self._chunk_text(content)
return chunks
def _get_file_mtime_result(self, full_path: str) -> Result[float]:
"""Get file modification time. Returns Result[float]."""
try:
return Result(data=os.path.getmtime(full_path))
except OSError as e:
return Result(
data=None,
errors=[ErrorInfo(kind=ErrorKind.INTERNAL, message=f"failed to get mtime for {full_path}: {e}", source="rag_engine._get_file_mtime_result", original=e)],
)
def _check_existing_index_result(self, file_path: str, mtime: float) -> Result[bool]:
"""Check if the file is already indexed at the current mtime.
Returns Result(data=True) if already indexed (skip), Result(data=False)
if needs re-indexing, Result(data=False, errors=[ErrorInfo]) on collection failure.
"""
try:
res = self.collection.get(where={"path": file_path}, limit=1, include=["metadatas"])
if res and res["metadatas"] and res["metadatas"][0]:
if res["metadatas"][0].get("mtime") == mtime:
return Result(data=True)
return Result(data=False)
except Exception as e:
return Result(
data=False,
errors=[ErrorInfo(kind=ErrorKind.INTERNAL, message=f"failed to check existing index for {file_path}: {e}", source="rag_engine._check_existing_index_result", original=e)],
)
def _read_file_content_result(self, full_path: str) -> Result[str]:
"""Read file contents. Returns Result[str]."""
try:
with open(full_path, "r", encoding="utf-8", errors="ignore") as f:
return Result(data=f.read())
except (OSError, UnicodeDecodeError) as e:
return Result(
data=None,
errors=[ErrorInfo(kind=ErrorKind.INTERNAL, message=f"failed to read {full_path}: {e}", source="rag_engine._read_file_content_result", original=e)],
)
def index_file(self, file_path: str):
"""Reads, chunks, and indexes a file into the vector store."""
if not self.config.enabled or self.collection == "mock":
@@ -259,24 +298,19 @@ class RAGEngine:
else:
return
try:
mtime = os.path.getmtime(full_path)
except Exception:
mtime_result = self._get_file_mtime_result(full_path)
if not mtime_result.ok:
return
mtime = mtime_result.data
existing_result = self._check_existing_index_result(file_path, mtime)
if existing_result.ok and existing_result.data:
return
try:
res = self.collection.get(where={"path": file_path}, limit=1, include=["metadatas"])
if res and res["metadatas"] and res["metadatas"][0]:
if res["metadatas"][0].get("mtime") == mtime:
return
except Exception:
pass
try:
with open(full_path, "r", encoding="utf-8", errors="ignore") as f:
content = f.read()
except Exception:
content_result = self._read_file_content_result(full_path)
if not content_result.ok:
return
content = content_result.data
self.collection.delete(where={"path": file_path})
+29
View File
@@ -0,0 +1,29 @@
"""Phase 13 sites 3+4 + SS 6: index_file batched migration.
index_file has 3 sites:
- Site 3 (BC at L247): try: mtime = os.path.getmtime(full_path); except Exception: return
- Site 4 (BC at L261): try: with open(full_path, ...) as f: content = f.read(); except Exception: return
- Site 6 (SS at L255): try: res = self.collection.get(...); ...; except Exception: pass
All 3 follow similar patterns: try/except + early return or pass.
"""
import sys
sys.path.insert(0, ".")
def test_phase13_sites346_index_file_no_broad_except():
"""index_file must not have bare 'except Exception'."""
import inspect
import src.rag_engine
src_text = inspect.getsource(src.rag_engine.RAGEngine.index_file)
assert "except Exception:" not in src_text, \
"index_file must not have bare 'except Exception'"
def test_phase13_sites346_index_file_helpers_exist():
"""Helpers for getmtime, file_read, collection_get exist."""
import src.rag_engine
# Check via dir() of the class
members = [m for m in dir(src.rag_engine.RAGEngine) if 'result' in m.lower()]
# We expect 3 new _result helpers added to index_file
assert len(members) >= 3, f"Expected at least 3 _result helpers, got {members}"