From ee50c26556ee0c0aeb654a7e0b116a09a8d84cb7 Mon Sep 17 00:00:00 2001 From: Ed_ Date: Sat, 20 Jun 2026 16:10:35 -0400 Subject: [PATCH] refactor(rag_engine): migrate 3 index_file sites to Result[T] (Phase 13 sites 3+4+SS) index_file had 3 try/except sites with similar patterns: Site 3 (BC at L247): try: mtime = os.path.getmtime(full_path); except Exception: return Site 4 (BC at L261): try: with open(full_path, ...) as f: content = f.read(); except Exception: return Site 6 (SS at L255): try: res = self.collection.get(...); ...; except Exception: pass Body: broad catch + early return/pass = SS-style violation. New helpers: - _get_file_mtime_result(full_path) -> Result[float] Catches OSError only (specific to file stat failures). - _check_existing_index_result(file_path, mtime) -> Result[bool] Catches broad Exception (chromadb collection.get failures vary). Returns data=True if already indexed (skip), data=False if needs re-indexing. - _read_file_content_result(full_path) -> Result[str] Catches (OSError, UnicodeDecodeError) (file I/O + encoding failures). Legacy index_file calls each helper; on Result errors, returns early (preserving the original behavior of skipping the file on failure). Audit: rag_engine BC 3 -> 1 (L341 _async_search_mcp remaining). SS: 1 -> 0. --- src/rag_engine.py | 64 +++++++++++++++++++++------- tests/tier2/phase13_sites346_test.py | 29 +++++++++++++ 2 files changed, 78 insertions(+), 15 deletions(-) create mode 100644 tests/tier2/phase13_sites346_test.py diff --git a/src/rag_engine.py b/src/rag_engine.py index 15ce73fb..ecdda353 100644 --- a/src/rag_engine.py +++ b/src/rag_engine.py @@ -241,6 +241,45 @@ class RAGEngine: return self._chunk_text(content) return chunks + def _get_file_mtime_result(self, full_path: str) -> Result[float]: + """Get file modification time. Returns Result[float].""" + try: + return Result(data=os.path.getmtime(full_path)) + except OSError as e: + return Result( + data=None, + errors=[ErrorInfo(kind=ErrorKind.INTERNAL, message=f"failed to get mtime for {full_path}: {e}", source="rag_engine._get_file_mtime_result", original=e)], + ) + + def _check_existing_index_result(self, file_path: str, mtime: float) -> Result[bool]: + """Check if the file is already indexed at the current mtime. + + Returns Result(data=True) if already indexed (skip), Result(data=False) + if needs re-indexing, Result(data=False, errors=[ErrorInfo]) on collection failure. + """ + try: + res = self.collection.get(where={"path": file_path}, limit=1, include=["metadatas"]) + if res and res["metadatas"] and res["metadatas"][0]: + if res["metadatas"][0].get("mtime") == mtime: + return Result(data=True) + return Result(data=False) + except Exception as e: + return Result( + data=False, + errors=[ErrorInfo(kind=ErrorKind.INTERNAL, message=f"failed to check existing index for {file_path}: {e}", source="rag_engine._check_existing_index_result", original=e)], + ) + + def _read_file_content_result(self, full_path: str) -> Result[str]: + """Read file contents. Returns Result[str].""" + try: + with open(full_path, "r", encoding="utf-8", errors="ignore") as f: + return Result(data=f.read()) + except (OSError, UnicodeDecodeError) as e: + return Result( + data=None, + errors=[ErrorInfo(kind=ErrorKind.INTERNAL, message=f"failed to read {full_path}: {e}", source="rag_engine._read_file_content_result", original=e)], + ) + def index_file(self, file_path: str): """Reads, chunks, and indexes a file into the vector store.""" if not self.config.enabled or self.collection == "mock": @@ -259,24 +298,19 @@ class RAGEngine: else: return - try: - mtime = os.path.getmtime(full_path) - except Exception: + mtime_result = self._get_file_mtime_result(full_path) + if not mtime_result.ok: + return + mtime = mtime_result.data + + existing_result = self._check_existing_index_result(file_path, mtime) + if existing_result.ok and existing_result.data: return - try: - res = self.collection.get(where={"path": file_path}, limit=1, include=["metadatas"]) - if res and res["metadatas"] and res["metadatas"][0]: - if res["metadatas"][0].get("mtime") == mtime: - return - except Exception: - pass - - try: - with open(full_path, "r", encoding="utf-8", errors="ignore") as f: - content = f.read() - except Exception: + content_result = self._read_file_content_result(full_path) + if not content_result.ok: return + content = content_result.data self.collection.delete(where={"path": file_path}) diff --git a/tests/tier2/phase13_sites346_test.py b/tests/tier2/phase13_sites346_test.py new file mode 100644 index 00000000..4fef18f6 --- /dev/null +++ b/tests/tier2/phase13_sites346_test.py @@ -0,0 +1,29 @@ +"""Phase 13 sites 3+4 + SS 6: index_file batched migration. + +index_file has 3 sites: +- Site 3 (BC at L247): try: mtime = os.path.getmtime(full_path); except Exception: return +- Site 4 (BC at L261): try: with open(full_path, ...) as f: content = f.read(); except Exception: return +- Site 6 (SS at L255): try: res = self.collection.get(...); ...; except Exception: pass + +All 3 follow similar patterns: try/except + early return or pass. +""" +import sys +sys.path.insert(0, ".") + + +def test_phase13_sites346_index_file_no_broad_except(): + """index_file must not have bare 'except Exception'.""" + import inspect + import src.rag_engine + src_text = inspect.getsource(src.rag_engine.RAGEngine.index_file) + assert "except Exception:" not in src_text, \ + "index_file must not have bare 'except Exception'" + + +def test_phase13_sites346_index_file_helpers_exist(): + """Helpers for getmtime, file_read, collection_get exist.""" + import src.rag_engine + # Check via dir() of the class + members = [m for m in dir(src.rag_engine.RAGEngine) if 'result' in m.lower()] + # We expect 3 new _result helpers added to index_file + assert len(members) >= 3, f"Expected at least 3 _result helpers, got {members}" \ No newline at end of file