Private
Public Access
0
0
Files
manual_slop/tests/test_rag_sync_none_error.py
T
ed 355811635d fix(rag): handle None metadata in get_all_indexed_paths and non-empty numpy in dim check
Two bugs in src/rag_engine.py were causing 'NoneType object has no attribute get'
in the live_gui RAG tests (test_rag_phase4_final_verify,
test_rag_phase4_stress):

1. _validate_collection_dim_result:148
   Old:  if not embeddings or len(embeddings) == 0:
   New:  if embeddings is None or len(embeddings) == 0:
   The 'if not embeddings' check raises ValueError('The truth value of an
   array with more than one element is ambiguous. Use a.any() or a.all()')
   when 'embeddings' is a non-empty numpy array (which is the normal case
   after documents are upserted). The exception is caught by the outer
   'except Exception' which returns a non-ok Result, causing __init__ to
   set self.collection = None. Subsequent 'get_all_indexed_paths()' then
   fails with 'NoneType has no attribute get' on self.collection.get().

2. get_all_indexed_paths:334
   Old:  return list(set(m.get('path') for m in res['metadatas'] if m.get('path')))
   New:  return list(set(m['path'] for m in res['metadatas'] if m is not None and m.get('path')))
   When chromadb returns metadatas=[None, ...] (documents upserted
   without metadata), the call m.get('path') raises AttributeError on
   the first None element. The fix adds an 'm is not None' guard.

Both fixes are defensive: the conditions that trigger them (orphan docs
without metadata, non-empty embeddings arrays) are normal valid
states that the old code couldn't handle.

New file: tests/test_rag_sync_none_error.py
   3 unit tests covering both bugs:
   - test_dim_check_does_not_raise_on_non_empty_ndarray
   - test_get_all_indexed_paths_handles_none_metadata
   - test_get_all_indexed_paths_returns_paths_with_metadata

Verified:
- 3/3 focused tests pass
- test_rag_phase4_final_verify.py::test_phase4_final_verify PASSES (was failing)
- test_rag_phase4_stress.py::test_rag_large_codebase_verification_sim PASSES (was failing)
- test_rag_visual_sim.py::test_rag_full_lifecycle_sim PASSES (still passing)
2026-06-16 00:09:02 -04:00

82 lines
2.6 KiB
Python

from __future__ import annotations
import os
import shutil
import tempfile
import pytest
from src.rag_engine import RAGEngine
from src.models import RAGConfig, VectorStoreConfig
LOCAL_EMBED_DIM = 384
@pytest.fixture
def temp_workspace():
    """Yield a fresh temporary directory and remove it after the test.

    Teardown makes up to five strict ``shutil.rmtree`` attempts, sleeping
    0.2 seconds after each failure. If none succeeds, one last best-effort
    removal runs with ``ignore_errors=True`` so teardown itself never raises
    an ``OSError``.
    """
    import time

    workspace_dir = tempfile.mkdtemp()
    try:
        yield workspace_dir
    finally:
        removed = False
        attempts_left = 5
        while attempts_left and not removed:
            attempts_left -= 1
            try:
                shutil.rmtree(workspace_dir, ignore_errors=False)
            except OSError:
                # PermissionError is a subclass of OSError, so it is covered here.
                time.sleep(0.2)
            else:
                removed = True
        if not removed:
            # Every strict attempt failed; fall back to a silent cleanup.
            shutil.rmtree(workspace_dir, ignore_errors=True)
def _make_chroma_collection_with_orphan_no_metadata(db_path, collection_name, dim=LOCAL_EMBED_DIM):
    """Create a persistent Chroma collection holding one document without metadata.

    Args:
        db_path: Directory passed to ``PersistentClient(path=...)``.
        collection_name: Name of the collection to get or create.
        dim: Length of the single embedding vector that is upserted.

    Returns:
        A ``(client, collection)`` tuple for the store that was written.
    """
    from src.rag_engine import _get_chromadb

    chroma, _unused = _get_chromadb()
    persistent_client = chroma.PersistentClient(path=db_path)
    collection = persistent_client.get_or_create_collection(name=collection_name)
    # One vector of length `dim`; no `metadatas` argument is passed on purpose.
    orphan_vector = [0.01 * idx for idx in range(dim)]
    collection.upsert(
        ids=["orphan"],
        embeddings=[orphan_vector],
        documents=["orphan doc"],
    )
    return persistent_client, collection
def _build_engine(workspace, collection_name):
    """Return a ``RAGEngine`` for ``workspace`` backed by the named Chroma collection.

    The config enables RAG, selects the ``"local"`` embedding provider and a
    ``"chroma"`` vector store that uses ``collection_name``.
    """
    store_cfg = VectorStoreConfig(provider="chroma", collection_name=collection_name)
    rag_cfg = RAGConfig(
        enabled=True,
        embedding_provider="local",
        vector_store=store_cfg,
    )
    engine = RAGEngine(rag_cfg, workspace)
    return engine
def test_dim_check_does_not_raise_on_non_empty_ndarray(temp_workspace):
    """An engine built over an already-populated collection keeps its collection.

    Seeds ``<workspace>/.slop_cache/chroma_test_dim`` with one embedded
    document, then builds the engine and checks ``engine.collection`` is set.
    NOTE(review): the path presumably mirrors where RAGEngine stores its
    Chroma data -- confirm against src/rag_engine.py.
    """
    store_dir = os.path.join(temp_workspace, ".slop_cache", "chroma_test_dim")
    db_path = os.path.abspath(store_dir)
    os.makedirs(db_path, exist_ok=True)

    _make_chroma_collection_with_orphan_no_metadata(db_path, "test_dim")

    rag_engine = _build_engine(temp_workspace, "test_dim")
    assert rag_engine.collection is not None
def test_get_all_indexed_paths_handles_none_metadata(temp_workspace):
    """``get_all_indexed_paths`` returns ``[]`` when the only document has no metadata.

    Seeds ``<workspace>/.slop_cache/chroma_test_orphan`` with a single
    document upserted without metadata, then queries the engine.
    """
    store_dir = os.path.join(temp_workspace, ".slop_cache", "chroma_test_orphan")
    db_path = os.path.abspath(store_dir)
    os.makedirs(db_path, exist_ok=True)

    _make_chroma_collection_with_orphan_no_metadata(db_path, "test_orphan")

    rag_engine = _build_engine(temp_workspace, "test_orphan")
    indexed_paths = rag_engine.get_all_indexed_paths()
    assert indexed_paths == []
def test_get_all_indexed_paths_returns_paths_with_metadata(temp_workspace):
    """``get_all_indexed_paths`` reports the ``path`` stored in document metadata.

    Seeds ``<workspace>/.slop_cache/chroma_test_with_meta`` with one document
    whose metadata carries ``path="f.txt"``, then checks that path is listed.
    """
    from src.rag_engine import _get_chromadb

    store_dir = os.path.join(temp_workspace, ".slop_cache", "chroma_test_with_meta")
    db_path = os.path.abspath(store_dir)
    os.makedirs(db_path, exist_ok=True)

    chroma, _unused = _get_chromadb()
    # Keep the client referenced for the whole test, as the original does.
    persistent_client = chroma.PersistentClient(path=db_path)
    collection = persistent_client.get_or_create_collection(name="test_with_meta")
    vector = [0.01 * idx for idx in range(LOCAL_EMBED_DIM)]
    collection.upsert(
        ids=["doc1"],
        embeddings=[vector],
        documents=["doc1"],
        metadatas=[{"path": "f.txt", "chunk": 0}],
    )

    rag_engine = _build_engine(temp_workspace, "test_with_meta")
    indexed_paths = rag_engine.get_all_indexed_paths()
    assert "f.txt" in indexed_paths