355811635d
Two bugs in src/rag_engine.py were causing "'NoneType' object has no attribute 'get'"
errors in the live_gui RAG tests (test_rag_phase4_final_verify,
test_rag_phase4_stress):
1. _validate_collection_dim_result:148
Old: if not embeddings or len(embeddings) == 0:
New: if embeddings is None or len(embeddings) == 0:
The 'if not embeddings' check raises ValueError('The truth value of an
array with more than one element is ambiguous. Use a.any() or a.all()')
when 'embeddings' is a non-empty numpy array (which is the normal case
after documents are upserted). The exception is caught by the outer
'except Exception' which returns a non-ok Result, causing __init__ to
set self.collection = None. Any subsequent 'get_all_indexed_paths()' call then
fails with "'NoneType' object has no attribute 'get'" on self.collection.get().
2. get_all_indexed_paths:334
Old: return list(set(m.get('path') for m in res['metadatas'] if m.get('path')))
New: return list(set(m['path'] for m in res['metadatas'] if m is not None and m.get('path')))
When chromadb returns 'metadatas=[None, ...]' (documents upserted
without metadata), 'm.get('path')' fails with AttributeError on the
first None element. Adds 'm is not None' guard.
Both fixes are defensive: the conditions that trigger them (orphan docs
without metadata, non-empty embeddings arrays) are normal valid
states that the old code couldn't handle.
New file: tests/test_rag_sync_none_error.py
3 unit tests covering both bugs:
- test_dim_check_does_not_raise_on_non_empty_ndarray
- test_get_all_indexed_paths_handles_none_metadata
- test_get_all_indexed_paths_returns_paths_with_metadata
Verified:
- 3/3 focused tests pass
- test_rag_phase4_final_verify.py::test_phase4_final_verify PASSES (was failing)
- test_rag_phase4_stress.py::test_rag_large_codebase_verification_sim PASSES (was failing)
- test_rag_visual_sim.py::test_rag_full_lifecycle_sim PASSES (still passing)
82 lines
2.6 KiB
Python
82 lines
2.6 KiB
Python
from __future__ import annotations
|
|
import os
|
|
import shutil
|
|
import tempfile
|
|
import pytest
|
|
|
|
from src.rag_engine import RAGEngine
|
|
from src.models import RAGConfig, VectorStoreConfig
|
|
|
|
|
|
# Length of the synthetic embedding vectors that the tests in this file upsert.
# NOTE(review): presumably equals the dimension produced by the "local"
# embedding provider configured in _build_engine — confirm.
LOCAL_EMBED_DIM = 384
|
|
|
|
|
|
@pytest.fixture
def temp_workspace():
    """Yield a fresh temporary directory and delete it once the test is done.

    Deletion is attempted up to five times, sleeping 0.2 s after every failed
    attempt. If all five attempts fail, one last removal is made with errors
    ignored so that teardown itself never raises.

    NOTE(review): the retries presumably cover files that are still held open
    by the vector store when teardown starts — confirm.
    """
    import time

    workspace_dir = tempfile.mkdtemp()
    try:
        yield workspace_dir
    finally:
        removed = False
        attempts_left = 5
        while attempts_left and not removed:
            attempts_left -= 1
            try:
                shutil.rmtree(workspace_dir, ignore_errors=False)
            except OSError:
                # PermissionError is a subclass of OSError, so this single
                # clause catches everything the retry is meant to cover.
                time.sleep(0.2)
            else:
                removed = True
        if not removed:
            # Every strict attempt failed: fall back to best-effort cleanup.
            shutil.rmtree(workspace_dir, ignore_errors=True)
|
|
|
|
|
|
def _make_chroma_collection_with_orphan_no_metadata(db_path, collection_name, dim=LOCAL_EMBED_DIM):
    """Create a persistent Chroma collection holding one document without metadata.

    Args:
        db_path: Directory handed to ``PersistentClient`` as its storage path.
        collection_name: Name of the collection to get or create.
        dim: Number of components in the single synthetic embedding vector.

    Returns:
        The ``(client, collection)`` pair that was created.
    """
    from src.rag_engine import _get_chromadb

    chroma, _unused = _get_chromadb()
    persistent_client = chroma.PersistentClient(path=db_path)
    collection = persistent_client.get_or_create_collection(name=collection_name)

    # One deterministic vector: component i has the value 0.01 * i.
    vector = [0.01 * i for i in range(dim)]
    # No ``metadatas`` argument is passed, so the "orphan" record is stored
    # without any metadata.
    collection.upsert(ids=["orphan"], embeddings=[vector], documents=["orphan doc"])
    return persistent_client, collection
|
|
|
|
|
|
def _build_engine(workspace, collection_name):
    """Build a ``RAGEngine`` for *workspace* with local embeddings and a Chroma store.

    Args:
        workspace: Passed as the second argument to ``RAGEngine``.
        collection_name: Name of the Chroma collection the engine is configured for.

    Returns:
        The constructed ``RAGEngine`` instance.
    """
    store_cfg = VectorStoreConfig(provider="chroma", collection_name=collection_name)
    rag_cfg = RAGConfig(
        enabled=True,
        embedding_provider="local",
        vector_store=store_cfg,
    )
    return RAGEngine(rag_cfg, workspace)
|
|
|
|
|
|
def test_dim_check_does_not_raise_on_non_empty_ndarray(temp_workspace):
    """The engine keeps its collection when the store already holds an embedding.

    A collection with one stored vector is created on disk first; constructing
    the engine afterwards must leave ``engine.collection`` set.
    """
    cache_dir = os.path.join(temp_workspace, ".slop_cache", "chroma_test_dim")
    store_path = os.path.abspath(cache_dir)
    os.makedirs(store_path, exist_ok=True)
    _make_chroma_collection_with_orphan_no_metadata(store_path, "test_dim")

    rag = _build_engine(temp_workspace, "test_dim")

    assert rag.collection is not None
|
|
|
|
|
|
def test_get_all_indexed_paths_handles_none_metadata(temp_workspace):
    """``get_all_indexed_paths`` returns an empty list for a metadata-less document.

    The only record in the collection was upserted without metadata, so there
    is no ``path`` to report and the call must not fail.
    """
    cache_dir = os.path.join(temp_workspace, ".slop_cache", "chroma_test_orphan")
    store_path = os.path.abspath(cache_dir)
    os.makedirs(store_path, exist_ok=True)
    _make_chroma_collection_with_orphan_no_metadata(store_path, "test_orphan")

    rag = _build_engine(temp_workspace, "test_orphan")
    indexed = rag.get_all_indexed_paths()

    assert indexed == []
|
|
|
|
|
|
def test_get_all_indexed_paths_returns_paths_with_metadata(temp_workspace):
    """``get_all_indexed_paths`` reports the ``path`` stored in a document's metadata.

    One document is upserted with ``{"path": "f.txt", "chunk": 0}`` before the
    engine is built; that path must appear in the returned collection.
    """
    from src.rag_engine import _get_chromadb

    cache_dir = os.path.join(temp_workspace, ".slop_cache", "chroma_test_with_meta")
    store_path = os.path.abspath(cache_dir)
    os.makedirs(store_path, exist_ok=True)

    chroma, _unused = _get_chromadb()
    persistent_client = chroma.PersistentClient(path=store_path)
    collection = persistent_client.get_or_create_collection(name="test_with_meta")

    # One deterministic vector: component i has the value 0.01 * i.
    vector = [0.01 * i for i in range(LOCAL_EMBED_DIM)]
    collection.upsert(
        ids=["doc1"],
        embeddings=[vector],
        documents=["doc1"],
        metadatas=[{"path": "f.txt", "chunk": 0}],
    )

    rag = _build_engine(temp_workspace, "test_with_meta")
    indexed = rag.get_all_indexed_paths()

    assert "f.txt" in indexed
|