from __future__ import annotations import os import shutil import tempfile import pytest from src.rag_engine import RAGEngine from src.models import RAGConfig, VectorStoreConfig LOCAL_EMBED_DIM = 384 @pytest.fixture def temp_workspace(): tmp = tempfile.mkdtemp() try: yield tmp finally: for _ in range(5): try: shutil.rmtree(tmp, ignore_errors=False) break except (OSError, PermissionError): import time time.sleep(0.2) else: shutil.rmtree(tmp, ignore_errors=True) def _make_chroma_collection_with_orphan_no_metadata(db_path, collection_name, dim=LOCAL_EMBED_DIM): from src.rag_engine import _get_chromadb chromadb_module, _ = _get_chromadb() client = chromadb_module.PersistentClient(path=db_path) col = client.get_or_create_collection(name=collection_name) emb = [[0.01 * i for i in range(dim)]] col.upsert(ids=["orphan"], embeddings=emb, documents=["orphan doc"]) return client, col def _build_engine(workspace, collection_name): cfg = RAGConfig( enabled=True, embedding_provider="local", vector_store=VectorStoreConfig(provider="chroma", collection_name=collection_name), ) return RAGEngine(cfg, workspace) def test_dim_check_does_not_raise_on_non_empty_ndarray(temp_workspace): db_path = os.path.abspath(os.path.join(temp_workspace, ".slop_cache", "chroma_test_dim")) os.makedirs(db_path, exist_ok=True) _make_chroma_collection_with_orphan_no_metadata(db_path, "test_dim") engine = _build_engine(temp_workspace, "test_dim") assert engine.collection is not None def test_get_all_indexed_paths_handles_none_metadata(temp_workspace): db_path = os.path.abspath(os.path.join(temp_workspace, ".slop_cache", "chroma_test_orphan")) os.makedirs(db_path, exist_ok=True) _make_chroma_collection_with_orphan_no_metadata(db_path, "test_orphan") engine = _build_engine(temp_workspace, "test_orphan") paths = engine.get_all_indexed_paths() assert paths == [] def test_get_all_indexed_paths_returns_paths_with_metadata(temp_workspace): db_path = os.path.abspath(os.path.join(temp_workspace, ".slop_cache", "chroma_test_with_meta")) os.makedirs(db_path, exist_ok=True) from src.rag_engine import _get_chromadb chromadb_module, _ = _get_chromadb() client = chromadb_module.PersistentClient(path=db_path) col = client.get_or_create_collection(name="test_with_meta") emb = [[0.01 * i for i in range(LOCAL_EMBED_DIM)]] col.upsert(ids=["doc1"], embeddings=emb, documents=["doc1"], metadatas=[{"path": "f.txt", "chunk": 0}]) engine = _build_engine(temp_workspace, "test_with_meta") paths = engine.get_all_indexed_paths() assert "f.txt" in paths