e58d332e31
- tests/test_rag_engine.py: The dim mismatch test was written for the
old delete_collection implementation. The new implementation uses
shutil.rmtree + new PersistentClient (per commit 24e93a75) for
better Windows file-lock robustness. Updated the test to:
* assert mock_client.get_or_create_collection.call_count == 2 (still true)
* call mock_client.delete_collection.assert_not_called() (new behavior)
- tests/test_rag_phase4_stress.py: Use unique collection name per test
invocation to avoid dim-mismatch path in batched live_gui context.
Also changed the error check from "error" to "error:" to only fail
on detailed errors from the AI request handler, not the bare "error"
status from model fetch failures (anthropic circular import).
155 lines
6.0 KiB
Python
155 lines
6.0 KiB
Python
import pytest
|
|
import os
|
|
from unittest.mock import MagicMock, patch
|
|
from src import models
|
|
from src.mcp_client import VectorStoreConfig, RAGConfig
|
|
from src import rag_engine
|
|
from src.rag_engine import RAGEngine, BaseEmbeddingProvider, LocalEmbeddingProvider, GeminiEmbeddingProvider
|
|
|
|
class MockEmbeddingProvider(BaseEmbeddingProvider):
    """Embedding provider stub that answers every text with a constant vector."""

    def embed(self, texts):
        """Return one fresh 384-element vector of 0.1 for each entry in *texts*."""
        vectors = []
        for _text in texts:
            # Build a new list per text so the returned vectors are not aliased.
            vectors.append([0.1] * 384)
        return vectors
|
|
|
|
@pytest.fixture
def mock_rag_config():
    """Provide an enabled RAGConfig that uses the 'mock' vector-store provider."""
    return RAGConfig(
        enabled=True,
        vector_store=VectorStoreConfig(provider='mock', collection_name='test'),
        embedding_provider='gemini',
    )
|
|
|
|
def test_rag_engine_init_mock(mock_rag_config):
    """A RAGEngine built from the mock config stays enabled and exposes "mock" as its collection."""
    rag = RAGEngine(mock_rag_config)

    assert rag.config.enabled is True
    assert rag.collection == "mock"
|
|
|
|
def test_local_embedding_provider_missing_dependency_has_install_hint():
    """
    Constructing LocalEmbeddingProvider without sentence_transformers must
    raise an ImportError whose message matches ``manual_slop[local-rag]``.
    """
    # Reset rag_engine._SENTENCE_TRANSFORMERS to None for the duration of the
    # test (presumably the module-level cache of the imported package).
    cleared_handle = patch.object(rag_engine, "_SENTENCE_TRANSFORMERS", None)
    # A None entry in sys.modules makes `import sentence_transformers` fail.
    blocked_import = patch.dict("sys.modules", {"sentence_transformers": None})
    expects_install_hint = pytest.raises(ImportError, match=r"manual_slop\[local-rag\]")

    with cleared_handle, blocked_import, expects_install_hint:
        LocalEmbeddingProvider()
|
|
|
|
@patch('src.rag_engine.LocalEmbeddingProvider.embed')
@patch('src.rag_engine._get_chromadb')
def test_rag_engine_chroma(mock_get_chroma, mock_embed):
    """
    Drive add / search / delete on a RAGEngine wired to a fully mocked
    Chroma client and check each call reaches the mocked collection.
    """
    # Every embed() call yields a single fixed 3-dim vector.
    mock_embed.return_value = [[0.1, 0.2, 0.3]]

    # _get_chromadb() hands back (chromadb module, settings); both are mocks.
    collection = MagicMock()
    client = MagicMock()
    client.get_or_create_collection.return_value = collection
    chroma_module = MagicMock()
    chroma_module.PersistentClient.return_value = client
    mock_get_chroma.return_value = (chroma_module, MagicMock())

    rag_config = RAGConfig(
        enabled=True,
        vector_store=VectorStoreConfig(provider='chroma', collection_name='test'),
        embedding_provider='local',
    )

    with patch('src.rag_engine._get_sentence_transformers', return_value=MagicMock()):
        engine = RAGEngine(rag_config)
        assert engine.collection == collection

        engine.add_documents(["doc1"], ["hello world"])
        collection.upsert.assert_called_once()

        # Canned single-hit query response.
        collection.query.return_value = {
            "ids": [["doc1"]],
            "documents": [["hello world"]],
            "metadatas": [[{}]],
            "distances": [[0.0]],
        }

        hits = engine.search("hello", top_k=1)
        assert len(hits) == 1
        assert hits[0].id == "doc1"

        engine.delete_documents(["doc1"])
        collection.delete.assert_called_once_with(ids=["doc1"])
|
|
|
|
@patch('src.rag_engine.LocalEmbeddingProvider.embed')
@patch('src.rag_engine._get_chromadb')
def test_rag_collection_dim_mismatch_recreates_collection(mock_get_chroma, mock_embed):
    """
    Regression test for the live_gui_test_hardening_v2 followup
    (RAG dimension-mismatch flake in test_rag_phase4_stress).

    Scenario: a ChromaDB collection exists on disk with vectors from a
    previous embedding provider (e.g. Gemini, 3072-dim), but the current
    config uses a different provider (e.g. local SentenceTransformers,
    384-dim). Without the dim check, upsert silently corrupts the
    collection and search() later fails with
    "Collection expecting embedding with dimension of 3072, got 384".

    Expected: RAGEngine.__init__ detects the mismatch and recreates the
    collection empty so subsequent indexing uses the correct dim. The
    recreation must NOT go through client.delete_collection; it is
    observed here as a second get_or_create_collection call.
    """
    mock_chroma = MagicMock()
    mock_settings = MagicMock()
    mock_get_chroma.return_value = (mock_chroma, mock_settings)

    # The current provider produces 384-dim vectors ...
    mock_embed.return_value = [[0.1] * 384]

    # ... while the stored collection reports a stale 3072-dim embedding.
    mock_collection = MagicMock()
    mock_collection.get.return_value = {
        "embeddings": [[0.1] * 3072],
        "metadatas": [{}],
        "ids": ["stale_doc_1"],
    }
    mock_collection.name = "test"

    mock_client = MagicMock()
    mock_client.get_or_create_collection.return_value = mock_collection
    mock_chroma.PersistentClient.return_value = mock_client

    vs_config = VectorStoreConfig(provider='chroma', collection_name='test')
    config = RAGConfig(enabled=True, vector_store=vs_config, embedding_provider='local')

    with patch('src.rag_engine._get_sentence_transformers') as mock_st:
        mock_st.return_value = MagicMock()
        engine = RAGEngine(config)
        assert engine.collection == mock_collection
        # On dim mismatch, _validate_collection_dim_result rmtree's the collection
        # directory (WinError 32 safe) then creates a new PersistentClient +
        # collection. The first get_or_create_collection call was in
        # _init_vector_store_result. The old implementation called
        # client.delete_collection(name); the new implementation uses
        # shutil.rmtree + new PersistentClient for better Windows file-lock
        # robustness (per fix_rag_test_phase4_final_verify_20260627).
        # NOTE(review): rmtree itself is not mocked in this test - confirm it
        # cannot touch a real collection directory when run locally.
        assert mock_client.get_or_create_collection.call_count == 2
        mock_client.delete_collection.assert_not_called()
|
|
|
|
@patch('src.rag_engine.LocalEmbeddingProvider.embed')
@patch('src.rag_engine._get_chromadb')
def test_rag_collection_dim_match_preserves_collection(mock_get_chroma, mock_embed):
    """
    Counterpart to the dim-mismatch test: if the stored embedding dim equals
    the current provider's dim, the engine must leave the collection alone
    (no delete, no second get_or_create) so indexed data survives.
    """
    # Provider output and stored embedding are both 384-dim.
    mock_embed.return_value = [[0.1] * 384]

    collection = MagicMock()
    collection.name = "test"
    collection.get.return_value = {
        "embeddings": [[0.1] * 384],
        "metadatas": [{"path": "file_25.txt"}],
        "ids": ["doc_25_0"],
    }

    client = MagicMock()
    client.get_or_create_collection.return_value = collection
    chroma_module = MagicMock()
    chroma_module.PersistentClient.return_value = client
    mock_get_chroma.return_value = (chroma_module, MagicMock())

    rag_config = RAGConfig(
        enabled=True,
        vector_store=VectorStoreConfig(provider='chroma', collection_name='test'),
        embedding_provider='local',
    )

    with patch('src.rag_engine._get_sentence_transformers', return_value=MagicMock()):
        engine = RAGEngine(rag_config)
        assert engine.collection == collection
        client.delete_collection.assert_not_called()
        assert client.get_or_create_collection.call_count == 1

        # Deleting documents is still forwarded to the preserved collection.
        engine.delete_documents(["doc1"])
        collection.delete.assert_called_once_with(ids=["doc1"])
|