import os
from unittest.mock import MagicMock, patch

import pytest

from src import models
from src import rag_engine
from src.rag_engine import (
    RAGEngine,
    BaseEmbeddingProvider,
    LocalEmbeddingProvider,
    GeminiEmbeddingProvider,
)


class MockEmbeddingProvider(BaseEmbeddingProvider):
    """Embedding provider stub: one constant 384-element vector per input text."""

    def embed(self, texts):
        """Return a fresh ``[0.1] * 384`` list for every item in ``texts``."""
        return [[0.1] * 384 for _text in texts]


@pytest.fixture
def mock_rag_config():
    """Enabled RAG config using the 'mock' vector store and 'gemini' embeddings."""
    store = models.VectorStoreConfig(provider='mock', collection_name='test')
    return models.RAGConfig(
        enabled=True,
        vector_store=store,
        embedding_provider='gemini',
    )


def _local_chroma_config():
    """Enabled RAG config using the 'chroma' vector store and 'local' embeddings."""
    store = models.VectorStoreConfig(provider='chroma', collection_name='test')
    return models.RAGConfig(
        enabled=True,
        vector_store=store,
        embedding_provider='local',
    )


def _wire_fake_chroma(mock_get_chroma, collection):
    """Make the patched ``_get_chromadb`` return a fake module serving ``collection``.

    The fake module's ``PersistentClient(...)`` yields a ``MagicMock`` client
    whose ``get_or_create_collection(...)`` returns ``collection``. The client
    is returned so the caller can assert on the calls made against it.
    """
    fake_chroma = MagicMock()
    fake_settings = MagicMock()
    mock_get_chroma.return_value = (fake_chroma, fake_settings)

    client = MagicMock()
    client.get_or_create_collection.return_value = collection
    fake_chroma.PersistentClient.return_value = client
    return client


def test_rag_engine_init_mock(mock_rag_config):
    """With the 'mock' store the engine keeps its config and uses "mock" as collection."""
    engine = RAGEngine(mock_rag_config)

    assert engine.config.enabled is True
    assert engine.collection == "mock"


def test_local_embedding_provider_missing_dependency_has_install_hint():
    """A missing sentence_transformers must surface an ImportError with the install hint."""
    # Clear the module-level cache attribute and block the import itself:
    # a ``None`` entry in ``sys.modules`` makes ``import`` raise ImportError.
    no_cached_module = patch.object(rag_engine, "_SENTENCE_TRANSFORMERS", None)
    import_blocked = patch.dict("sys.modules", {"sentence_transformers": None})
    expect_hint = pytest.raises(ImportError, match=r"manual_slop\[local-rag\]")

    with no_cached_module, import_blocked, expect_hint:
        LocalEmbeddingProvider()


@patch('src.rag_engine.LocalEmbeddingProvider.embed')
@patch('src.rag_engine._get_chromadb')
def test_rag_engine_chroma(mock_get_chroma, mock_embed):
    """Exercise add / search / delete against a fully mocked Chroma collection."""
    collection = MagicMock()
    _wire_fake_chroma(mock_get_chroma, collection)
    mock_embed.return_value = [[0.1, 0.2, 0.3]]
    config = _local_chroma_config()

    with patch('src.rag_engine._get_sentence_transformers', return_value=MagicMock()):
        engine = RAGEngine(config)
        assert engine.collection == collection

        # Indexing goes through the collection's upsert exactly once.
        engine.add_documents(["doc1"], ["hello world"])
        collection.upsert.assert_called_once()

        # Canned query response: a single hit for "doc1".
        collection.query.return_value = {
            "ids": [["doc1"]],
            "documents": [["hello world"]],
            "metadatas": [[{}]],
            "distances": [[0.0]],
        }
        hits = engine.search("hello", top_k=1)
        assert len(hits) == 1
        assert hits[0]["id"] == "doc1"

        engine.delete_documents(["doc1"])
        collection.delete.assert_called_once_with(ids=["doc1"])


@patch('src.rag_engine.LocalEmbeddingProvider.embed')
@patch('src.rag_engine._get_chromadb')
def test_rag_collection_dim_mismatch_recreates_collection(mock_get_chroma, mock_embed):
    """
    Regression test for the live_gui_test_hardening_v2 followup (the RAG
    dimension-mismatch flake seen in test_rag_phase4_stress).

    Scenario: a ChromaDB collection already exists on disk holding vectors
    from a previous embedding provider (e.g. Gemini, 3072-dim), while the
    current config selects a different provider (e.g. local
    SentenceTransformers, 384-dim). Without the dim check, upsert silently
    corrupts the collection and search() later fails with
    "Collection expecting embedding with dimension of 3072, got 384".

    Expected: RAGEngine.__init__ detects the mismatch, deletes the
    mismatched collection, and recreates it empty so that subsequent
    indexing uses the correct dim.
    """
    stale_collection = MagicMock()
    stale_collection.name = "test"
    # Stored vectors are 3072-dim ...
    stale_collection.get.return_value = {
        "embeddings": [[0.1] * 3072],
        "metadatas": [{}],
        "ids": ["stale_doc_1"],
    }
    client = _wire_fake_chroma(mock_get_chroma, stale_collection)
    # ... while the current provider produces 384-dim vectors.
    mock_embed.return_value = [[0.1] * 384]
    config = _local_chroma_config()

    with patch('src.rag_engine._get_sentence_transformers', return_value=MagicMock()):
        engine = RAGEngine(config)

        assert engine.collection == stale_collection
        client.delete_collection.assert_called_once_with("test")
        # One call for the initial lookup, one for the recreation.
        assert client.get_or_create_collection.call_count == 2


@patch('src.rag_engine.LocalEmbeddingProvider.embed')
@patch('src.rag_engine._get_chromadb')
def test_rag_collection_dim_match_preserves_collection(mock_get_chroma, mock_embed):
    """
    Companion test: when the existing collection's dim equals the current
    provider's dim, the engine must NOT delete the collection (doing so
    would discard indexed data).
    """
    live_collection = MagicMock()
    live_collection.name = "test"
    # Stored vectors and freshly produced vectors are both 384-dim.
    live_collection.get.return_value = {
        "embeddings": [[0.1] * 384],
        "metadatas": [{"path": "file_25.txt"}],
        "ids": ["doc_25_0"],
    }
    client = _wire_fake_chroma(mock_get_chroma, live_collection)
    mock_embed.return_value = [[0.1] * 384]
    config = _local_chroma_config()

    with patch('src.rag_engine._get_sentence_transformers', return_value=MagicMock()):
        engine = RAGEngine(config)

        assert engine.collection == live_collection
        client.delete_collection.assert_not_called()
        assert client.get_or_create_collection.call_count == 1

        engine.delete_documents(["doc1"])
        live_collection.delete.assert_called_once_with(ids=["doc1"])