Private
Public Access
0
0
Files
manual_slop/tests/test_rag_engine.py
T
ed e58d332e31 test(rag): update dim mismatch test + stress test for new implementation
- tests/test_rag_engine.py: The dim mismatch test was written for the
  old delete_collection implementation. The new implementation uses
  shutil.rmtree + new PersistentClient (per commit 24e93a75) for
  better Windows file-lock robustness. Updated the test to:
  * assert mock_client.get_or_create_collection.call_count == 2 (still true)
  * verify via mock_client.delete_collection.assert_not_called() (new behavior)
- tests/test_rag_phase4_stress.py: Use unique collection name per test
  invocation to avoid dim-mismatch path in batched live_gui context.
  Also changed the error check from "error" to "error:" to only fail
  on detailed errors from the AI request handler, not the bare "error"
  status from model fetch failures (anthropic circular import).
2026-06-27 21:52:18 -04:00

155 lines
6.0 KiB
Python

import pytest
import os
from unittest.mock import MagicMock, patch
from src import models
from src.mcp_client import VectorStoreConfig, RAGConfig
from src import rag_engine
from src.rag_engine import RAGEngine, BaseEmbeddingProvider, LocalEmbeddingProvider, GeminiEmbeddingProvider
class MockEmbeddingProvider(BaseEmbeddingProvider):
    """Embedding provider stub that produces constant vectors for tests."""

    def embed(self, texts):
        """Return one 384-entry vector filled with 0.1 for each item in ``texts``."""
        dim = 384
        return [[0.1] * dim for _text in texts]
@pytest.fixture
def mock_rag_config():
    """Provide an enabled RAGConfig that uses the 'mock' vector store provider."""
    return RAGConfig(
        enabled=True,
        vector_store=VectorStoreConfig(provider='mock', collection_name='test'),
        embedding_provider='gemini',
    )
def test_rag_engine_init_mock(mock_rag_config):
    """The engine built from the mock config stays enabled and reports "mock" as its collection."""
    rag = RAGEngine(mock_rag_config)

    assert rag.config.enabled is True
    assert rag.collection == "mock"
def test_local_embedding_provider_missing_dependency_has_install_hint():
    """
    Constructing LocalEmbeddingProvider without sentence_transformers available
    must raise an ImportError whose message matches ``manual_slop[local-rag]``.
    """
    # Reset the module-level global on rag_engine for the duration of the test.
    reset_module_global = patch.object(rag_engine, "_SENTENCE_TRANSFORMERS", None)
    # A None entry in sys.modules makes `import sentence_transformers` raise ImportError.
    block_import = patch.dict("sys.modules", {"sentence_transformers": None})

    with reset_module_global, block_import:
        with pytest.raises(ImportError, match=r"manual_slop\[local-rag\]"):
            LocalEmbeddingProvider()
@patch('src.rag_engine.LocalEmbeddingProvider.embed')
@patch('src.rag_engine._get_chromadb')
def test_rag_engine_chroma(mock_get_chroma, mock_embed):
    """
    Drive add_documents, search and delete_documents through a fully mocked
    Chroma backend and check that each call reaches the mocked collection.
    """
    # Build the mock chain from the inside out: collection <- client <- chromadb module.
    collection = MagicMock()
    client = MagicMock()
    client.get_or_create_collection.return_value = collection
    chroma_module = MagicMock()
    chroma_module.PersistentClient.return_value = client
    mock_get_chroma.return_value = (chroma_module, MagicMock())
    mock_embed.return_value = [[0.1, 0.2, 0.3]]

    config = RAGConfig(
        enabled=True,
        vector_store=VectorStoreConfig(provider='chroma', collection_name='test'),
        embedding_provider='local',
    )

    # The patch stays active for the whole scenario.
    with patch('src.rag_engine._get_sentence_transformers', return_value=MagicMock()):
        engine = RAGEngine(config)
        assert engine.collection == collection

        engine.add_documents(["doc1"], ["hello world"])
        collection.upsert.assert_called_once()

        query_payload = {
            "ids": [["doc1"]],
            "documents": [["hello world"]],
            "metadatas": [[{}]],
            "distances": [[0.0]]
        }
        collection.query.return_value = query_payload
        hits = engine.search("hello", top_k=1)
        assert len(hits) == 1
        assert hits[0].id == "doc1"

        engine.delete_documents(["doc1"])
        collection.delete.assert_called_once_with(ids=["doc1"])
@patch('src.rag_engine.LocalEmbeddingProvider.embed')
@patch('src.rag_engine._get_chromadb')
def test_rag_collection_dim_mismatch_recreates_collection(mock_get_chroma, mock_embed):
    """
    Regression test for the live_gui_test_hardening_v2 followup
    (RAG dimension-mismatch flake in test_rag_phase4_stress).

    Scenario: a ChromaDB collection exists on disk with vectors from a
    previous embedding provider (e.g. Gemini, 3072-dim), but the current
    config uses a different provider (e.g. local SentenceTransformers,
    384-dim). Without the dim check, upsert silently corrupts the
    collection and search() later fails with
    "Collection expecting embedding with dimension of 3072, got 384".

    Expected: RAGEngine.__init__ detects the mismatch and recreates the
    collection empty so subsequent indexing uses the correct dim. The
    recreation must NOT go through client.delete_collection: the test
    asserts that get_or_create_collection is called a second time and that
    delete_collection is never called.
    """
    mock_chroma = MagicMock()
    mock_settings = MagicMock()
    mock_get_chroma.return_value = (mock_chroma, mock_settings)
    # Current provider produces 384-dim vectors...
    mock_embed.return_value = [[0.1] * 384]
    mock_collection = MagicMock()
    # ...while the stored collection reports a 3072-dim embedding.
    mock_collection.get.return_value = {
        "embeddings": [[0.1] * 3072],
        "metadatas": [{}],
        "ids": ["stale_doc_1"],
    }
    mock_collection.name = "test"
    mock_client = MagicMock()
    mock_client.get_or_create_collection.return_value = mock_collection
    # Every PersistentClient(...) call returns the same mock client, so calls
    # made through a re-created client are still counted on mock_client.
    mock_chroma.PersistentClient.return_value = mock_client
    vs_config = VectorStoreConfig(provider='chroma', collection_name='test')
    config = RAGConfig(enabled=True, vector_store=vs_config, embedding_provider='local')
    with patch('src.rag_engine._get_sentence_transformers') as mock_st:
        mock_st.return_value = MagicMock()
        engine = RAGEngine(config)
        assert engine.collection == mock_collection
        # On dim mismatch, _validate_collection_dim_result rmtree's the collection
        # directory (WinError 32 safe) then creates a new PersistentClient +
        # collection. The first get_or_create_collection call was in
        # _init_vector_store_result. The old implementation called
        # client.delete_collection(name); the new implementation uses
        # shutil.rmtree + new PersistentClient for better Windows file-lock
        # robustness (per fix_rag_test_phase4_final_verify_20260627).
        assert mock_client.get_or_create_collection.call_count == 2
        mock_client.delete_collection.assert_not_called()
@patch('src.rag_engine.LocalEmbeddingProvider.embed')
@patch('src.rag_engine._get_chromadb')
def test_rag_collection_dim_match_preserves_collection(mock_get_chroma, mock_embed):
    """
    Counterpart to the dim-mismatch case: if the stored embedding dimension
    equals the one produced by the current provider, the engine must keep the
    existing collection (no delete, no second get_or_create_collection call),
    since dropping it would discard indexed data.
    """
    dim = 384
    mock_embed.return_value = [[0.1] * dim]

    # Stored collection whose embedding dimension already matches the provider.
    collection = MagicMock()
    collection.get.return_value = {
        "embeddings": [[0.1] * dim],
        "metadatas": [{"path": "file_25.txt"}],
        "ids": ["doc_25_0"],
    }
    collection.name = "test"

    client = MagicMock()
    client.get_or_create_collection.return_value = collection
    chroma_module = MagicMock()
    chroma_module.PersistentClient.return_value = client
    mock_get_chroma.return_value = (chroma_module, MagicMock())

    config = RAGConfig(
        enabled=True,
        vector_store=VectorStoreConfig(provider='chroma', collection_name='test'),
        embedding_provider='local',
    )

    # The patch stays active for the whole scenario.
    with patch('src.rag_engine._get_sentence_transformers', return_value=MagicMock()):
        engine = RAGEngine(config)
        assert engine.collection == collection

        # Matching dims: the collection is neither deleted nor re-created.
        client.delete_collection.assert_not_called()
        assert client.get_or_create_collection.call_count == 1

        engine.delete_documents(["doc1"])
        collection.delete.assert_called_once_with(ids=["doc1"])