132 lines
5.5 KiB
Python
132 lines
5.5 KiB
Python
"""Regression tests for the RAG engine 'ready' status lie.
|
|
|
|
The bug: when the local embedding provider is configured but
|
|
sentence-transformers is not installed, RAGEngine.__init__'s
|
|
_init_embedding_provider raises ImportError. The exception
|
|
propagates out, so the engine is never created.
|
|
|
|
However, the AppController._sync_rag_engine() then has a
|
|
fallthrough path: if the engine is None (because the constructor
|
|
failed), it calls self._set_rag_status('ready') via the else
|
|
branch. This is a LIE - the engine isn't actually ready.
|
|
|
|
The result: the GUI's RAG panel reports 'ready' status. The user
|
|
triggers a RAG retrieval. The AI attempts to use the RAG engine.
|
|
The engine either has a broken embedding provider (the second
|
|
_init call succeeded but the embedding_provider is None) or
|
|
the retrieval fails silently. The RAG context is not in the
|
|
user's history.
|
|
|
|
The fix: when sentence-transformers is unavailable, the RAG
|
|
sync should NOT report 'ready'. It should report an error
|
|
status that includes the missing dependency.
|
|
"""
|
|
import pytest
|
|
import sys
|
|
import os
|
|
from typing import Any
|
|
from unittest.mock import patch, MagicMock
|
|
|
|
# Repository root: the parent of the directory containing this test file.
ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))

# Put <ROOT>/src first and <ROOT> second on the import path so both
# `src.<module>` and bare `<module>` imports resolve.
sys.path[:0] = [os.path.join(ROOT, "src"), ROOT]
|
|
|
|
|
|
def test_rag_engine_init_with_local_provider_raises_when_sentence_transformers_missing() -> None:
    """RAGEngine(config_with_local_embedding, base_dir) raises ImportError
    when sentence-transformers is not installed.

    The missing dependency is simulated twice over: ``sentence_transformers``
    is mapped to ``None`` in ``sys.modules`` (so a plain ``import`` of it
    raises ImportError), and ``src.rag_engine._get_sentence_transformers`` is
    patched to raise the ImportError directly.
    """
    from src import models
    # Import the module under test BEFORE touching sys.modules. patch.dict
    # restores the dict to its pre-patch snapshot on exit, so a module that is
    # first imported inside the context would be evicted afterwards and
    # re-imported later as a different module object.
    from src.rag_engine import RAGEngine

    config = models.RAGConfig(
        enabled=True,
        embedding_provider="local",
        vector_store=models.VectorStoreConfig(provider="chroma", collection_name="test"),
    )

    # Force the import to fail
    with patch.dict(sys.modules, {"sentence_transformers": None}), patch(
        "src.rag_engine._get_sentence_transformers",
        side_effect=ImportError("Local RAG embeddings require sentence-transformers."),
    ):
        with pytest.raises(ImportError, match="sentence-transformers"):
            RAGEngine(config, base_dir=".")
|
|
|
|
|
|
def test_rag_status_remains_error_after_sync_failure() -> None:
    """After a failed _sync_rag_engine, the rag_status must remain
    'error: ...' not 'ready'. The bug: _sync_rag_engine's else branch
    sets status to 'ready' if the engine has no files OR if the engine
    is None. The check `if self.rag_engine and self.rag_engine.is_empty()`
    is False when rag_engine is None, so the else branch fires and sets
    status to 'ready' - a lie.

    The controller is built without running ``__init__``; its I/O pool and
    status setter are replaced by in-test fakes so the background task can
    be replayed synchronously and every reported status inspected.
    """
    from src.app_controller import AppController

    # Construct a controller, bypassing the full __init__
    ctrl = AppController.__new__(AppController)
    ctrl.rag_config = MagicMock()
    ctrl.rag_config.enabled = True
    ctrl.rag_engine = None
    ctrl.files = []

    # Use a mock io_pool to capture the submitted task together with the
    # arguments it was submitted with, so the replay below is faithful.
    submitted_tasks = []

    class MockFuture:
        """Inert stand-in for the future returned by the I/O pool."""

        def add_done_callback(self, fn):
            pass

        def result(self, timeout=None):
            return None

    class MockPool:
        """Records submissions instead of running them on a worker thread."""

        def submit(self, fn, *args, **kwargs):
            submitted_tasks.append((fn, args, kwargs))
            return MockFuture()

    ctrl._io_pool = MockPool()

    captured_statuses = []

    def capture(status):
        captured_statuses.append(status)

    ctrl._set_rag_status = capture

    with patch("src.rag_engine.RAGEngine",
               side_effect=ImportError("Local RAG embeddings require sentence-transformers.")):
        ctrl._sync_rag_engine()
        # Now run the submitted task synchronously. This happens while the
        # patch is still active so that an engine constructed inside the
        # deferred task hits the simulated ImportError as well.
        assert len(submitted_tasks) == 1, f"Expected 1 task submitted, got {len(submitted_tasks)}"
        task, task_args, task_kwargs = submitted_tasks[0]
        task(*task_args, **task_kwargs)

    # Check that 'error' is in the captured statuses
    assert any("error" in s for s in captured_statuses), (
        f"After sync failure, rag_status should be 'error: ...' but was "
        f"{captured_statuses!r}. The bug: _sync_rag_engine sets status to "
        f"'ready' even when the engine failed to initialize."
    )
    # Also: 'ready' should NOT be in the captured statuses
    assert "ready" not in captured_statuses, (
        f"After sync failure, rag_status should NOT be 'ready' but was "
        f"{captured_statuses!r}. The bug: _sync_rag_engine's else branch sets "
        f"status to 'ready' even when the engine failed to initialize."
    )
|
|
|
|
|
|
def test_rag_engine_init_with_failing_local_embedding_leaves_engine_broken() -> None:
    """Check that RAGEngine's constructor raises when the local embedder can't load.

    When building the LocalEmbeddingProvider fails (e.g. sentence-transformers
    raises), the RAGEngine constructor itself raises ImportError. The sync
    path is expected to catch that and set rag_status to 'error: ...'; that
    part is covered by test_rag_status_remains_error_after_sync_failure.

    This test pins down the precondition only: RAGEngine.__init__ really does
    raise ImportError when the local embedding provider can't be built,
    instead of silently swallowing the error and leaving a broken engine.

    The 2026-06-08 RAG batch failure root-cause analysis showed the failure
    mode was NOT "engine is created with embedding_provider=None" (the claim
    made by this test's original docstring) — the constructor RAISES. The
    actual bug was the sync path's fallback to 'ready' status, which
    test_rag_status_remains_error_after_sync_failure verifies. This is the
    lower-level sanity check that the sync-path test's precondition is real.
    """
    from src import models
    from src import rag_engine

    store_cfg = models.VectorStoreConfig(provider="chroma", collection_name="t")
    config = models.RAGConfig(
        enabled=True,
        embedding_provider="local",
        vector_store=store_cfg,
    )

    # Make the sentence-transformers loader fail the way a missing install would.
    failing_loader = patch(
        "src.rag_engine._get_sentence_transformers",
        side_effect=ImportError("Local RAG embeddings require sentence-transformers."),
    )
    with failing_loader, pytest.raises(ImportError, match="sentence-transformers"):
        rag_engine.RAGEngine(config, base_dir=".")
|