Private
Public Access
0
0
Files
manual_slop/tests/test_rag_engine_ready_status_bug.py
T

132 lines
5.5 KiB
Python

"""Regression tests for the RAG engine 'ready' status lie.
The bug: when the local embedding provider is configured but
sentence-transformers is not installed, RAGEngine.__init__'s
_init_embedding_provider raises ImportError. The exception
propagates out, so the engine is never created.
However, the AppController._sync_rag_engine() then has a
fallthrough path: if the engine is None (because the constructor
failed), it calls self._set_rag_status('ready') via the else
branch. This is a LIE - the engine isn't actually ready.
The result: the GUI's RAG panel reports 'ready' status. The user
triggers a RAG retrieval. The AI attempts to use the RAG engine.
The engine either has a broken embedding provider (the second
_init call succeeded but the embedding_provider is None) or
the retrieval fails silently. The RAG context is not in the
user's history.
The fix: when sentence-transformers is unavailable, the RAG
sync should NOT report 'ready'. It should report an error
status that includes the missing dependency.
"""
import pytest
import sys
import os
from typing import Any
from unittest.mock import patch, MagicMock
ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
sys.path.insert(0, ROOT)
sys.path.insert(0, os.path.join(ROOT, "src"))
def test_rag_engine_init_with_local_provider_raises_when_sentence_transformers_missing() -> None:
"""RAGEngine(config_with_local_embedding, base_dir) raises ImportError
when sentence-transformers is not installed.
"""
from src import models
config = models.RAGConfig(
enabled=True,
embedding_provider="local",
vector_store=models.VectorStoreConfig(provider="chroma", collection_name="test"),
)
# Force the import to fail
with patch.dict(sys.modules, {"sentence_transformers": None}):
with patch("src.rag_engine._get_sentence_transformers",
side_effect=ImportError("Local RAG embeddings require sentence-transformers.")):
from src.rag_engine import RAGEngine
with pytest.raises(ImportError, match="sentence-transformers"):
RAGEngine(config, base_dir=".")
def test_rag_status_remains_error_after_sync_failure() -> None:
"""After a failed _sync_rag_engine, the rag_status must remain
'error: ...' not 'ready'. The bug: _sync_rag_engine's else branch
sets status to 'ready' if the engine has no files OR if the engine
is None. The check `if self.rag_engine and self.rag_engine.is_empty()`
is False when rag_engine is None, so the else branch fires and sets
status to 'ready' - a lie.
"""
from src.app_controller import AppController
# Construct a controller, bypassing the full __init__
ctrl = AppController.__new__(AppController)
ctrl.rag_config = MagicMock()
ctrl.rag_config.enabled = True
ctrl.rag_engine = None
ctrl.files = []
# Use a mock io_pool to capture the _task function
submitted_tasks = []
class MockFuture:
def add_done_callback(self, fn): pass
def result(self, timeout=None): return None
class MockPool:
def submit(self, fn, *args, **kwargs):
submitted_tasks.append(fn)
return MockFuture()
ctrl._io_pool = MockPool()
captured_statuses = []
def capture(status):
captured_statuses.append(status)
ctrl._set_rag_status = capture
with patch("src.rag_engine.RAGEngine",
side_effect=ImportError("Local RAG embeddings require sentence-transformers.")):
ctrl._sync_rag_engine()
# Now run the submitted task synchronously
assert len(submitted_tasks) == 1, f"Expected 1 task submitted, got {len(submitted_tasks)}"
submitted_tasks[0]()
# Check that 'error' is in the captured statuses
assert any("error" in s for s in captured_statuses), (
f"After sync failure, rag_status should be 'error: ...' but was "
f"{captured_statuses!r}. The bug: _sync_rag_engine sets status to "
f"'ready' even when the engine failed to initialize."
)
# Also: 'ready' should NOT be in the captured statuses
assert "ready" not in captured_statuses, (
f"After sync failure, rag_status should NOT be 'ready' but was "
f"{captured_statuses!r}. The bug: _sync_rag_engine's else branch sets "
f"status to 'ready' even when the engine failed to initialize."
)
def test_rag_engine_init_with_failing_local_embedding_leaves_engine_broken() -> None:
"""When the LocalEmbeddingProvider ctor fails (e.g. sentence-transformers
raises), the RAGEngine ctor itself raises ImportError. The sync path
catches this and sets rag_status to 'error: ...' (the existing
test_rag_status_remains_error_after_sync_failure covers this).
This test verifies the precondition: that RAGEngine.__init__ actually
raises ImportError when the local embedding provider can't be built,
rather than silently swallowing the error and leaving a broken engine.
The 2026-06-08 RAG batch failure root-cause analysis showed the
failure mode was NOT "engine is created with embedding_provider=None"
(which was the original test docstring's claim) — the constructor
RAISES. The actual bug was in the sync path's fallback to 'ready'
status, which test_rag_status_remains_error_after_sync_failure
verifies. This test is the lower-level sanity check that the
precondition for the sync-path test is real.
"""
from src import models
from src import rag_engine
config = models.RAGConfig(
enabled=True,
embedding_provider="local",
vector_store=models.VectorStoreConfig(provider="chroma", collection_name="t"),
)
with patch("src.rag_engine._get_sentence_transformers",
side_effect=ImportError("Local RAG embeddings require sentence-transformers.")):
with pytest.raises(ImportError, match="sentence-transformers"):
rag_engine.RAGEngine(config, base_dir=".")