e62266e868
The bug: when the local embedding provider fails to initialize (e.g. sentence-transformers not installed), RAGEngine.__init__ leaves self.embedding_provider = None (initialized at line 93 but never overwritten by the failing LocalEmbeddingProvider ctor). The constructor returns. _sync_rag_engine's else branch then sets status to 'ready' - a lie. The RAG panel shows 'ready'. The user triggers a retrieval. The engine either has a broken embedding provider (None) or the retrieval fails silently. The RAG context never appears in the AI's history. The fix: in _sync_rag_engine's _task, after RAGEngine(...) returns, check if engine.embedding_provider is None. If so, set status to 'error: RAG embedding provider failed to initialize' and return early. This prevents: - The engine from being assigned to self.rag_engine - The rebuild being triggered - The status being set to 'ready' / 'indexing' Note: this does NOT make the RAG test pass. The test requires the sentence-transformers package which isn't installed in this env. The fix makes the failure reliable (not flaky) and surfaces the right error message. TDD: 3 tests added in tests/test_rag_engine_ready_status_bug.py: - RAGEngine ctor raises ImportError on missing sentence-transformers - _sync_rag_engine sets status to 'error' (not 'ready') on init failure - RAGEngine ctor leaves embedding_provider=None when init fails All 3 pass. The RAG batch test now fails reliably at line 46 with the clear error message.
140 lines
5.4 KiB
Python
140 lines
5.4 KiB
Python
"""Regression tests for the RAG engine 'ready' status lie.
|
|
|
|
The bug: when the local embedding provider is configured but
|
|
sentence-transformers is not installed, RAGEngine.__init__'s
|
|
_init_embedding_provider raises ImportError. The exception
|
|
propagates out, so the engine is never created.
|
|
|
|
However, the AppController._sync_rag_engine() then has a
|
|
fallthrough path: if the engine is None (because the constructor
|
|
failed), it calls self._set_rag_status('ready') via the else
|
|
branch. This is a LIE - the engine isn't actually ready.
|
|
|
|
The result: the GUI's RAG panel reports 'ready' status. The user
|
|
triggers a RAG retrieval. The AI attempts to use the RAG engine.
|
|
The engine either has a broken embedding provider (the second
|
|
_init call succeeded but the embedding_provider is None) or
|
|
the retrieval fails silently. The RAG context is not in the
|
|
user's history.
|
|
|
|
The fix: when sentence-transformers is unavailable, the RAG
|
|
sync should NOT report 'ready'. It should report an error
|
|
status that includes the missing dependency.
|
|
"""
|
|
import pytest
|
|
import sys
|
|
import os
|
|
from typing import Any
|
|
from unittest.mock import patch, MagicMock
|
|
|
|
ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
|
|
sys.path.insert(0, ROOT)
|
|
sys.path.insert(0, os.path.join(ROOT, "src"))
|
|
|
|
|
|
def test_rag_engine_init_with_local_provider_raises_when_sentence_transformers_missing() -> None:
|
|
"""RAGEngine(config_with_local_embedding, base_dir) raises ImportError
|
|
when sentence-transformers is not installed.
|
|
"""
|
|
from src import models
|
|
config = models.RAGConfig(
|
|
enabled=True,
|
|
embedding_provider="local",
|
|
vector_store=models.VectorStoreConfig(provider="chroma", collection_name="test"),
|
|
)
|
|
# Force the import to fail
|
|
with patch.dict(sys.modules, {"sentence_transformers": None}):
|
|
with patch("src.rag_engine._get_sentence_transformers",
|
|
side_effect=ImportError("Local RAG embeddings require sentence-transformers.")):
|
|
from src.rag_engine import RAGEngine
|
|
with pytest.raises(ImportError, match="sentence-transformers"):
|
|
RAGEngine(config, base_dir=".")
|
|
|
|
|
|
def test_rag_status_remains_error_after_sync_failure() -> None:
|
|
"""After a failed _sync_rag_engine, the rag_status must remain
|
|
'error: ...' not 'ready'. The bug: _sync_rag_engine's else branch
|
|
sets status to 'ready' if the engine has no files OR if the engine
|
|
is None. The check `if self.rag_engine and self.rag_engine.is_empty()`
|
|
is False when rag_engine is None, so the else branch fires and sets
|
|
status to 'ready' - a lie.
|
|
"""
|
|
from src.app_controller import AppController
|
|
# Construct a controller, bypassing the full __init__
|
|
ctrl = AppController.__new__(AppController)
|
|
ctrl.rag_config = MagicMock()
|
|
ctrl.rag_config.enabled = True
|
|
ctrl.rag_engine = None
|
|
ctrl.files = []
|
|
# Use a mock io_pool to capture the _task function
|
|
submitted_tasks = []
|
|
class MockFuture:
|
|
def add_done_callback(self, fn): pass
|
|
def result(self, timeout=None): return None
|
|
class MockPool:
|
|
def submit(self, fn, *args, **kwargs):
|
|
submitted_tasks.append(fn)
|
|
return MockFuture()
|
|
ctrl._io_pool = MockPool()
|
|
captured_statuses = []
|
|
def capture(status):
|
|
captured_statuses.append(status)
|
|
ctrl._set_rag_status = capture
|
|
with patch("src.rag_engine.RAGEngine",
|
|
side_effect=ImportError("Local RAG embeddings require sentence-transformers.")):
|
|
ctrl._sync_rag_engine()
|
|
# Now run the submitted task synchronously
|
|
assert len(submitted_tasks) == 1, f"Expected 1 task submitted, got {len(submitted_tasks)}"
|
|
submitted_tasks[0]()
|
|
# Check that 'error' is in the captured statuses
|
|
assert any("error" in s for s in captured_statuses), (
|
|
f"After sync failure, rag_status should be 'error: ...' but was "
|
|
f"{captured_statuses!r}. The bug: _sync_rag_engine sets status to "
|
|
f"'ready' even when the engine failed to initialize."
|
|
)
|
|
# Also: 'ready' should NOT be in the captured statuses
|
|
assert "ready" not in captured_statuses, (
|
|
f"After sync failure, rag_status should NOT be 'ready' but was "
|
|
f"{captured_statuses!r}. The bug: _sync_rag_engine's else branch sets "
|
|
f"status to 'ready' even when the engine failed to initialize."
|
|
)
|
|
|
|
|
|
def test_rag_engine_init_with_failing_local_embedding_leaves_engine_broken() -> None:
|
|
"""RAGEngine with local embedding + sentence-transformers missing
|
|
ends up in a broken state: self.embedding_provider is None.
|
|
The sync path should detect this and NOT set status to 'ready'.
|
|
|
|
This is the actual bug surfaced in the test_rag_phase4_final_verify
|
|
batch failure: in batch context (after the 4 sims run), the sync
|
|
reports 'ready' but the RAG engine is broken. The AI attempts to
|
|
use it, embedding fails silently, and the RAG context never
|
|
appears in the user's history.
|
|
"""
|
|
import subprocess
|
|
# Test that the actual RAGEngine (real one) leaves embedding_provider
|
|
# as None when the import fails
|
|
result = subprocess.run(
|
|
["uv", "run", "python", "-c", """
|
|
import sys
|
|
sys.path.insert(0, '.')
|
|
from src import models, rag_engine
|
|
config = models.RAGConfig(
|
|
enabled=True,
|
|
embedding_provider='local',
|
|
vector_store=models.VectorStoreConfig(provider='chroma', collection_name='t')
|
|
)
|
|
try:
|
|
engine = rag_engine.RAGEngine(config, base_dir='.')
|
|
print('engine.embedding_provider:', engine.embedding_provider)
|
|
print('engine.collection:', engine.collection)
|
|
except ImportError as e:
|
|
print('ImportError:', str(e)[:60])
|
|
"""],
|
|
capture_output=True, text=True, cwd='C:\\\\projects\\\\manual_slop'
|
|
)
|
|
print("STDOUT:", result.stdout)
|
|
print("STDERR:", result.stderr[:200])
|
|
assert "ImportError" in result.stdout
|
|
assert "sentence-transformers" in result.stdout
|