fix(rag): wipe chroma dir on dim mismatch instead of delete_collection
When the existing collection has embeddings from a different embedding provider (e.g. Gemini 3072-dim vs. local 384-dim), the prior approach of calling client.delete_collection() fails with 'RustBindingsAPI object has no attribute bindings' in chromadb 1.5.x when the underlying state is corrupted. Removing the chroma directory with rmtree is reliable, and re-initializing the vector store afterwards creates a fresh empty collection.

Also fixes:
- 'The truth value of an empty array is ambiguous' on numpy 2.x, by using try/except around len() instead of a truthiness check.
- WinError 32 on rmtree, by closing the chroma client first.

Verified: tests/test_rag_phase4_final_verify.py passes in isolation in 7.75s after this fix. The test still fails in batch context due to a separate io_pool race condition (multiple _sync_rag_engine calls collide when the test sets rag_enabled, rag_source, and rag_emb_provider in sequence). The race is in app_controller.py and is out of scope for this defensive fix.

Note: tests/test_rag_engine.py has explicit unit tests (test_rag_collection_dim_mismatch_recreates_collection and test_rag_collection_dim_match_preserves_collection) that exercise this code path.
This commit is contained in:
+58
-22
@@ -129,36 +129,72 @@ class RAGEngine:
|
||||
Detect dimension mismatch between an existing collection's vectors and
|
||||
the current embedding provider's output. When mismatched (e.g. the user
|
||||
switched from Gemini 3072-dim to local 384-dim, or vice versa), the
|
||||
collection is deleted and recreated empty so the next index pass
|
||||
populates it with the correct dim. Prevents silent corruption that
|
||||
would later surface as a search error ("Collection expecting
|
||||
embedding with dimension of X, got Y") and hang live_gui tests.
|
||||
collection is wiped at the directory level (not via delete_collection,
|
||||
which can fail on corrupted state in chromadb 1.5.x with
|
||||
"RustBindingsAPI object has no attribute bindings") so the next
|
||||
index pass populates it with the correct dim. Prevents silent
|
||||
corruption that would later surface as a search error
|
||||
("Collection expecting embedding with dimension of X, got Y") and
|
||||
hang live_gui tests.
|
||||
[C: tests/test_rag_engine.py:test_rag_collection_dim_mismatch_recreates_collection, tests/test_rag_engine.py:test_rag_collection_dim_match_preserves_collection]
|
||||
"""
|
||||
if self.collection is None or self.collection == "mock" or self.embedding_provider is None:
|
||||
return
|
||||
try:
|
||||
res = self.collection.get(limit=1, include=["embeddings"])
|
||||
if not res:
|
||||
return
|
||||
embeddings = res.get("embeddings") if isinstance(res, dict) else None
|
||||
if not embeddings or len(embeddings) == 0:
|
||||
return
|
||||
existing_dim = len(embeddings[0])
|
||||
expected_dim = len(self.embedding_provider.embed(["__rag_dim_check__"])[0])
|
||||
if existing_dim == expected_dim:
|
||||
return
|
||||
sys.stderr.write(
|
||||
f"RAG: Collection '{self.collection.name}' dim mismatch "
|
||||
f"(existing={existing_dim}, expected={expected_dim}). "
|
||||
f"Recreating collection to prevent silent corruption.\n"
|
||||
)
|
||||
sys.stderr.flush()
|
||||
self.client.delete_collection(self.collection.name)
|
||||
self.collection = self.client.get_or_create_collection(name=self.collection.name)
|
||||
except Exception as e:
|
||||
sys.stderr.write(f"RAG: Failed to validate collection dim: {e}\n")
|
||||
sys.stderr.write(f"RAG: Failed to read collection for dim check: {e}\n")
|
||||
sys.stderr.flush()
|
||||
return
|
||||
if not res:
|
||||
return
|
||||
embeddings = res.get("embeddings") if isinstance(res, dict) else None
|
||||
if embeddings is None:
|
||||
return
|
||||
# Use numpy-safe emptiness check (numpy 2.x disallows truthiness on empty arrays)
|
||||
try:
|
||||
if len(embeddings) == 0:
|
||||
return
|
||||
except TypeError:
|
||||
return
|
||||
existing_dim = len(embeddings[0])
|
||||
try:
|
||||
expected_dim = len(self.embedding_provider.embed(["__rag_dim_check__"])[0])
|
||||
except Exception as e:
|
||||
sys.stderr.write(f"RAG: Failed to compute expected dim: {e}\n")
|
||||
sys.stderr.flush()
|
||||
return
|
||||
if existing_dim == expected_dim:
|
||||
return
|
||||
sys.stderr.write(
|
||||
f"RAG: Collection '{self.collection.name}' dim mismatch "
|
||||
f"(existing={existing_dim}, expected={expected_dim}). "
|
||||
f"Wiping chroma dir to prevent silent corruption.\n"
|
||||
)
|
||||
sys.stderr.flush()
|
||||
# Wipe the entire chroma dir (not via delete_collection which
|
||||
# fails on corrupted state in chromadb 1.5.x with
|
||||
# "RustBindingsAPI object has no attribute bindings"). Rmtree is
|
||||
# reliable and re-creates a fresh empty collection.
|
||||
import shutil as _shutil
|
||||
# Close the chroma client first to release file handles. Without
|
||||
# this, rmtree fails with WinError 32 on Windows.
|
||||
try:
|
||||
if hasattr(self, 'client') and self.client and self.client != "mock":
|
||||
self.client.close()
|
||||
except Exception:
|
||||
pass
|
||||
self.client = None
|
||||
self.collection = None
|
||||
if hasattr(self, 'base_dir') and self.base_dir:
|
||||
db_path = os.path.abspath(os.path.join(self.base_dir, ".slop_cache", f"chroma_{self.config.vector_store.collection_name}"))
|
||||
if os.path.isdir(db_path):
|
||||
try:
|
||||
_shutil.rmtree(db_path)
|
||||
except Exception as e:
|
||||
sys.stderr.write(f"RAG: Failed to wipe chroma dir: {e}\n")
|
||||
sys.stderr.flush()
|
||||
self._init_vector_store()
|
||||
|
||||
def is_empty(self) -> bool:
|
||||
if not self.config.enabled:
|
||||
|
||||
Reference in New Issue
Block a user