From 9e4fac496dd3fe18a70aa4a6fa9e75af8e5806e0 Mon Sep 17 00:00:00 2001 From: razor950 Date: Sat, 6 Jun 2026 13:21:28 -0400 Subject: [PATCH] made local rag needs optional (prevents having to have torch / sentence-transformers if you never use local embedding) --- conductor/tech-stack.md | 3 ++- pyproject.toml | 4 ++++ src/rag_engine.py | 23 +++++++++-------------- tests/test_rag_engine.py | 9 ++++++++- 4 files changed, 23 insertions(+), 16 deletions(-) diff --git a/conductor/tech-stack.md b/conductor/tech-stack.md index a241231c..7bf584bb 100644 --- a/conductor/tech-stack.md +++ b/conductor/tech-stack.md @@ -28,6 +28,7 @@ - **DeepSeek-V3:** Tier 3 Worker model optimized for code implementation. - **DeepSeek-R1:** Specialized reasoning model for complex logical chains and "thinking" traces. - **Gemini Embedding 001:** Default embedding model for RAG vector store. +- **sentence-transformers:** Optional `local-rag` extra for fully local RAG embeddings. Not part of the default install because it pulls in PyTorch. ## Configuration & Tooling @@ -57,7 +58,7 @@ - **`/api/ask` Protocol:** Non-blocking, ID-based challenge/response for synchronous HITL approvals from external contexts. - **`_predefined_callbacks` and `_gettable_fields`:** AppController-owned registries that the Hook API consumes to expose any App method as a `custom_callback` action. -- **src/rag_engine.py:** Core RAG implementation managing the vector store lifecycle, chunking strategies (character-based and AST-aware), and multi-provider search. Integrates with **ChromaDB** for local persistence and provides a bridge for external MCP retrieval tools. +- **src/rag_engine.py:** Core RAG implementation managing the vector store lifecycle, chunking strategies (character-based and AST-aware), and multi-provider search. Integrates with **ChromaDB** for local persistence, uses external embeddings by default, and provides an optional local embedding path via `manual_slop[local-rag]`. - **src/beads_client.py:** Python client for interacting with the [Beads](https://github.com/steveyegge/beads) / Dolt backend. Handles repository initialization, bead creation, status updates, and graph queries. diff --git a/pyproject.toml b/pyproject.toml index b5ffafb6..52d933c1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,10 @@ dependencies = [ "openai", "chromadb>=1.5.8", +] + +[project.optional-dependencies] +local-rag = [ "sentence-transformers>=5.4.1", ] diff --git a/src/rag_engine.py b/src/rag_engine.py index 3287f1bd..08c01866 100644 --- a/src/rag_engine.py +++ b/src/rag_engine.py @@ -16,6 +16,7 @@ from src.file_cache import ASTParser _SENTENCE_TRANSFORMERS = None _GOOGLE_GENAI = None _CHROMADB = None +LOCAL_RAG_INSTALL_HINT = "Local RAG embeddings require sentence-transformers. Install with manual_slop[local-rag] to use local embeddings." def _get_sentence_transformers(): @@ -24,6 +25,10 @@ def _get_sentence_transformers(): try: from sentence_transformers import SentenceTransformer _SENTENCE_TRANSFORMERS = SentenceTransformer + except ModuleNotFoundError as e: + if e.name == "sentence_transformers": + raise ImportError(LOCAL_RAG_INSTALL_HINT) from e + raise except Exception as e: sys.stderr.write(f"FAILED to import sentence_transformers: {e}\n") sys.stderr.flush() @@ -52,22 +57,12 @@ class BaseEmbeddingProvider: class LocalEmbeddingProvider(BaseEmbeddingProvider): def __init__(self, model_name: str = 'all-MiniLM-L6-v2'): - self.model = None - try: - ST = _get_sentence_transformers() - if ST: - self.model = ST(model_name) - except Exception as e: - sys.stderr.write(f"LocalEmbeddingProvider failed to load model {model_name}: {e}. Using dummy embeddings.\n") - sys.stderr.flush() + ST = _get_sentence_transformers() + self.model = ST(model_name) def embed(self, texts: List[str]) -> List[List[float]]: - if self.model: - embeddings = self.model.encode(texts) - return embeddings.tolist() - else: - # Dummy embeddings (384 dims for all-MiniLM-L6-v2) - return [[0.0] * 384 for _ in texts] + embeddings = self.model.encode(texts) + return embeddings.tolist() class GeminiEmbeddingProvider(BaseEmbeddingProvider): def __init__(self, model_name: str = 'gemini-embedding-001'): diff --git a/tests/test_rag_engine.py b/tests/test_rag_engine.py index c9d50133..370d20d5 100644 --- a/tests/test_rag_engine.py +++ b/tests/test_rag_engine.py @@ -2,6 +2,7 @@ import pytest import os from unittest.mock import MagicMock, patch from src import models +from src import rag_engine from src.rag_engine import RAGEngine, BaseEmbeddingProvider, LocalEmbeddingProvider, GeminiEmbeddingProvider class MockEmbeddingProvider(BaseEmbeddingProvider): @@ -11,13 +12,19 @@ class MockEmbeddingProvider(BaseEmbeddingProvider): @pytest.fixture def mock_rag_config(): vs_config = models.VectorStoreConfig(provider='mock', collection_name='test') - return models.RAGConfig(enabled=True, vector_store=vs_config, embedding_provider='local') + return models.RAGConfig(enabled=True, vector_store=vs_config, embedding_provider='gemini') def test_rag_engine_init_mock(mock_rag_config): engine = RAGEngine(mock_rag_config) assert engine.config.enabled is True assert engine.collection == "mock" +def test_local_embedding_provider_missing_dependency_has_install_hint(): + with patch.object(rag_engine, "_SENTENCE_TRANSFORMERS", None): + with patch.dict("sys.modules", {"sentence_transformers": None}): + with pytest.raises(ImportError, match=r"manual_slop\[local-rag\]"): + LocalEmbeddingProvider() + @patch('src.rag_engine.LocalEmbeddingProvider.embed') @patch('src.rag_engine._get_chromadb') def test_rag_engine_chroma(mock_get_chroma, mock_embed):