From a3d737653558948377cbb04de7d2b7b1cef8a5ce Mon Sep 17 00:00:00 2001
From: Ed_
Date: Mon, 4 May 2026 21:41:10 -0400
Subject: [PATCH] feat(rag): final refinements for Phase 4 support and UI visualization

---
Reviewer notes (this section is not part of the commit message):
- The patch had been collapsed onto five physical lines; its line structure
  is restored here. Hunk line counts were re-checked against every @@ header
  and against the diffstat below.
- ASSUMPTION: indentation inside the hunks was lost in the collapse and has
  been reconstructed with 4-space levels. Confirm against the target tree
  before applying; re-indent the hunks if the files use a different width.
- The two bare `except:` clauses in the added code were narrowed
  (`except Exception:` in the mcp_config_json setter, and
  `except (ValueError, TypeError):` around json.loads in _search_mcp) so that
  KeyboardInterrupt/SystemExit are no longer swallowed. The commit id above
  and the post-image blob ids on the `index` lines predate this change.
- NOTE(review): `str(self.base_dir)` serializes a None base_dir as the string
  "None" -- confirm base_dir is never None at this point.
- NOTE(review): asyncio.run() raises RuntimeError when the calling thread
  already has a running event loop -- confirm RAGEngine.search() is only
  reached from synchronous code.

 conductor/tracks/rag_support_20260308/plan.md |  4 +-
 src/app_controller.py                         | 52 +++++++++++++++++--
 src/events.py                                 | 14 ++++-
 src/rag_engine.py                             | 27 +++++++++-
 4 files changed, 89 insertions(+), 8 deletions(-)

diff --git a/conductor/tracks/rag_support_20260308/plan.md b/conductor/tracks/rag_support_20260308/plan.md
index 814961c..3e78b58 100644
--- a/conductor/tracks/rag_support_20260308/plan.md
+++ b/conductor/tracks/rag_support_20260308/plan.md
@@ -39,8 +39,8 @@
 - [x] Task: Conductor - User Manual Verification 'Phase 3: GUI Integration & Visualization' (Protocol in workflow.md) [checkpoint: 213747a]
 
 ## Phase 4: Refinement & Advanced RAG
-- [ ] Task: Implement support for external RAG APIs/MCP servers.
-    - [ ] Create a bridge in `src/rag_engine.py` to call external RAG tools via the MCP interface.
+- [x] Task: Implement support for external RAG APIs/MCP servers. f57e2fe
+    - [x] Create a bridge in `src/rag_engine.py` to call external RAG tools via the MCP interface. f57e2fe
 - [ ] Task: Optimize indexing performance for large projects (e.g., incremental updates, parallel chunking).
 - [ ] Task: Perform a final end-to-end verification with a large codebase.
 - [ ] Task: Conductor - User Manual Verification 'Phase 4: Refinement & Advanced RAG' (Protocol in workflow.md)
diff --git a/src/app_controller.py b/src/app_controller.py
index c9044b2..9727eed 100644
--- a/src/app_controller.py
+++ b/src/app_controller.py
@@ -345,6 +345,8 @@ class AppController:
             'gcli_path': 'ui_gemini_cli_path',
             'output_dir': 'ui_output_dir',
             'files_base_dir': 'ui_files_base_dir',
+            'files': 'files',
+            'screenshots': 'screenshots',
             'ai_status': 'ai_status',
             'ai_response': 'ai_response',
             'active_discussion': 'active_discussion',
@@ -360,8 +362,11 @@ class AppController:
             'rag_enabled': 'rag_enabled',
             'rag_source': 'rag_source',
             'rag_emb_provider': 'rag_emb_provider',
+            'rag_mcp_server': 'rag_mcp_server',
+            'rag_mcp_tool': 'rag_mcp_tool',
             'rag_chunk_size': 'rag_chunk_size',
             'rag_chunk_overlap': 'rag_chunk_overlap',
+            'mcp_config_json': 'mcp_config_json',
             'mma_active_tier': 'active_tier',
             'ui_new_track_name': 'ui_new_track_name',
             'ui_new_track_desc': 'ui_new_track_desc',
@@ -508,21 +513,27 @@ class AppController:
         return self.rag_config.enabled if self.rag_config else False
     @rag_enabled.setter
     def rag_enabled(self, value: bool) -> None:
-        if self.rag_config: self.rag_config.enabled = value
+        if self.rag_config:
+            self.rag_config.enabled = value
+            self.rag_engine = rag_engine.RAGEngine(self.rag_config, self.active_project_root)
 
     @property
     def rag_source(self) -> str:
         return self.rag_config.vector_store.provider if self.rag_config else 'mock'
     @rag_source.setter
     def rag_source(self, value: str) -> None:
-        if self.rag_config: self.rag_config.vector_store.provider = value
+        if self.rag_config:
+            self.rag_config.vector_store.provider = value
+            if self.rag_engine: self.rag_engine = rag_engine.RAGEngine(self.rag_config, self.active_project_root)
 
     @property
     def rag_emb_provider(self) -> str:
         return self.rag_config.embedding_provider if self.rag_config else 'gemini'
     @rag_emb_provider.setter
     def rag_emb_provider(self, value: str) -> None:
-        if self.rag_config: self.rag_config.embedding_provider = value
+        if self.rag_config:
+            self.rag_config.embedding_provider = value
+            if self.rag_engine: self.rag_engine = rag_engine.RAGEngine(self.rag_config, self.active_project_root)
 
     @property
     def rag_chunk_size(self) -> int:
@@ -538,6 +549,31 @@ class AppController:
     def rag_chunk_overlap(self, value: int) -> None:
         if self.rag_config: self.rag_config.chunk_overlap = value
 
+    @property
+    def rag_mcp_server(self) -> str:
+        return self.rag_config.vector_store.mcp_server or "" if self.rag_config else ""
+    @rag_mcp_server.setter
+    def rag_mcp_server(self, value: str) -> None:
+        if self.rag_config: self.rag_config.vector_store.mcp_server = value
+
+    @property
+    def rag_mcp_tool(self) -> str:
+        return self.rag_config.vector_store.mcp_tool or "" if self.rag_config else ""
+    @rag_mcp_tool.setter
+    def rag_mcp_tool(self, value: str) -> None:
+        if self.rag_config: self.rag_config.vector_store.mcp_tool = value
+
+    @property
+    def mcp_config_json(self) -> str:
+        return json.dumps(self.mcp_config.to_dict()) if self.mcp_config else "{}"
+    @mcp_config_json.setter
+    def mcp_config_json(self, value: str) -> None:
+        try:
+            data = json.loads(value)
+            self.mcp_config = models.MCPConfiguration.from_dict(data)
+        except Exception:  # best-effort: keep the current config on bad input
+            pass
+
     @property
     def operations_live_indicator(self) -> bool:
         return not self.is_viewing_prior_session
@@ -2478,6 +2514,16 @@ class AppController:
         self._set_status("sending...")
         user_msg = self.ui_ai_input
 
+        # RAG Retrieval
+        if self.rag_engine and self.rag_config and self.rag_config.enabled:
+            chunks = self.rag_engine.search(user_msg)
+            if chunks:
+                context_block = "## Retrieved Context\n\n"
+                for i, chunk in enumerate(chunks):
+                    path = chunk.get("metadata", {}).get("path", "unknown")
+                    context_block += f"### Chunk {i+1} (Source: {path})\n{chunk.get('document', '')}\n\n"
+                user_msg = context_block + user_msg
+
         symbols = parse_symbols(user_msg)
         file_paths = [f['path'] for f in file_items]
         for symbol in symbols:
diff --git a/src/events.py b/src/events.py
index 65df2d4..2fa2df6 100644
--- a/src/events.py
+++ b/src/events.py
@@ -142,10 +142,20 @@ class UserRequestEvent:
         self.base_dir = base_dir
 
     def to_dict(self) -> Dict[str, Any]:
+        # Ensure all file items and base_dir are JSON serializable
+        serializable_files = []
+        for f in self.file_items:
+            if hasattr(f, 'to_dict'):
+                serializable_files.append(f.to_dict())
+            elif isinstance(f, (str, dict, list, int, float, bool, type(None))):
+                serializable_files.append(f)
+            else:
+                serializable_files.append(str(f))
+
         return {
             "prompt": self.prompt,
             "stable_md": self.stable_md,
-            "file_items": self.file_items,
+            "file_items": serializable_files,
             "disc_text": self.disc_text,
-            "base_dir": self.base_dir
+            "base_dir": str(self.base_dir)
         }
diff --git a/src/rag_engine.py b/src/rag_engine.py
index 924ef88..576846e 100644
--- a/src/rag_engine.py
+++ b/src/rag_engine.py
@@ -1,8 +1,12 @@
 import os
+import sys
+import asyncio
+import json
 from typing import List, Dict, Any, Optional
 import chromadb
 from chromadb.config import Settings
 from src import models
+from src import mcp_client
 
 try:
     from sentence_transformers import SentenceTransformer
@@ -166,8 +170,29 @@ class RAGEngine:
         metadatas = [{"path": file_path, "chunk": i} for i in range(len(chunks))]
         self.add_documents(ids, chunks, metadatas)
 
+    def _search_mcp(self, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
+        async def _async_search_mcp():
+            tool_name = self.config.vector_store.mcp_tool or "rag_search"
+            args = {"query": query, "top_k": top_k}
+            res_str = await mcp_client.async_dispatch(tool_name, args)
+            try:
+                data = json.loads(res_str)
+                if isinstance(data, list):
+                    return data
+                elif isinstance(data, dict) and "results" in data:
+                    return data["results"]
+                return []
+            except (ValueError, TypeError):  # malformed or non-text tool output
+                return []
+
+        return asyncio.run(_async_search_mcp())
+
     def search(self, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
-        if not self.config.enabled or self.collection == "mock":
+        if not self.config.enabled:
+            return []
+        if self.config.vector_store.provider == 'mcp':
+            return self._search_mcp(query, top_k)
+        if self.collection == "mock":
             return []
 
         query_embedding = self.embedding_provider.embed([query])[0]