feat(rag): final refinements for Phase 4 support and UI visualization
This commit is contained in:
@@ -39,8 +39,8 @@
|
|||||||
- [x] Task: Conductor - User Manual Verification 'Phase 3: GUI Integration & Visualization' (Protocol in workflow.md) [checkpoint: 213747a]
|
- [x] Task: Conductor - User Manual Verification 'Phase 3: GUI Integration & Visualization' (Protocol in workflow.md) [checkpoint: 213747a]
|
||||||
|
|
||||||
## Phase 4: Refinement & Advanced RAG
|
## Phase 4: Refinement & Advanced RAG
|
||||||
- [ ] Task: Implement support for external RAG APIs/MCP servers.
|
- [x] Task: Implement support for external RAG APIs/MCP servers. f57e2fe
|
||||||
- [ ] Create a bridge in `src/rag_engine.py` to call external RAG tools via the MCP interface.
|
- [x] Create a bridge in `src/rag_engine.py` to call external RAG tools via the MCP interface. f57e2fe
|
||||||
- [ ] Task: Optimize indexing performance for large projects (e.g., incremental updates, parallel chunking).
|
- [ ] Task: Optimize indexing performance for large projects (e.g., incremental updates, parallel chunking).
|
||||||
- [ ] Task: Perform a final end-to-end verification with a large codebase.
|
- [ ] Task: Perform a final end-to-end verification with a large codebase.
|
||||||
- [ ] Task: Conductor - User Manual Verification 'Phase 4: Refinement & Advanced RAG' (Protocol in workflow.md)
|
- [ ] Task: Conductor - User Manual Verification 'Phase 4: Refinement & Advanced RAG' (Protocol in workflow.md)
|
||||||
|
|||||||
+49
-3
@@ -345,6 +345,8 @@ class AppController:
|
|||||||
'gcli_path': 'ui_gemini_cli_path',
|
'gcli_path': 'ui_gemini_cli_path',
|
||||||
'output_dir': 'ui_output_dir',
|
'output_dir': 'ui_output_dir',
|
||||||
'files_base_dir': 'ui_files_base_dir',
|
'files_base_dir': 'ui_files_base_dir',
|
||||||
|
'files': 'files',
|
||||||
|
'screenshots': 'screenshots',
|
||||||
'ai_status': 'ai_status',
|
'ai_status': 'ai_status',
|
||||||
'ai_response': 'ai_response',
|
'ai_response': 'ai_response',
|
||||||
'active_discussion': 'active_discussion',
|
'active_discussion': 'active_discussion',
|
||||||
@@ -360,8 +362,11 @@ class AppController:
|
|||||||
'rag_enabled': 'rag_enabled',
|
'rag_enabled': 'rag_enabled',
|
||||||
'rag_source': 'rag_source',
|
'rag_source': 'rag_source',
|
||||||
'rag_emb_provider': 'rag_emb_provider',
|
'rag_emb_provider': 'rag_emb_provider',
|
||||||
|
'rag_mcp_server': 'rag_mcp_server',
|
||||||
|
'rag_mcp_tool': 'rag_mcp_tool',
|
||||||
'rag_chunk_size': 'rag_chunk_size',
|
'rag_chunk_size': 'rag_chunk_size',
|
||||||
'rag_chunk_overlap': 'rag_chunk_overlap',
|
'rag_chunk_overlap': 'rag_chunk_overlap',
|
||||||
|
'mcp_config_json': 'mcp_config_json',
|
||||||
'mma_active_tier': 'active_tier',
|
'mma_active_tier': 'active_tier',
|
||||||
'ui_new_track_name': 'ui_new_track_name',
|
'ui_new_track_name': 'ui_new_track_name',
|
||||||
'ui_new_track_desc': 'ui_new_track_desc',
|
'ui_new_track_desc': 'ui_new_track_desc',
|
||||||
@@ -508,21 +513,27 @@ class AppController:
|
|||||||
return self.rag_config.enabled if self.rag_config else False
|
return self.rag_config.enabled if self.rag_config else False
|
||||||
@rag_enabled.setter
|
@rag_enabled.setter
|
||||||
def rag_enabled(self, value: bool) -> None:
|
def rag_enabled(self, value: bool) -> None:
|
||||||
if self.rag_config: self.rag_config.enabled = value
|
if self.rag_config:
|
||||||
|
self.rag_config.enabled = value
|
||||||
|
self.rag_engine = rag_engine.RAGEngine(self.rag_config, self.active_project_root)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def rag_source(self) -> str:
|
def rag_source(self) -> str:
|
||||||
return self.rag_config.vector_store.provider if self.rag_config else 'mock'
|
return self.rag_config.vector_store.provider if self.rag_config else 'mock'
|
||||||
@rag_source.setter
|
@rag_source.setter
|
||||||
def rag_source(self, value: str) -> None:
|
def rag_source(self, value: str) -> None:
|
||||||
if self.rag_config: self.rag_config.vector_store.provider = value
|
if self.rag_config:
|
||||||
|
self.rag_config.vector_store.provider = value
|
||||||
|
if self.rag_engine: self.rag_engine = rag_engine.RAGEngine(self.rag_config, self.active_project_root)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def rag_emb_provider(self) -> str:
|
def rag_emb_provider(self) -> str:
|
||||||
return self.rag_config.embedding_provider if self.rag_config else 'gemini'
|
return self.rag_config.embedding_provider if self.rag_config else 'gemini'
|
||||||
@rag_emb_provider.setter
|
@rag_emb_provider.setter
|
||||||
def rag_emb_provider(self, value: str) -> None:
|
def rag_emb_provider(self, value: str) -> None:
|
||||||
if self.rag_config: self.rag_config.embedding_provider = value
|
if self.rag_config:
|
||||||
|
self.rag_config.embedding_provider = value
|
||||||
|
if self.rag_engine: self.rag_engine = rag_engine.RAGEngine(self.rag_config, self.active_project_root)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def rag_chunk_size(self) -> int:
|
def rag_chunk_size(self) -> int:
|
||||||
@@ -538,6 +549,31 @@ class AppController:
|
|||||||
def rag_chunk_overlap(self, value: int) -> None:
|
def rag_chunk_overlap(self, value: int) -> None:
|
||||||
if self.rag_config: self.rag_config.chunk_overlap = value
|
if self.rag_config: self.rag_config.chunk_overlap = value
|
||||||
|
|
||||||
|
@property
def rag_mcp_server(self) -> str:
    """Return the MCP server name stored on the RAG vector-store config.

    Yields an empty string when no RAG config is attached or when the
    stored value is falsy (e.g. ``None``).
    """
    config = self.rag_config
    if not config:
        return ""
    return config.vector_store.mcp_server or ""

@rag_mcp_server.setter
def rag_mcp_server(self, value: str) -> None:
    """Store *value* as the MCP server name; no-op without a RAG config."""
    config = self.rag_config
    if not config:
        return
    config.vector_store.mcp_server = value
|
||||||
|
|
||||||
|
@property
def rag_mcp_tool(self) -> str:
    """Return the MCP tool name stored on the RAG vector-store config.

    Yields an empty string when no RAG config is attached or when the
    stored value is falsy (e.g. ``None``).
    """
    if self.rag_config:
        tool_name = self.rag_config.vector_store.mcp_tool
        return tool_name or ""
    return ""

@rag_mcp_tool.setter
def rag_mcp_tool(self, value: str) -> None:
    """Store *value* as the MCP tool name; no-op without a RAG config."""
    if not self.rag_config:
        return
    self.rag_config.vector_store.mcp_tool = value
|
||||||
|
|
||||||
|
@property
def mcp_config_json(self) -> str:
    """Return the MCP configuration serialized as a JSON string.

    Uses ``self.mcp_config.to_dict()`` when a configuration is set and
    returns ``"{}"`` otherwise.
    """
    return json.dumps(self.mcp_config.to_dict()) if self.mcp_config else "{}"

@mcp_config_json.setter
def mcp_config_json(self, value: str) -> None:
    """Replace the MCP configuration from a JSON string, best-effort.

    The text is parsed with ``json.loads`` and handed to
    ``models.MCPConfiguration.from_dict``. If either step fails the
    current ``self.mcp_config`` is left untouched, as before. Unlike the
    previous bare ``except``, ``KeyboardInterrupt`` and ``SystemExit`` are
    no longer swallowed, and failures are recorded at debug level.
    """
    import logging

    log = logging.getLogger(__name__)
    try:
        data = json.loads(value)
    except (TypeError, ValueError):
        # json.JSONDecodeError subclasses ValueError; TypeError covers
        # non-string input such as None.
        log.debug("mcp_config_json: value is not valid JSON", exc_info=True)
        return
    try:
        self.mcp_config = models.MCPConfiguration.from_dict(data)
    except Exception:
        # Deliberately best-effort: a rejected payload keeps the old config.
        log.debug("mcp_config_json: configuration rejected", exc_info=True)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def operations_live_indicator(self) -> bool:
|
def operations_live_indicator(self) -> bool:
|
||||||
return not self.is_viewing_prior_session
|
return not self.is_viewing_prior_session
|
||||||
@@ -2478,6 +2514,16 @@ class AppController:
|
|||||||
self._set_status("sending...")
|
self._set_status("sending...")
|
||||||
user_msg = self.ui_ai_input
|
user_msg = self.ui_ai_input
|
||||||
|
|
||||||
|
# RAG Retrieval
|
||||||
|
if self.rag_engine and self.rag_config and self.rag_config.enabled:
|
||||||
|
chunks = self.rag_engine.search(user_msg)
|
||||||
|
if chunks:
|
||||||
|
context_block = "## Retrieved Context\n\n"
|
||||||
|
for i, chunk in enumerate(chunks):
|
||||||
|
path = chunk.get("metadata", {}).get("path", "unknown")
|
||||||
|
context_block += f"### Chunk {i+1} (Source: {path})\n{chunk.get('document', '')}\n\n"
|
||||||
|
user_msg = context_block + user_msg
|
||||||
|
|
||||||
symbols = parse_symbols(user_msg)
|
symbols = parse_symbols(user_msg)
|
||||||
file_paths = [f['path'] for f in file_items]
|
file_paths = [f['path'] for f in file_items]
|
||||||
for symbol in symbols:
|
for symbol in symbols:
|
||||||
|
|||||||
+12
-2
@@ -142,10 +142,20 @@ class UserRequestEvent:
|
|||||||
self.base_dir = base_dir
|
self.base_dir = base_dir
|
||||||
|
|
||||||
def to_dict(self) -> Dict[str, Any]:
|
def to_dict(self) -> Dict[str, Any]:
|
||||||
|
# Ensure all file items and base_dir are JSON serializable
|
||||||
|
serializable_files = []
|
||||||
|
for f in self.file_items:
|
||||||
|
if hasattr(f, 'to_dict'):
|
||||||
|
serializable_files.append(f.to_dict())
|
||||||
|
elif isinstance(f, (str, dict, list, int, float, bool, type(None))):
|
||||||
|
serializable_files.append(f)
|
||||||
|
else:
|
||||||
|
serializable_files.append(str(f))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"prompt": self.prompt,
|
"prompt": self.prompt,
|
||||||
"stable_md": self.stable_md,
|
"stable_md": self.stable_md,
|
||||||
"file_items": self.file_items,
|
"file_items": serializable_files,
|
||||||
"disc_text": self.disc_text,
|
"disc_text": self.disc_text,
|
||||||
"base_dir": self.base_dir
|
"base_dir": str(self.base_dir)
|
||||||
}
|
}
|
||||||
|
|||||||
+26
-1
@@ -1,8 +1,12 @@
|
|||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
from typing import List, Dict, Any, Optional
|
from typing import List, Dict, Any, Optional
|
||||||
import chromadb
|
import chromadb
|
||||||
from chromadb.config import Settings
|
from chromadb.config import Settings
|
||||||
from src import models
|
from src import models
|
||||||
|
from src import mcp_client
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from sentence_transformers import SentenceTransformer
|
from sentence_transformers import SentenceTransformer
|
||||||
@@ -166,8 +170,29 @@ class RAGEngine:
|
|||||||
metadatas = [{"path": file_path, "chunk": i} for i in range(len(chunks))]
|
metadatas = [{"path": file_path, "chunk": i} for i in range(len(chunks))]
|
||||||
self.add_documents(ids, chunks, metadatas)
|
self.add_documents(ids, chunks, metadatas)
|
||||||
|
|
||||||
|
def _search_mcp(self, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
    """Run a retrieval query through the configured MCP tool.

    Dispatches ``{"query": query, "top_k": top_k}`` via
    ``mcp_client.async_dispatch`` to the tool named by
    ``self.config.vector_store.mcp_tool`` (``"rag_search"`` when unset) and
    decodes the JSON reply.

    Args:
        query: Text to search for.
        top_k: Result count forwarded to the tool.

    Returns:
        The decoded list when the reply is a JSON list, or the list under
        its ``"results"`` key when the reply is a JSON object. Returns
        ``[]`` when the reply is not valid JSON or has any other shape,
        including a non-list ``"results"`` value.

    Raises:
        RuntimeError: from ``asyncio.run`` when the calling thread already
            has a running event loop, so call this from a synchronous
            context only.
        Exception: anything raised by ``mcp_client.async_dispatch``
            propagates unchanged, because only decoding is guarded.
    """
    async def _dispatch_and_parse() -> List[Dict[str, Any]]:
        tool_name = self.config.vector_store.mcp_tool or "rag_search"
        payload = {"query": query, "top_k": top_k}
        raw_reply = await mcp_client.async_dispatch(tool_name, payload)
        try:
            decoded = json.loads(raw_reply)
        except (TypeError, ValueError):
            # json.JSONDecodeError subclasses ValueError; TypeError covers a
            # reply that is not str/bytes.
            return []
        if isinstance(decoded, dict):
            decoded = decoded.get("results")
        # Callers iterate the result, so anything but a list becomes "no hits".
        return decoded if isinstance(decoded, list) else []

    return asyncio.run(_dispatch_and_parse())
|
||||||
|
|
||||||
def search(self, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
|
def search(self, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
|
||||||
if not self.config.enabled or self.collection == "mock":
|
if not self.config.enabled:
|
||||||
|
return []
|
||||||
|
if self.config.vector_store.provider == 'mcp':
|
||||||
|
return self._search_mcp(query, top_k)
|
||||||
|
if self.collection == "mock":
|
||||||
return []
|
return []
|
||||||
|
|
||||||
query_embedding = self.embedding_provider.embed([query])[0]
|
query_embedding = self.embedding_provider.embed([query])[0]
|
||||||
|
|||||||
Reference in New Issue
Block a user