refactor(rag_engine,ai_client): rag_engine.search returns List[RAGChunk] directly
Phase 5: rag_engine.search() return type (FR4 row 7)

Before: def search(...) -> List[Dict[str, Any]] at src/rag_engine.py:367
After: def search(...) -> List["RAGChunk"]
Delta: -1 wrong type annotation (List[Dict] -> List[RAGChunk])

RAGChunk dataclass extended with `id: str = ""` field to preserve the chroma wire-format identifier. The search() function now constructs RAGChunk instances directly from chromadb query results, normalizing the wire format (metadata.path -> RAGChunk.path; distance -> RAGChunk.score = 1.0 - distance) at the boundary.

Consumer updates:
- src/ai_client.py:3259-3266: chunk["metadata"]["path"] -> chunk.path; chunk["document"] -> chunk.document (direct attribute access)
- src/app_controller.py:3506: docstring updated from Result[List[Dict]] to Result[List[RAGChunk]] (no code change; pass-through)

Test updates:
- tests/test_rag_engine.py:61: results[0]["id"] -> results[0].id (now uses dataclass attribute access)

Verification:
- audit_weak_types --strict: OK (107 <= 112 baseline)
- py_check_syntax: OK on rag_engine.py, ai_client.py, test_rag_engine.py
- 21 RAG tests pass (test_rag_engine, test_rag_chunk, test_rag_engine_ready_status_bug, test_rag_integration, test_context_composition_decoupled, test_tiered_aggregation)
This commit is contained in:
+2
-3
@@ -3260,9 +3260,8 @@ def send(
|
||||
if chunks:
|
||||
context_block = "## Retrieved Context\n\n"
|
||||
for i, chunk in enumerate(chunks):
|
||||
chunk_meta = chunk["metadata"] if "metadata" in chunk else {}
|
||||
path = chunk_meta["path"] if "path" in chunk_meta else "unknown"
|
||||
doc = chunk["document"] if "document" in chunk else ""
|
||||
path = chunk.path if chunk.path else "unknown"
|
||||
doc = chunk.document
|
||||
context_block += f"### Chunk {i+1} (Source: {path})\n{doc}\n\n"
|
||||
user_message = context_block + user_message
|
||||
|
||||
|
||||
+12
-7
@@ -18,6 +18,7 @@ from src.file_cache import ASTParser
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class RAGChunk:
|
||||
id: str = ""
|
||||
document: str = ""
|
||||
path: str = ""
|
||||
score: float = 0.0
|
||||
@@ -364,7 +365,7 @@ class RAGEngine:
|
||||
|
||||
return asyncio.run(_async_search_mcp())
|
||||
|
||||
def search(self, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
|
||||
def search(self, query: str, top_k: int = 5) -> List["RAGChunk"]:
|
||||
"""
|
||||
[C: tests/mock_concurrent_mma.py:main, tests/test_rag_engine.py:test_rag_engine_chroma]
|
||||
"""
|
||||
@@ -381,12 +382,16 @@ class RAGEngine:
|
||||
ret = []
|
||||
if results and results["ids"] and results["ids"][0]:
|
||||
for i in range(len(results["ids"][0])):
|
||||
ret.append({
|
||||
"id": results["ids"][0][i],
|
||||
"document": results["documents"][0][i],
|
||||
"metadata": results["metadatas"][0][i] if results["metadatas"] else {},
|
||||
"distance": results["distances"][0][i] if "distances" in results and results["distances"] else 0.0
|
||||
})
|
||||
raw_meta = results["metadatas"][0][i] if results["metadatas"] else {}
|
||||
distance = results["distances"][0][i] if "distances" in results and results["distances"] else 0.0
|
||||
raw_path = raw_meta.get("path", "") if isinstance(raw_meta, dict) else ""
|
||||
ret.append(RAGChunk(
|
||||
id=results["ids"][0][i],
|
||||
document=results["documents"][0][i],
|
||||
path=raw_path,
|
||||
score=1.0 - float(distance),
|
||||
metadata=Metadata.from_dict(raw_meta) if isinstance(raw_meta, dict) else Metadata(),
|
||||
))
|
||||
return ret
|
||||
|
||||
def delete_documents(self, ids: List[str]):
|
||||
|
||||
@@ -58,7 +58,7 @@ def test_rag_engine_chroma(mock_get_chroma, mock_embed):
|
||||
|
||||
results = engine.search("hello", top_k=1)
|
||||
assert len(results) == 1
|
||||
assert results[0]["id"] == "doc1"
|
||||
assert results[0].id == "doc1"
|
||||
engine.delete_documents(["doc1"])
|
||||
mock_collection.delete.assert_called_once_with(ids=["doc1"])
|
||||
|
||||
|
||||
Reference in New Issue
Block a user