Private
Public Access
0
0

refactor(rag_engine,ai_client): rag_engine.search returns List[RAGChunk] directly

Phase 5: rag_engine.search() return type (FR4 row 7)
Before: def search(...) -> List[Dict[str, Any]] at src/rag_engine.py:367
After:  def search(...) -> List["RAGChunk"]
Delta:  -1 wrong type annotation (List[Dict] -> List[RAGChunk])

RAGChunk dataclass extended with `id: str = ""` field to preserve the
chroma wire-format identifier. The search() function now constructs
RAGChunk instances directly from chromadb query results, normalizing
the wire format (metadata.path -> RAGChunk.path; distance -> score = 1.0 - distance)
at the boundary.

Consumer updates:
- src/ai_client.py:3259-3266: chunk["metadata"]["path"] -> chunk.path;
  chunk["document"] -> chunk.document (direct attribute access)
- src/app_controller.py:3506: docstring updated from Result[List[Dict]]
  to Result[List[RAGChunk]] (no code change; pass-through)

Test updates:
- tests/test_rag_engine.py:61: results[0]["id"] -> results[0].id
  (now uses dataclass attribute access)

Verification:
- audit_weak_types --strict: OK (107 <= 112 baseline)
- py_check_syntax: OK on rag_engine.py, ai_client.py, test_rag_engine.py
- 21 RAG tests pass (test_rag_engine, test_rag_chunk,
  test_rag_engine_ready_status_bug, test_rag_integration,
  test_context_composition_decoupled, test_tiered_aggregation)
This commit is contained in:
2026-06-26 04:54:02 -04:00
parent cfd881e719
commit 6399dcc4ed
3 changed files with 15 additions and 11 deletions
+2 -3
View File
@@ -3260,9 +3260,8 @@ def send(
if chunks:
context_block = "## Retrieved Context\n\n"
for i, chunk in enumerate(chunks):
chunk_meta = chunk["metadata"] if "metadata" in chunk else {}
path = chunk_meta["path"] if "path" in chunk_meta else "unknown"
doc = chunk["document"] if "document" in chunk else ""
path = chunk.path if chunk.path else "unknown"
doc = chunk.document
context_block += f"### Chunk {i+1} (Source: {path})\n{doc}\n\n"
user_message = context_block + user_message
+12 -7
View File
@@ -18,6 +18,7 @@ from src.file_cache import ASTParser
@dataclass(frozen=True)
class RAGChunk:
id: str = ""
document: str = ""
path: str = ""
score: float = 0.0
@@ -364,7 +365,7 @@ class RAGEngine:
return asyncio.run(_async_search_mcp())
def search(self, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
def search(self, query: str, top_k: int = 5) -> List["RAGChunk"]:
"""
[C: tests/mock_concurrent_mma.py:main, tests/test_rag_engine.py:test_rag_engine_chroma]
"""
@@ -381,12 +382,16 @@ class RAGEngine:
ret = []
if results and results["ids"] and results["ids"][0]:
for i in range(len(results["ids"][0])):
ret.append({
"id": results["ids"][0][i],
"document": results["documents"][0][i],
"metadata": results["metadatas"][0][i] if results["metadatas"] else {},
"distance": results["distances"][0][i] if "distances" in results and results["distances"] else 0.0
})
raw_meta = results["metadatas"][0][i] if results["metadatas"] else {}
distance = results["distances"][0][i] if "distances" in results and results["distances"] else 0.0
raw_path = raw_meta.get("path", "") if isinstance(raw_meta, dict) else ""
ret.append(RAGChunk(
id=results["ids"][0][i],
document=results["documents"][0][i],
path=raw_path,
score=1.0 - float(distance),
metadata=Metadata.from_dict(raw_meta) if isinstance(raw_meta, dict) else Metadata(),
))
return ret
def delete_documents(self, ids: List[str]):
+1 -1
View File
@@ -58,7 +58,7 @@ def test_rag_engine_chroma(mock_get_chroma, mock_embed):
results = engine.search("hello", top_k=1)
assert len(results) == 1
assert results[0]["id"] == "doc1"
assert results[0].id == "doc1"
engine.delete_documents(["doc1"])
mock_collection.delete.assert_called_once_with(ids=["doc1"])