feat(rag): Implement indexing and retrieval logic with AppController integration

This commit is contained in:
2026-05-04 06:53:32 -04:00
parent 337c21ad1f
commit fe0069c046
4 changed files with 237 additions and 3 deletions
+66
View File
@@ -91,6 +91,72 @@ class RAGEngine:
metadatas=metadatas
)
def _chunk_text(self, content: str) -> List[str]:
"""Character-based chunking with overlap."""
chunks = []
if not content:
return chunks
chunk_size = self.config.chunk_size
overlap = self.config.chunk_overlap
start = 0
while start < len(content):
end = start + chunk_size
chunks.append(content[start:end])
if end >= len(content):
break
start += (chunk_size - overlap)
return chunks
def _chunk_code(self, content: str, file_path: str) -> List[str]:
"""AST-aware chunking for Python code."""
try:
from src.file_cache import ASTParser
parser = ASTParser("python")
tree = parser.parse(content)
chunks = []
# Capture classes and top-level functions
for node in tree.root_node.children:
if node.type in ("function_definition", "class_definition"):
chunks.append(content[node.start_byte:node.end_byte])
# Fallback if no structural chunks found or if file is small
if not chunks or len(content) < self.config.chunk_size:
return self._chunk_text(content)
return chunks
except Exception:
return self._chunk_text(content)
def index_file(self, file_path: str):
"""Reads, chunks, and indexes a file into the vector store."""
if not self.config.enabled or self.collection == "mock":
return
full_path = os.path.join(self.base_dir, file_path)
if not os.path.exists(full_path):
return
try:
with open(full_path, "r", encoding="utf-8", errors="ignore") as f:
content = f.read()
except Exception:
return
# Remove old entries for this file
self.collection.delete(where={"path": file_path})
if file_path.lower().endswith(".py"):
chunks = self._chunk_code(content, file_path)
else:
chunks = self._chunk_text(content)
if not chunks:
return
ids = [f"{file_path}_{i}" for i in range(len(chunks))]
metadatas = [{"path": file_path, "chunk": i} for i in range(len(chunks))]
self.add_documents(ids, chunks, metadatas)
def search(self, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
if not self.config.enabled or self.collection == "mock":
return []