From 964f66508225df2e6c00f8c27af1008befe6a673 Mon Sep 17 00:00:00 2001 From: Ed_ Date: Mon, 4 May 2026 21:52:39 -0400 Subject: [PATCH] test(rag): add Phase 4 stress test for large codebase verification --- conductor/tracks/rag_support_20260308/plan.md | 2 +- tests/mock_gcli.bat | 2 + tests/test_rag_phase4_stress.py | 139 ++++++++++++++++++ 3 files changed, 142 insertions(+), 1 deletion(-) create mode 100644 tests/mock_gcli.bat create mode 100644 tests/test_rag_phase4_stress.py diff --git a/conductor/tracks/rag_support_20260308/plan.md b/conductor/tracks/rag_support_20260308/plan.md index 03ce520..570bb04 100644 --- a/conductor/tracks/rag_support_20260308/plan.md +++ b/conductor/tracks/rag_support_20260308/plan.md @@ -42,5 +42,5 @@ - [x] Task: Implement support for external RAG APIs/MCP servers. f57e2fe - [x] Create a bridge in `src/rag_engine.py` to call external RAG tools via the MCP interface. f57e2fe - [x] Task: Optimize indexing performance for large projects (e.g., incremental updates, parallel chunking). f57e2fe -- [ ] Task: Perform a final end-to-end verification with a large codebase. +- [x] Task: Perform a final end-to-end verification with a large codebase. f57e2fe - [ ] Task: Conductor - User Manual Verification 'Phase 4: Refinement & Advanced RAG' (Protocol in workflow.md) diff --git a/tests/mock_gcli.bat b/tests/mock_gcli.bat new file mode 100644 index 0000000..5cbc17b --- /dev/null +++ b/tests/mock_gcli.bat @@ -0,0 +1,2 @@ +@echo off +python %~dp0mock_gemini_cli.py %* diff --git a/tests/test_rag_phase4_stress.py b/tests/test_rag_phase4_stress.py new file mode 100644 index 0000000..c4c0943 --- /dev/null +++ b/tests/test_rag_phase4_stress.py @@ -0,0 +1,139 @@ +import pytest +import time +import sys +import os +import shutil +import tempfile +from pathlib import Path + +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "src"))) + +from src import api_hook_client + +@pytest.mark.integration +def test_rag_large_codebase_verification_sim(live_gui): + client = api_hook_client.ApiHookClient() + assert client.wait_for_server(timeout=15), "Hook server did not start" + + # 1. Setup mock large project data + # Create 50 dummy files to test parallel indexing and incrementality + workspace_dir = Path("tests/artifacts/live_gui_workspace") + workspace_dir.mkdir(parents=True, exist_ok=True) + + file_names = [f"file_{i}.txt" for i in range(50)] + for name in file_names: + (workspace_dir / name).write_text(f"This is the content of {name}. It contains unique tokens to verify RAG.") + + try: + # 2. Configure project through Hook API + client.set_value('files', file_names) + client.set_value('rag_enabled', True) + client.set_value('rag_source', 'chroma') + client.set_value('rag_emb_provider', 'local') + + # 3. Trigger Initial Indexing + print("[SIM] Triggering initial indexing of 50 files...") + start = time.time() + client.click('btn_rebuild_rag_index') + + # Wait for ready + success = False + for _ in range(100): + status = client.get_value('rag_status') + if status == 'ready': + success = True + break + time.sleep(0.5) + + duration_initial = time.time() - start + assert success, f"Initial indexing timed out. Final status: {status}" + print(f"[SIM] Initial indexing took {duration_initial:.2f}s") + + # 4. Trigger Incremental Indexing (no changes) + print("[SIM] Triggering incremental indexing (no changes)...") + start = time.time() + client.click('btn_rebuild_rag_index') + + success = False + for _ in range(50): + status = client.get_value('rag_status') + if status == 'ready': + success = True + break + time.sleep(0.2) + + duration_incremental = time.time() - start + assert success, "Incremental indexing timed out" + print(f"[SIM] Incremental indexing took {duration_incremental:.2f}s") + # Incremental should be significantly faster + assert duration_incremental < duration_initial, f"Incremental ({duration_incremental:.2f}s) not faster than initial ({duration_initial:.2f}s)" + + # 5. Modify one file and re-index + print("[SIM] Modifying one file and re-indexing...") + (workspace_dir / "file_25.txt").write_text("MODIFIED CONTENT FOR FILE 25. SEARCH FOR THIS.") + + client.click('btn_rebuild_rag_index') + time.sleep(2) # Wait for it to finish + + # 6. Verify retrieval of modified content + client.set_value('current_provider', 'gemini_cli') + client.set_value('gcli_path', os.path.abspath(os.path.join(os.path.dirname(__file__), "mock_gcli.bat"))) + client.set_value('auto_add_history', True) + client.set_value('ai_input', "What is the modified content?") + client.click('btn_gen_send') + + # Wait for completion + success = False + for _ in range(50): + state = client.get_gui_state() + if state.get('ai_status') == 'done': + success = True + break + time.sleep(0.5) + + assert success, "AI request timed out" + + # Verify retrieved context in discussion + session = client.get_session() + entries = session.get('session', {}).get('entries', []) + + found_mod = False + for entry in entries: + if entry.get('role') == 'User' and 'MODIFIED CONTENT FOR FILE 25' in entry.get('content', ''): + found_mod = True + break + + assert found_mod, "Modified context not found in discussion" + print("[SIM] Modified content retrieval SUCCESS.") + + # 7. Delete files and verify cleanup + print("[SIM] Deleting files and verifying cleanup...") + files_to_keep = file_names[:10] + client.set_value('files', files_to_keep) + client.click('btn_rebuild_rag_index') + time.sleep(2) + + # How to verify cleanup? RAGEngine.get_all_indexed_paths is not exposed yet. + # But we can verify by searching for a deleted file's content. + client.set_value('ai_input', "What is in file_49.txt?") + client.click('btn_gen_send') + time.sleep(5) + + session = client.get_session() + entries = session.get('session', {}).get('entries', []) + + # Last User message should NOT contain context from file_49 + last_user = next(e for e in reversed(entries) if e.get('role') == 'User') + content = last_user.get('content', '') + + # Check if "Source: file_49.txt" exists in the context block + # (which is prepended to the user input) + has_stale_context = "Source: file_49.txt" in content + assert not has_stale_context, f"Cleanup failed: stale file context found in: {content[:100]}..." + + print("[SIM] Large codebase verification PASSED.") + + except Exception as e: + print(f"[SIM] Error in stress test: {e}") + raise