import json
import os
import shutil
import sys
import time
from pathlib import Path

import pytest

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "src")))

from src import api_hook_client


def _poll_until(condition, *, attempts: int, interval: float) -> bool:
    """Poll *condition* until it is truthy or *attempts* is exhausted.

    Sleeps *interval* seconds after every unsuccessful attempt.

    Returns:
        True as soon as ``condition()`` is truthy, False if every attempt missed.
    """
    for _ in range(attempts):
        if condition():
            return True
        time.sleep(interval)
    return False


def _purge_stale_chroma_dirs(cache_root: Path) -> None:
    """Best-effort removal of every ``chroma_*`` directory directly under *cache_root*."""
    if not cache_root.exists():
        return
    for col_dir in cache_root.iterdir():
        if col_dir.is_dir() and col_dir.name.startswith("chroma_"):
            try:
                shutil.rmtree(col_dir)
            except OSError as exc:
                # Deliberately non-fatal (e.g. WinError 32, file in use), but
                # leave a trace so a later failure can be correlated with it.
                print(f"[VERIFY] Could not remove stale cache dir {col_dir}: {exc}")


@pytest.mark.integration
@pytest.mark.clean_baseline
def test_phase4_final_verify(live_gui, live_gui_workspace):
    """Drive the RAG pipeline end to end through the hook API.

    Steps: write two small files into the workspace, configure RAG through
    ``ApiHookClient``, rebuild the index, send one prompt and check that a
    'User' history entry carries a '## Retrieved Context' section containing
    text from one of the files, then rebuild again and require it to finish
    in under one second.

    Args:
        live_gui: Fixture requested for its side effect; not referenced in the
            body (presumably starts the GUI under test - confirm in conftest).
        live_gui_workspace: Workspace directory the dummy files are written to.
    """
    client = api_hook_client.ApiHookClient()
    assert client.wait_for_server(timeout=15), "Hook server did not start"

    # Clean the chroma cache BEFORE the test starts. In batched live_gui
    # context, the live_gui subprocess is shared across many tests, and
    # prior tests leave chroma state at the controller's project root
    # (e.g. C:\projects\manual_slop\tests\artifacts\.slop_cache\chroma_test_*).
    # The dim-mismatch rmtree in rag_engine._validate_collection_dim
    # fails on Windows with WinError 32 (file in use), leaving a stale
    # locked collection that PersistentClient can't open. Wipe the
    # relevant cache dirs proactively so the test starts clean.
    workspace_root = live_gui_workspace.parent if live_gui_workspace else Path.cwd()
    _purge_stale_chroma_dirs(Path(workspace_root) / ".slop_cache")

    # 1. Setup mock project data
    workspace_dir = live_gui_workspace
    workspace_dir.mkdir(parents=True, exist_ok=True)

    # Create dummy files
    (workspace_dir / "final_test_1.txt").write_text("Manual Slop RAG is great.")
    (workspace_dir / "final_test_2.py").write_text("def test_func():\n return 'Manual Slop RAG result'")

    try:
        # 2. Configure project through Hook API
        client.set_value('rag_collection_name', 'test_final_verify')
        client.set_value('files', ['final_test_1.txt', 'final_test_2.py'])
        client.set_value('rag_enabled', True)
        client.set_value('rag_source', 'chroma')
        client.set_value('rag_emb_provider', 'local')
        client.set_value('auto_add_history', True)
        client.set_value('current_provider', 'gemini_cli')
        client.set_value(
            'gcli_path',
            os.path.abspath(os.path.join(os.path.dirname(__file__), "mock_gcli.bat")),
        )
        time.sleep(1.5)

        # Wait for settings to apply and engine to sync
        success = _poll_until(
            lambda: client.get_value('rag_emb_provider') == 'local'
            and client.get_value('rag_status') == 'ready',
            attempts=100,
            interval=0.5,
        )
        assert success, f"RAG sync failed. Status: {client.get_value('rag_status')}"

        # 3. Trigger Initial Indexing
        print("[VERIFY] Triggering indexing...")
        client.click('btn_rebuild_rag_index')

        # Wait for ready
        success = _poll_until(
            lambda: client.get_value('rag_status') == 'ready',
            attempts=50,
            interval=0.5,
        )
        assert success, f"Indexing failed. Status: {client.get_value('rag_status')}"

        # 4. Verify Retrieval and Visualization
        print("[VERIFY] Triggering retrieval turn...")
        client.set_value('ai_input', "What makes RAG great?")
        client.click('btn_gen_send')

        # Wait for completion. Kept as an explicit loop because it also logs
        # progress and bails out early on an error status.
        success = False
        status = None
        print("[VERIFY] Polling for completion...")
        for i in range(100):
            state = client.get_gui_state()
            status = state.get('ai_status')
            if i % 10 == 0:
                print(f"[VERIFY] Poll {i}, status: {status}")
            if status == 'done':
                success = True
                break
            if status and "error" in status.lower():
                print(f"[VERIFY] Error detected: {status}")
                break
            time.sleep(0.5)

        if not success:
            print(f"[VERIFY] Timeout! Final status: {status}")
        assert success, f"AI request timed out or failed. Status: {status}"

        # 5. Verify discussion history has the context. After 'done' fires,
        # poll entries separately because the User entry with RAG context
        # injection may take an additional render frame to land in history
        # (race condition exposed in batched live_gui context).
        found_rag = False
        for j in range(20):
            session = client.get_session()
            entries = session.get('session', {}).get('entries', [])
            for entry in entries:
                if entry.get('role') == 'User' and '## Retrieved Context' in entry.get('content', ''):
                    found_rag = True
                    content = entry.get('content', '')
                    print(f"[VERIFY] Found RAG context (poll {j}): {content[:100]}...")
                    # Accept either file's content as proof RAG retrieved something.
                    # The original test asserted only the .txt content, but the .py file
                    # ("Manual Slop RAG result") can rank first in batched context
                    # depending on prior chroma state. Either file's content proves
                    # RAG retrieval worked.
                    assert ("Manual Slop RAG is great" in content or "Manual Slop RAG result" in content), (
                        f"Expected either 'Manual Slop RAG is great' or 'Manual Slop RAG result' in retrieved context, got: {content[:200]}"
                    )
                    break
            if found_rag:
                break
            time.sleep(0.5)
        assert found_rag, "RAG context not found in history"

        # 6. Verify Incremental Indexing (no changes)
        print("[VERIFY] Verifying incrementality...")
        # perf_counter is monotonic, so the measurement cannot be skewed by a
        # wall-clock adjustment while the rebuild runs.
        start = time.perf_counter()
        client.click('btn_rebuild_rag_index')
        became_ready = _poll_until(
            lambda: client.get_value('rag_status') == 'ready',
            attempts=50,
            interval=0.1,
        )
        duration = time.perf_counter() - start
        print(f"[VERIFY] Incremental indexing took {duration:.2f}s")
        # Report "never finished" separately from "finished, but slowly".
        assert became_ready, (
            f"Incremental indexing never reached 'ready'. Status: {client.get_value('rag_status')}"
        )
        assert duration < 1.0, "Incremental indexing too slow (expected < 1s for 2 files)"

        print("[VERIFY] Phase 4 final verification COMPLETED successfully.")

    except Exception as e:
        print(f"[VERIFY] ERROR in final verification: {e}")
        raise