149 lines
5.5 KiB
Python
149 lines
5.5 KiB
Python
import pytest
|
|
import time
|
|
import sys
|
|
import os
|
|
import json
|
|
import shutil
|
|
from pathlib import Path
|
|
|
|
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
|
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "src")))
|
|
|
|
from src import api_hook_client
|
|
|
|
def _poll_until(predicate, attempts, interval):
    """Call ``predicate`` repeatedly until it returns a truthy value.

    Args:
        predicate: Zero-argument callable evaluated once per attempt.
        attempts: Maximum number of times ``predicate`` is called.
        interval: Seconds to sleep after each unsuccessful call.

    Returns:
        True as soon as ``predicate()`` is truthy, False once every
        attempt has been used without success.
    """
    for _ in range(attempts):
        if predicate():
            return True
        time.sleep(interval)
    return False


def _find_rag_context(entries):
    """Return the content of the first 'User' entry that carries RAG context.

    An entry qualifies when its ``role`` is ``'User'`` and its ``content``
    contains the ``'## Retrieved Context'`` marker.

    Args:
        entries: Iterable of dict-like discussion entries.

    Returns:
        The matching entry's content, or None when no entry qualifies.
    """
    for entry in entries:
        if entry.get('role') == 'User' and '## Retrieved Context' in entry.get('content', ''):
            return entry.get('content', '')
    return None


@pytest.mark.integration
@pytest.mark.clean_baseline
def test_phase4_final_verify(live_gui, live_gui_workspace):
    """Drive a RAG round trip through the Hook API and verify each stage.

    Stages: wipe stale chroma caches, create two dummy files, configure the
    project, wait for the RAG engine to report 'ready', rebuild the index,
    send one AI turn, check that retrieved context landed in the discussion
    history, and finally check that a second rebuild finishes quickly.

    Args:
        live_gui: Fixture requested by the test; not referenced in the body.
        live_gui_workspace: Workspace directory (used with the ``pathlib``
            API) in which the dummy files are created.
    """
    client = api_hook_client.ApiHookClient()
    assert client.wait_for_server(timeout=15), "Hook server did not start"

    # Clean the chroma cache BEFORE the test starts. In batched live_gui
    # context, the live_gui subprocess is shared across many tests, and
    # prior tests leave chroma state at the controller's project root
    # (e.g. C:\projects\manual_slop\tests\artifacts\.slop_cache\chroma_test_*).
    # The dim-mismatch rmtree in rag_engine._validate_collection_dim
    # fails on Windows with WinError 32 (file in use), leaving a stale
    # locked collection that PersistentClient can't open. Wipe the
    # relevant cache dirs proactively so the test starts clean.
    workspace_root = live_gui_workspace.parent if live_gui_workspace else Path.cwd()
    stale_path = Path(workspace_root) / ".slop_cache"
    if stale_path.exists():
        for col_dir in stale_path.iterdir():
            if col_dir.is_dir() and col_dir.name.startswith("chroma_"):
                try:
                    shutil.rmtree(col_dir)
                except OSError as exc:
                    # Best-effort cleanup: a locked directory must not abort
                    # the test, but the failure should be visible in the log.
                    print(f"[VERIFY] Could not remove stale cache {col_dir}: {exc}")

    # 1. Setup mock project data
    workspace_dir = live_gui_workspace
    workspace_dir.mkdir(parents=True, exist_ok=True)

    # Create dummy files (explicit encoding so the bytes on disk do not
    # depend on the platform's default locale).
    (workspace_dir / "final_test_1.txt").write_text("Manual Slop RAG is great.", encoding="utf-8")
    (workspace_dir / "final_test_2.py").write_text(
        "def test_func():\n return 'Manual Slop RAG result'", encoding="utf-8"
    )

    def rag_is_ready():
        """Report whether the GUI currently exposes rag_status == 'ready'."""
        return client.get_value('rag_status') == 'ready'

    try:
        # 2. Configure project through Hook API
        client.set_value('rag_collection_name', 'test_final_verify')
        client.set_value('files', ['final_test_1.txt', 'final_test_2.py'])
        client.set_value('rag_enabled', True)
        client.set_value('rag_source', 'chroma')
        client.set_value('rag_emb_provider', 'local')
        client.set_value('auto_add_history', True)
        client.set_value('current_provider', 'gemini_cli')
        client.set_value('gcli_path', os.path.abspath(os.path.join(os.path.dirname(__file__), "mock_gcli.bat")))

        time.sleep(1.5)
        # Wait for settings to apply and engine to sync
        synced = _poll_until(
            lambda: client.get_value('rag_emb_provider') == 'local' and rag_is_ready(),
            attempts=100,
            interval=0.5,
        )
        assert synced, f"RAG sync failed. Status: {client.get_value('rag_status')}"

        # 3. Trigger Initial Indexing
        print("[VERIFY] Triggering indexing...")
        client.click('btn_rebuild_rag_index')

        # Wait for ready
        # NOTE(review): the status was already 'ready' before the click, so
        # this poll may succeed before the rebuild has actually started --
        # confirm whether rag_status leaves 'ready' synchronously on click.
        indexed = _poll_until(rag_is_ready, attempts=50, interval=0.5)
        assert indexed, f"Indexing failed. Status: {client.get_value('rag_status')}"

        # 4. Verify Retrieval and Visualization
        print("[VERIFY] Triggering retrieval turn...")
        client.set_value('ai_input', "What makes RAG great?")
        client.click('btn_gen_send')

        # Wait for completion
        ai_done = False
        status = None
        print("[VERIFY] Polling for completion...")
        for i in range(100):
            status = client.get_gui_state().get('ai_status')
            if i % 10 == 0:
                print(f"[VERIFY] Poll {i}, status: {status}")
            if status == 'done':
                ai_done = True
                break
            if status and "error" in status.lower():
                print(f"[VERIFY] Error detected: {status}")
                break
            time.sleep(0.5)
        else:
            # Only reached when every poll was used up, i.e. a real timeout
            # (an error status leaves the loop through ``break`` instead).
            print(f"[VERIFY] Timeout! Final status: {status}")
        assert ai_done, f"AI request timed out or failed. Status: {status}"

        # 5. Verify discussion history has the context. After 'done' fires,
        # poll entries separately because the User entry with RAG context
        # injection may take an additional render frame to land in history
        # (race condition exposed in batched live_gui context).
        content = None
        found_at = 0
        for found_at in range(20):
            entries = client.get_session().get('session', {}).get('entries', [])
            content = _find_rag_context(entries)
            if content is not None:
                break
            time.sleep(0.5)
        assert content is not None, "RAG context not found in history"
        print(f"[VERIFY] Found RAG context (poll {found_at}): {content[:100]}...")
        # Accept either file's content as proof RAG retrieved something.
        # The original test asserted only the .txt content, but the .py file
        # ("Manual Slop RAG result") can rank first in batched context
        # depending on prior chroma state. Either file's content proves
        # RAG retrieval worked.
        assert ("Manual Slop RAG is great" in content
                or "Manual Slop RAG result" in content), (
            f"Expected either 'Manual Slop RAG is great' or 'Manual Slop RAG result' in retrieved context, got: {content[:200]}"
        )

        # 6. Verify Incremental Indexing (no changes)
        print("[VERIFY] Verifying incrementality...")
        # perf_counter is monotonic, so the measured duration cannot be
        # skewed by wall-clock adjustments during the test.
        start = time.perf_counter()
        client.click('btn_rebuild_rag_index')
        reindexed = _poll_until(rag_is_ready, attempts=50, interval=0.1)
        duration = time.perf_counter() - start
        print(f"[VERIFY] Incremental indexing took {duration:.2f}s")
        # Separate "never became ready" from "ready, but too slow".
        assert reindexed, f"Incremental indexing did not reach 'ready'. Status: {client.get_value('rag_status')}"
        assert duration < 1.0, "Incremental indexing too slow (expected < 1s for 2 files)"

        print("[VERIFY] Phase 4 final verification COMPLETED successfully.")

    except Exception as e:
        # Log the failure (including assertion failures) before re-raising
        # so it shows up in the captured test output.
        print(f"[VERIFY] ERROR in final verification: {e}")
        raise
|