feat(testing): stabilize simulation suite and fix gemini caching
This commit is contained in:
@@ -4,39 +4,76 @@ import time
|
||||
from simulation.sim_base import BaseSimulation, run_sim
|
||||
|
||||
class ExecutionSimulation(BaseSimulation):
    """Simulation that asks the AI to create and execute a PowerShell script.

    The run sends one prompt, then polls the GUI client: it approves every
    ``script_confirmation_required`` event it sees and succeeds as soon as
    the expected text shows up in the session history, either in an AI
    message or in a Tool/Function result.
    """

    # Text the generated script is asked to print; also the success marker
    # searched for in the session history.
    EXPECTED_TEXT = "Simulation Test"
    # Seconds the monitor loop keeps polling; the window restarts after
    # every approved script.
    IDLE_TIMEOUT_S = 90
    # Consecutive "error..." readings of ai_status tolerated before aborting.
    MAX_CONSECUTIVE_ERRORS = 3

    def setup(self, project_name="SimProject"):
        """Run the base setup, then delete any stale ``hello.ps1``.

        Args:
            project_name: Forwarded unchanged to ``BaseSimulation.setup``.
        """
        super().setup(project_name)
        # EAFP: remove directly instead of exists()-then-remove, which can
        # race with another process deleting the file in between.
        try:
            os.remove("hello.ps1")
        except FileNotFoundError:
            pass

    def _find_expected_output(self, entries):
        """Return the role of the first entry containing ``EXPECTED_TEXT``.

        AI entries are searched before Tool/Function entries so the caller
        reports the same success message the two separate scans used to.

        Args:
            entries: Session history entries (dicts read via ``role`` and
                ``content`` keys).

        Returns:
            The matching entry's role, or ``None`` when nothing matches.
        """
        for roles in (('AI',), ('Tool', 'Function')):
            for entry in entries:
                # ``or ''`` also covers an explicit None content, which a
                # plain .get(key, '') default would let through.
                content = entry.get('content') or ''
                if entry.get('role') in roles and self.EXPECTED_TEXT in content:
                    return entry.get('role')
        return None

    def run(self):
        """Trigger script generation and monitor until the output is seen.

        Raises:
            AssertionError: If neither an AI message nor a Tool/Function
                result containing ``EXPECTED_TEXT`` is observed before the
                polling window expires or the GUI reports a persistent error.
        """
        print("\n--- Running Execution & Modals Simulation ---")

        # 1. Trigger script generation (Async so we don't block on the wait loop)
        msg = f"Create a hello.ps1 script that prints '{self.EXPECTED_TEXT}' and execute it."
        print(f"[Sim] Sending message to trigger script: {msg}")
        self.sim.run_discussion_turn_async(msg)

        # 2. Monitor for events and text responses
        print("[Sim] Monitoring for script approvals and AI text...")
        # Monotonic clock: elapsed-time checks must not jump with wall-clock
        # adjustments.
        start_wait = time.monotonic()
        approved_count = 0
        success = False
        consecutive_errors = 0

        while time.monotonic() - start_wait < self.IDLE_TIMEOUT_S:
            # Check for error status (be lenient with transients)
            status = self.client.get_value("ai_status")
            if status and status.lower().startswith("error"):
                consecutive_errors += 1
                if consecutive_errors >= self.MAX_CONSECUTIVE_ERRORS:
                    print(f"[ABORT] Execution simulation aborted due to persistent GUI error: {status}")
                    break
            else:
                consecutive_errors = 0

            # Check for script confirmation event
            ev = self.client.wait_for_event("script_confirmation_required", timeout=1)
            if ev:
                preview = (ev.get('script') or '')[:50]
                print(f"[Sim] Approving script #{approved_count + 1}: {preview}...")
                self.client.click("btn_approve_script")
                approved_count += 1
                # Give more time if we just approved a script
                start_wait = time.monotonic()

            # Check if the expected text has reached the history yet.
            # Tolerate a missing/None session payload instead of crashing.
            session = self.client.get_session() or {}
            entries = (session.get('session') or {}).get('entries') or []

            # Debug: log last few roles
            if entries:
                last_few = entries[-3:]
                print(f"[Sim] Waiting... Last {len(last_few)} roles: {[e.get('role') for e in last_few]}")

            # NOTE(review): an AI message that merely repeats the keyword
            # (before the script actually ran) also counts as success here,
            # as it did before - confirm that is intended.
            matched_role = self._find_expected_output(entries)
            if matched_role == 'AI':
                print("[Sim] AI responded with expected text. Success.")
                success = True
                break
            if matched_role is not None:
                print(f"[Sim] Expected output found in {matched_role} results. Success.")
                success = True
                break

            time.sleep(1.0)

        assert success, "Failed to observe script execution output or AI confirmation text"
        print(f"[Sim] Final check: approved {approved_count} scripts.")
|
||||
|
||||
# Script entry point: hand the simulation class to the shared runner.
if __name__ == "__main__":
    run_sim(ExecutionSimulation)
|
||||
|
||||
Reference in New Issue
Block a user