feat(testing): stabilize simulation suite and fix gemini caching

2026-02-25 01:44:46 -05:00
parent fb80ce8c5a
commit c952d2f67b
23 changed files with 784 additions and 596 deletions
@@ -50,7 +50,7 @@ def live_gui():
        creationflags=subprocess.CREATE_NEW_PROCESS_GROUP if os.name == 'nt' else 0
    )

-    max_retries = 10  # Reduced as recommended
+    max_retries = 15  # Slightly more time for gui_2
    ready = False
    print(f"[Fixture] Waiting up to {max_retries}s for Hook Server on port 8999...")
    
@@ -9,5 +9,5 @@ auto_add = true

 [discussions.main]
 git_commit = ""
-last_updated = "2026-02-25T00:40:10"
+last_updated = "2026-02-25T01:42:16"
 history = []
@@ -5,10 +5,10 @@ roles = [
    "System",
 ]
 history = []
-active = "TestDisc_1771997990"
+active = "TestDisc_1772001716"
 auto_add = true

-[discussions.TestDisc_1771997990]
+[discussions.TestDisc_1772001716]
 git_commit = ""
-last_updated = "2026-02-25T00:40:04"
+last_updated = "2026-02-25T01:42:09"
 history = []
@@ -9,7 +9,5 @@ auto_add = true

 [discussions.main]
 git_commit = ""
-last_updated = "2026-02-25T00:40:46"
-history = [
-    "@2026-02-25T00:40:30\nUser:\nCreate a hello.ps1 script that prints 'Simulation Test' and execute it.",
-]
+last_updated = "2026-02-25T01:43:05"
+history = []
@@ -9,5 +9,5 @@ auto_add = true

 [discussions.main]
 git_commit = ""
-last_updated = "2026-02-25T00:40:27"
+last_updated = "2026-02-25T01:42:35"
 history = []
@@ -5,6 +5,8 @@ system_prompt = ""
 main_context = ""
 word_wrap = true
 summary_only = false
+auto_scroll_comms = true
+auto_scroll_tool_calls = true

 [output]
 output_dir = "./md_gen"
@@ -9,5 +9,5 @@ auto_add = true

 [discussions.main]
 git_commit = ""
-last_updated = "2026-02-25T00:02:11"
+last_updated = "2026-02-25T01:43:08"
 history = []
@@ -22,53 +22,49 @@ def cleanup_callback_file():
    if TEST_CALLBACK_FILE.exists():
        TEST_CALLBACK_FILE.unlink()

-def test_gui2_set_value_hook_works(live_gui_2):
+def test_gui2_set_value_hook_works(live_gui):
    """
    Tests that the 'set_value' GUI hook is correctly implemented.
-    This requires a way to read the value back, which we don't have yet.
-    For now, this test just sends the command and assumes it works.
    """
    client = ApiHookClient()
+    assert client.wait_for_server(timeout=10)
    test_value = f"New value set by test: {uuid.uuid4()}"
    gui_data = {'action': 'set_value', 'item': 'ai_input', 'value': test_value}
    
    response = client.post_gui(gui_data)
    assert response == {'status': 'queued'}
    
-    # In a future test, we would add:
-    # time.sleep(0.2)
-    # current_value = client.get_value('ai_input') # This hook doesn't exist yet
-    # assert current_value == test_value
+    # Verify the value was actually set using the new get_value hook
+    time.sleep(0.5)
+    current_value = client.get_value('ai_input')
+    assert current_value == test_value

-def test_gui2_click_hook_works(live_gui_2):
+def test_gui2_click_hook_works(live_gui):
    """
    Tests that the 'click' GUI hook for the 'Reset' button is implemented.
-    This will be verified by checking for a side effect (e.g., session is reset,
-    which can be checked via another hook).
    """
    client = ApiHookClient()
+    assert client.wait_for_server(timeout=10)
    
    # First, set some state that 'Reset' would clear.
-    # We use the 'set_value' hook for this.
    test_value = "This text should be cleared by the reset button."
-    client.post_gui({'action': 'set_value', 'item': 'ai_input', 'value': test_value})
-    time.sleep(0.2)
+    client.set_value('ai_input', test_value)
+    time.sleep(0.5)
+    assert client.get_value('ai_input') == test_value

    # Now, trigger the click
-    gui_data = {'action': 'click', 'item': 'btn_reset'}
-    response = client.post_gui(gui_data)
-    assert response == {'status': 'queued'}
+    client.click('btn_reset')
+    time.sleep(0.5)
    
-    # We need a way to verify the state was reset.
-    # We can't read the ai_input value back yet.
-    # So this test remains conceptual for now, but demonstrates the intent.
+    # Verify it was reset
+    assert client.get_value('ai_input') == ""

-def test_gui2_custom_callback_hook_works(live_gui_2):
+def test_gui2_custom_callback_hook_works(live_gui):
    """
    Tests that the 'custom_callback' GUI hook is correctly implemented.
-    This test will PASS if the hook is correctly processed by gui_2.py.
    """
    client = ApiHookClient()
+    assert client.wait_for_server(timeout=10)
    test_data = f"Callback executed: {uuid.uuid4()}"

    gui_data = {
@@ -45,27 +45,28 @@ def test_full_live_workflow(live_gui):

    # Enable auto-add so the response ends up in history
    client.set_value("auto_add_history", True)
+    client.set_value("current_model", "gemini-2.5-flash-lite")
    time.sleep(0.5)

    # 3. Discussion Turn
    client.set_value("ai_input", "Hello! This is an automated test. Just say 'Acknowledged'.")
    client.click("btn_gen_send")
-    
+
    # Verify thinking indicator appears (might be brief)
    thinking_seen = False
    print("\nPolling for thinking indicator...")
-    for i in range(20):
+    for i in range(40):
        state = client.get_indicator_state("thinking_indicator")
        if state.get('shown'):
            thinking_seen = True
            print(f"Thinking indicator seen at poll {i}")
            break
        time.sleep(0.5)
-    
+
    # 4. Wait for response in session
    success = False
    print("Waiting for AI response in session...")
-    for i in range(60):
+    for i in range(120):
        session = client.get_session()
        entries = session.get('session', {}).get('entries', [])
        if any(e.get('role') == 'AI' for e in entries):
@@ -74,8 +75,7 @@ def test_full_live_workflow(live_gui):
            break
        time.sleep(1)

-    assert success, "AI failed to respond within 60 seconds"
-
+    assert success, "AI failed to respond within 120 seconds"
    # 5. Switch Discussion
    client.set_value("disc_new_name_input", "AutoDisc")
    client.click("btn_disc_create")
@@ -37,5 +37,5 @@ def test_ai_settings_simulation_run():
        sim.run()
        
        # Verify calls
-        mock_client.set_value.assert_any_call("current_provider", "anthropic")
-        mock_client.set_value.assert_any_call("current_provider", "gemini")
+        mock_client.set_value.assert_any_call("current_model", "gemini-1.5-flash")
+        mock_client.set_value.assert_any_call("current_model", "gemini-2.5-flash-lite")
@@ -32,21 +32,19 @@ def test_execution_simulation_run():
    }
    mock_client.get_session.return_value = mock_session

+    # Mock script confirmation event
+    mock_client.wait_for_event.side_effect = [
+        {"type": "script_confirmation_required", "script": "dir"},
+        None # Second call returns None to end the loop
+    ]
+
    with patch('simulation.sim_base.WorkflowSimulator') as mock_sim_class:
        mock_sim = MagicMock()
        mock_sim_class.return_value = mock_sim
        
-        # We need a way to trigger show_confirm_modal = True
-        # In sim_execution.py, it's called after run_discussion_turn
-        # I'll mock run_discussion_turn to set it
-        def run_side_effect(msg):
-            vals["show_confirm_modal"] = True
-            
-        mock_sim.run_discussion_turn.side_effect = run_side_effect
-        
        sim = ExecutionSimulation(mock_client)
        sim.run()
        
        # Verify calls
-        mock_sim.run_discussion_turn.assert_called()
+        mock_sim.run_discussion_turn_async.assert_called()
        mock_client.click.assert_called_with("btn_approve_script")