feat(testing): stabilize simulation suite and fix gemini caching

This commit is contained in:
2026-02-25 01:44:46 -05:00
parent fb80ce8c5a
commit c952d2f67b
23 changed files with 784 additions and 596 deletions

View File

@@ -50,7 +50,7 @@ def live_gui():
creationflags=subprocess.CREATE_NEW_PROCESS_GROUP if os.name == 'nt' else 0
)
max_retries = 10 # Reduced as recommended
max_retries = 15 # Slightly more time for gui_2
ready = False
print(f"[Fixture] Waiting up to {max_retries}s for Hook Server on port 8999...")

View File

@@ -9,5 +9,5 @@ auto_add = true
[discussions.main]
git_commit = ""
last_updated = "2026-02-25T00:40:10"
last_updated = "2026-02-25T01:42:16"
history = []

View File

@@ -5,10 +5,10 @@ roles = [
"System",
]
history = []
active = "TestDisc_1771997990"
active = "TestDisc_1772001716"
auto_add = true
[discussions.TestDisc_1771997990]
[discussions.TestDisc_1772001716]
git_commit = ""
last_updated = "2026-02-25T00:40:04"
last_updated = "2026-02-25T01:42:09"
history = []

View File

@@ -9,7 +9,5 @@ auto_add = true
[discussions.main]
git_commit = ""
last_updated = "2026-02-25T00:40:46"
history = [
"@2026-02-25T00:40:30\nUser:\nCreate a hello.ps1 script that prints 'Simulation Test' and execute it.",
]
last_updated = "2026-02-25T01:43:05"
history = []

View File

@@ -9,5 +9,5 @@ auto_add = true
[discussions.main]
git_commit = ""
last_updated = "2026-02-25T00:40:27"
last_updated = "2026-02-25T01:42:35"
history = []

View File

@@ -5,6 +5,8 @@ system_prompt = ""
main_context = ""
word_wrap = true
summary_only = false
auto_scroll_comms = true
auto_scroll_tool_calls = true
[output]
output_dir = "./md_gen"

View File

@@ -9,5 +9,5 @@ auto_add = true
[discussions.main]
git_commit = ""
last_updated = "2026-02-25T00:02:11"
last_updated = "2026-02-25T01:43:08"
history = []

View File

@@ -22,53 +22,49 @@ def cleanup_callback_file():
if TEST_CALLBACK_FILE.exists():
TEST_CALLBACK_FILE.unlink()
def test_gui2_set_value_hook_works(live_gui_2):
def test_gui2_set_value_hook_works(live_gui):
"""
Tests that the 'set_value' GUI hook is correctly implemented.
After the command is queued, the value is read back through the 'get_value'
hook to confirm that it was actually applied to 'ai_input'.
"""
client = ApiHookClient()
assert client.wait_for_server(timeout=10)
test_value = f"New value set by test: {uuid.uuid4()}"
gui_data = {'action': 'set_value', 'item': 'ai_input', 'value': test_value}
response = client.post_gui(gui_data)
assert response == {'status': 'queued'}
# In a future test, we would add:
# time.sleep(0.2)
# current_value = client.get_value('ai_input') # This hook doesn't exist yet
# assert current_value == test_value
# Verify the value was actually set using the new get_value hook
time.sleep(0.5)
current_value = client.get_value('ai_input')
assert current_value == test_value
def test_gui2_click_hook_works(live_gui_2):
def test_gui2_click_hook_works(live_gui):
"""
Tests that the 'click' GUI hook for the 'Reset' button is implemented.
This will be verified by checking for a side effect (e.g., session is reset,
which can be checked via another hook).
"""
client = ApiHookClient()
assert client.wait_for_server(timeout=10)
# First, set some state that 'Reset' would clear.
# We use the 'set_value' hook for this.
test_value = "This text should be cleared by the reset button."
client.post_gui({'action': 'set_value', 'item': 'ai_input', 'value': test_value})
time.sleep(0.2)
client.set_value('ai_input', test_value)
time.sleep(0.5)
assert client.get_value('ai_input') == test_value
# Now, trigger the click
gui_data = {'action': 'click', 'item': 'btn_reset'}
response = client.post_gui(gui_data)
assert response == {'status': 'queued'}
client.click('btn_reset')
time.sleep(0.5)
# We need a way to verify the state was reset.
# We can't read the ai_input value back yet.
# So this test remains conceptual for now, but demonstrates the intent.
# Verify it was reset
assert client.get_value('ai_input') == ""
def test_gui2_custom_callback_hook_works(live_gui_2):
def test_gui2_custom_callback_hook_works(live_gui):
"""
Tests that the 'custom_callback' GUI hook is correctly implemented.
This test will PASS if the hook is correctly processed by gui_2.py.
"""
client = ApiHookClient()
assert client.wait_for_server(timeout=10)
test_data = f"Callback executed: {uuid.uuid4()}"
gui_data = {

View File

@@ -45,27 +45,28 @@ def test_full_live_workflow(live_gui):
# Enable auto-add so the response ends up in history
client.set_value("auto_add_history", True)
client.set_value("current_model", "gemini-2.5-flash-lite")
time.sleep(0.5)
# 3. Discussion Turn
client.set_value("ai_input", "Hello! This is an automated test. Just say 'Acknowledged'.")
client.click("btn_gen_send")
# Verify thinking indicator appears (might be brief)
thinking_seen = False
print("\nPolling for thinking indicator...")
for i in range(20):
for i in range(40):
state = client.get_indicator_state("thinking_indicator")
if state.get('shown'):
thinking_seen = True
print(f"Thinking indicator seen at poll {i}")
break
time.sleep(0.5)
# 4. Wait for response in session
success = False
print("Waiting for AI response in session...")
for i in range(60):
for i in range(120):
session = client.get_session()
entries = session.get('session', {}).get('entries', [])
if any(e.get('role') == 'AI' for e in entries):
@@ -74,8 +75,7 @@ def test_full_live_workflow(live_gui):
break
time.sleep(1)
assert success, "AI failed to respond within 60 seconds"
assert success, "AI failed to respond within 120 seconds"
# 5. Switch Discussion
client.set_value("disc_new_name_input", "AutoDisc")
client.click("btn_disc_create")

View File

@@ -37,5 +37,5 @@ def test_ai_settings_simulation_run():
sim.run()
# Verify calls
mock_client.set_value.assert_any_call("current_provider", "anthropic")
mock_client.set_value.assert_any_call("current_provider", "gemini")
mock_client.set_value.assert_any_call("current_model", "gemini-1.5-flash")
mock_client.set_value.assert_any_call("current_model", "gemini-2.5-flash-lite")

View File

@@ -32,21 +32,19 @@ def test_execution_simulation_run():
}
mock_client.get_session.return_value = mock_session
# Mock script confirmation event
mock_client.wait_for_event.side_effect = [
{"type": "script_confirmation_required", "script": "dir"},
None # Second call returns None to end the loop
]
with patch('simulation.sim_base.WorkflowSimulator') as mock_sim_class:
mock_sim = MagicMock()
mock_sim_class.return_value = mock_sim
# We need a way to trigger show_confirm_modal = True
# In sim_execution.py, it's called after run_discussion_turn
# I'll mock run_discussion_turn to set it
def run_side_effect(msg):
vals["show_confirm_modal"] = True
mock_sim.run_discussion_turn.side_effect = run_side_effect
sim = ExecutionSimulation(mock_client)
sim.run()
# Verify calls
mock_sim.run_discussion_turn.assert_called()
mock_sim.run_discussion_turn_async.assert_called()
mock_client.click.assert_called_with("btn_approve_script")