Private
Public Access
0
0
Files
manual_slop/simulation/sim_context.py
T
ed 72f8f466fe fix(sim+api): proper wait loops, project switch endpoint, drop stale check
Three real fixes for the sim test + the live_gui coordination layer:

1. /api/project_switch_status endpoint in src/app_controller.py.
   The wait helper had been calling this endpoint but it did not exist;
   the helper always received a 404, fell back to {in_progress: False},
   and returned immediately even when a switch was in flight. Added the
   endpoint that reads _project_switch_in_progress, active_project_path,
   and _project_switch_error from the controller.

2. simulation/sim_base.py: replace time.sleep(2.0)/time.sleep(1.5) in
   the setup() with wait_io_pool_idle and wait_for_project_switch so
   the test does not click btn_md_only while a project switch is in
   flight. Also added the wait calls to sim_context.py for the same
   reason.

3. src/app_controller.py _handle_md_only: removed the is_project_stale()
   early-return. The stale state is a transient window during which the
   previous code dropped the click on the floor with a misleading
   'stale ui' status. The MD generation worker is safe to run from any
   project state; the action handler now always proceeds.

4. tests/test_extended_sims.py: set current_model to 'gemini-cli' so
   _do_generate does not raise KeyError('model') when the test
   overrides provider to gemini_cli.

KNOWN ISSUE: test_context_sim_live still fails with status
'switching to: temp_livecontextsim' after a 60s wait. The click
appears to be re-triggering a project switch via the GUI's render
loop. Root cause investigation deferred; the sim is async and the
test path is fragile.
2026-06-10 00:31:22 -04:00

105 lines
5.9 KiB
Python

import os
import time
from simulation.sim_base import BaseSimulation, run_sim
class ContextSimulation(BaseSimulation):
def run(self) -> None:
"""
[C: tests/conftest.py:kill_process_tree, tests/conftest.py:live_gui, tests/test_conductor_abort_event.py:test_conductor_abort_event_populated, tests/test_conductor_engine_v2.py:test_conductor_engine_dynamic_parsing_and_execution, tests/test_conductor_engine_v2.py:test_conductor_engine_run_executes_tickets_in_order, tests/test_extended_sims.py:test_ai_settings_sim_live, tests/test_extended_sims.py:test_context_sim_live, tests/test_extended_sims.py:test_execution_sim_live, tests/test_extended_sims.py:test_tools_sim_live, tests/test_external_editor_gui.py:get_vscode_processes, tests/test_external_editor_gui.py:test_vscode_launches_with_diff_view, tests/test_gui_custom_window.py:test_app_window_is_borderless, tests/test_headless_simulation.py:module, tests/test_headless_verification.py:test_headless_verification_error_and_qa_interceptor, tests/test_headless_verification.py:test_headless_verification_full_run, tests/test_mock_gemini_cli.py:run_mock, tests/test_orchestration_logic.py:test_conductor_engine_run, tests/test_parallel_execution.py:test_conductor_engine_pool_integration, tests/test_sim_ai_settings.py:test_ai_settings_simulation_run, tests/test_sim_context.py:test_context_simulation_run, tests/test_sim_execution.py:test_execution_simulation_run, tests/test_sim_tools.py:test_tools_simulation_run]
"""
print("\n--- Running Context & Chat Simulation ---")
# Wait for any in-flight async work (e.g., prior setup_new_project switch) to finish.
self.client.wait_io_pool_idle(timeout=10.0)
print("\n--- Running Context & Chat Simulation ---")
# Wait for the sim_base.setup() project switch to fully complete.
self.client.wait_io_pool_idle(timeout=10.0)
self.client.wait_for_project_switch(timeout=15.0)
# 1. Skip Discussion Creation, use 'main'
print("[Sim] Using existing 'main' discussion")
self.sim.switch_discussion("main")
# Discussion switch is a local state update (not async), but give the GUI
# a moment to render the new discussion tab.
self.client.wait_io_pool_idle(timeout=5.0)
# Verify it's in the list
session = self.client.get_session()
# The session structure usually has discussions listed somewhere, or we can check the listbox
# For now, we'll trust the click and check the session update
# 2. Test File Aggregation & Context Refresh
print("[Sim] Testing context refresh and token budget...")
proj = self.client.get_project()
# Add many files to ensure we cross the 1% threshold (~9000 tokens)
import glob
all_py = [os.path.basename(f) for f in glob.glob("*.py")]
for f in all_py:
if f not in proj['project']['files']['paths']:
proj['project']['files']['paths'].append(f)
# Update project via hook
self.client.post_project(proj['project'])
self.client.wait_io_pool_idle(timeout=10.0)
proj = self.client.get_project()
# Add many files to ensure we cross the 1% threshold (~9000 tokens)
import glob
all_py = [os.path.basename(f) for f in glob.glob("*.py")]
for f in all_py:
if f not in proj['project']['files']['paths']:
proj['project']['files']['paths'].append(f)
# Update project via hook
self.client.post_project(proj['project'])
self.client.wait_io_pool_idle(timeout=10.0)
# Trigger MD Only to refresh context and token budget
# Trigger MD Only to refresh context and token budget
print("[Sim] Clicking MD Only...")
self.client.click("btn_md_only")
# Poll for "md written" specifically. The status will go through transient
# states ("switching", "sending", etc.); we want the terminal state.
start = time.time()
status = ""
while time.time() - start < 60.0:
status = self.client.get_value("ai_status") or ""
s = str(status).lower()
if "md written" in s:
break
if "error" in s and "md written" not in s:
# Terminal error state. Print the status and break so the assertion
# below shows a clear message rather than a 60s timeout.
print(f"[Sim] Terminal error: {status}")
break
time.sleep(0.5)
print(f"[Sim] Status: {status}")
assert "md written" in status, f"Expected 'md written' in status, got {status}"
pct = self.client.get_value("token_budget_pct")
current = self.client.get_value("token_budget_current")
print(f"[Sim] Token budget pct: {pct}, current={current}")
# We'll just warn if it's 0 but the MD was written, as it might be a small context
if pct == 0:
print("[Sim] WARNING: token_budget_pct is 0. This might be due to small context or estimation failure.")
# 3. Test Chat Turn
msg = "What is the current date and time? Answer in one sentence."
print(f"[Sim] Sending message: {msg}")
self.sim.run_discussion_turn(msg)
# 4. Verify History
print("[Sim] Verifying history...")
session = self.client.get_session()
entries = session.get('session', {}).get('entries', [])
if not entries:
print("[Sim] !!! WARNING: entries list is EMPTY. Waiting another 2 seconds for eventual consistency...")
time.sleep(2)
session = self.client.get_session()
entries = session.get('session', {}).get('entries', [])
# We expect at least 2 entries (User and AI)
assert len(entries) >= 2, f"Expected at least 2 entries, found {len(entries)}"
assert entries[-2]['role'] == 'User', "Expected second to last entry to be User"
assert entries[-1]['role'] == 'AI', "Expected last entry to be AI"
print(f"[Sim] AI responded: {entries[-1]['content'][:50]}...")
# 5. Test History Truncation
print("[Sim] Testing history truncation...")
self.sim.truncate_history(1)
time.sleep(1)
session = self.client.get_session()
entries = session.get('session', {}).get('entries', [])
print(f"[DEBUG] Entries after truncation: {entries}")
chat_entries = [e for e in entries if e.get('role') in ('User', 'AI')]
assert len(chat_entries) == 2, f"Expected exactly 2 chat entries after truncation, found {len(chat_entries)}"
if __name__ == "__main__":
run_sim(ContextSimulation)