fix(app_controller): clear project-switch state in _handle_reset_session

When a prior test in the tier-3-live_gui batch leaves a _do_project_switch background thread running, the next test's btn_project_new_automated click sees _project_switch_in_progress=True (from the prior thread) and queues the new path via _project_switch_pending_path. The queued switch is never actually submitted to the io_pool, so is_project_stale() stays True and AI ops (_handle_generate_send) bail with 'project switch in progress; AI ops disabled'.

Fix: _handle_reset_session now also clears _project_switch_in_progress, _project_switch_pending_path, and _project_switch_error (under the existing _project_switch_lock). This way, even if the prior background thread is still running, the controller reports an idle state and the new switch can be submitted normally.

Also:
- src/api_hook_client.py: reverted wait_for_project_switch to require in_progress=False (was relaxed to return on queued path, which misled the caller into thinking the switch was done)
- tests/test_handle_reset_session_clears_project.py: new test test_handle_reset_session_clears_project_switch_state asserts is_project_stale() returns False after reset
- tests/test_api_hook_client_wait_for_project_switch.py: updated test_wait_for_project_switch_does_not_return_on_queued (in_progress + matching path should keep waiting, not return early)
- tests/test_live_workflow.py: added pre-wait for any in-flight switch before doing btn_reset (so the test waits up to 60s for the prior switch to complete if needed)
- conductor/todos/TODO_test_full_live_workflow.md: updated Task 4 with the deeper hang analysis and recommended fix

Known follow-up: test_full_live_workflow still hangs in tier-3 batch even with this fix, because the new _do_project_switch itself is hung in the io_pool (likely saturation from prior sims' AI discussion turn workers). Deeper investigation required.
2026-06-08 15:19:30 -04:00
parent 5087ee988d
commit 9afc93bce2
6 changed files with 120 additions and 34 deletions
@@ -1,8 +1,14 @@
 """Tests for ApiHookClient.wait_for_project_switch.

 These tests use mocked _make_request so they don't require a live_gui
-session. They verify the polling logic: success, error, timeout, and
-path-matching behavior.
+session. They verify the polling logic: success, error, queued-not-returned,
+timeout, and path-matching behavior.
+
+Contract: wait_for_project_switch returns when the controller's switch
+has completed (in_progress=False) and the path matches. It does NOT
+return on queued switches (in_progress=True) because queued means a
+prior switch is still running, and downstream code (AI ops) will be
+blocked by is_project_stale().
 """
 import sys
 import os
@@ -52,8 +58,24 @@ def test_wait_for_project_switch_matches_by_basename() -> None:
  assert "timeout" not in result


+def test_wait_for_project_switch_does_not_return_on_queued() -> None:
+ """If in_progress=True and path matches (switch is queued), keep waiting.
+
+ The prior switch is hung. The new switch is queued behind it. Returning
+ now would mislead the caller into thinking the switch is done, but
+ is_project_stale() will still return True and block AI ops.
+ """
+ client = ApiHookClient()
+ with patch.object(client, "_make_request") as mock_make:
+  mock_make.return_value = {"in_progress": True, "path": "C:/projects/foo.toml", "error": None}
+  result = client.wait_for_project_switch(expected_path="C:/projects/foo.toml", timeout=0.5, poll_interval=0.1)
+  # Should time out, not return early
+  assert result.get("timeout") is True
+  assert result["in_progress"] is True
+
+
 def test_wait_for_project_switch_times_out_when_in_progress() -> None:
- """If the controller stays in_progress past the timeout, return with timeout flag."""
+ """If in_progress stays True and path never matches, return with timeout flag."""
 client = ApiHookClient()
 with patch.object(client, "_make_request") as mock_make:
  mock_make.return_value = {"in_progress": True, "path": None, "error": None}
@@ -80,7 +102,7 @@ def test_wait_for_project_switch_polls_then_completes() -> None:
 def fake_request(*args, **kwargs):
  call_count[0] += 1
  if call_count[0] < 3:
-   return {"in_progress": True, "path": None, "error": None}
+   return {"in_progress": True, "path": "C:/other.toml", "error": None}
  return {"in_progress": False, "path": "C:/foo.toml", "error": None}

 with patch.object(client, "_make_request", side_effect=fake_request):
@@ -77,3 +77,34 @@ def test_handle_reset_session_resets_project_to_valid_default(controller):
 assert isinstance(controller.project, dict)
 assert "project" in controller.project

+
+def test_handle_reset_session_clears_project_switch_state(controller):
+ """The project-switch state machine must be reset so a hung switch
+ from a prior test does not block the next session.
+
+ `is_project_stale()` must return False after reset, otherwise the next
+ `btn_project_new_automated` click is queued behind the hung switch
+ and `is_project_stale()` keeps returning True, blocking AI ops
+ (`_handle_generate_send` returns 'project switch in progress; AI ops disabled').
+ """
+ # Simulate a prior hung switch
+ controller._project_switch_in_progress = True
+ controller._project_switch_pending_path = "/some/old/path.toml"
+ controller._project_switch_error = "stale error from hung switch"
+ assert controller.is_project_stale()  # precondition
+ controller._handle_reset_session()
+ assert controller._project_switch_in_progress is False, (
+  f"_project_switch_in_progress not cleared: {controller._project_switch_in_progress}"
+ )
+ assert controller._project_switch_pending_path is None, (
+  f"_project_switch_pending_path not cleared: {controller._project_switch_pending_path}"
+ )
+ assert controller._project_switch_error is None, (
+  f"_project_switch_error not cleared: {controller._project_switch_error}"
+ )
+ assert not controller.is_project_stale(), (
+  f"is_project_stale() still True after reset: "
+  f"in_progress={controller._project_switch_in_progress}, "
+  f"pending={controller._project_switch_pending_path}"
+ )
+
@@ -40,7 +40,23 @@ def test_full_live_workflow(live_gui) -> None:
 client = ApiHookClient()
 assert client.wait_for_server(timeout=10)
 client.post_session(session_entries=[])
- 
+
+ # 0. Wait for any in-flight project switch to complete before starting.
+ # The session-scoped live_gui fixture shares the controller across all
+ # 48 live tests. Prior tests (especially test_extended_sims) may leave
+ # a project switch hanging in the io_pool. If we proceed without waiting,
+ # our new switch will be queued behind the hung one and is_project_stale()
+ # will return True, blocking AI ops.
+ pre_status = client.get_project_switch_status()
+ if pre_status.get("in_progress"):
+  print(f"\n[TEST] Waiting for prior project switch to complete: {pre_status}")
+  idle_status = client.wait_for_project_switch(timeout=60.0)
+  assert not idle_status.get("timeout"), (
+   f"Prior project switch did not complete in 60s. Aborting. "
+   f"Last status: {idle_status}"
+  )
+  print(f"[TEST] Prior switch done: {idle_status}")
+
 # 1. Reset
 print("\n[TEST] Clicking Reset...")
 client.click("btn_reset")