Private
Public Access
0
0

fix(app_controller): clear project-switch state in _handle_reset_session

When a prior test in the tier-3-live_gui batch leaves a _do_project_switch
background thread running, the next test's btn_project_new_automated click
sees _project_switch_in_progress=True (from the prior thread) and queues
the new path via _project_switch_pending_path. The queued switch is never
actually submitted to the io_pool, so is_project_stale() stays True and
AI ops (_handle_generate_send) bail with 'project switch in progress;
AI ops disabled'.

Fix: _handle_reset_session now also clears _project_switch_in_progress,
_project_switch_pending_path, and _project_switch_error (under the
existing _project_switch_lock). This way, even if the prior background
thread is still running, the controller reports an idle state and the
new switch can be submitted normally.

Also:
- src/api_hook_client.py: reverted wait_for_project_switch to require
  in_progress=False (it had been relaxed to also return when the expected
  path was only queued, which misled the caller into thinking the switch
  was done)
- tests/test_handle_reset_session_clears_project.py: new test
  test_handle_reset_session_clears_project_switch_state asserts
  is_project_stale() returns False after reset
- tests/test_api_hook_client_wait_for_project_switch.py: updated
  test_wait_for_project_switch_does_not_return_on_queued (in_progress
  + matching path should keep waiting, not return early)
- tests/test_live_workflow.py: added pre-wait for any in-flight switch
  before doing btn_reset (so the test waits up to 60s for the prior
  switch to complete if needed)
- conductor/todos/TODO_test_full_live_workflow.md: updated Task 4 with
  the deeper hang analysis and recommended fix

Known follow-up: test_full_live_workflow still hangs in tier-3 batch
even with this fix, because the new _do_project_switch itself is hung
in the io_pool (likely saturation from prior sims' AI discussion turn
workers). Deeper investigation required.
This commit is contained in:
2026-06-08 15:19:30 -04:00
parent 5087ee988d
commit 9afc93bce2
6 changed files with 120 additions and 34 deletions
@@ -1,8 +1,14 @@
"""Tests for ApiHookClient.wait_for_project_switch.
These tests use mocked _make_request so they don't require a live_gui
session. They verify the polling logic: success, error, timeout, and
path-matching behavior.
session. They verify the polling logic: success, error, queued-not-returned,
timeout, and path-matching behavior.
Contract: wait_for_project_switch returns when the controller's switch
has completed (in_progress=False) and the path matches. It does NOT
return on queued switches (in_progress=True) because queued means a
prior switch is still running, and downstream code (AI ops) will be
blocked by is_project_stale().
"""
import sys
import os
@@ -52,8 +58,24 @@ def test_wait_for_project_switch_matches_by_basename() -> None:
assert "timeout" not in result
def test_wait_for_project_switch_does_not_return_on_queued() -> None:
    """A queued switch (in_progress=True with a matching path) must not end the wait.

    The mocked status endpoint always reports in_progress=True together with
    the expected path. Returning at that point would tell the caller the
    switch is finished while it is still pending, so the call is expected to
    keep polling until its timeout expires.
    """
    target_path = "C:/projects/foo.toml"
    queued_status = {"in_progress": True, "path": target_path, "error": None}
    client = ApiHookClient()
    # Every poll sees the same "queued" status; it never transitions to idle.
    with patch.object(client, "_make_request", return_value=queued_status):
        result = client.wait_for_project_switch(
            expected_path=target_path,
            timeout=0.5,
            poll_interval=0.1,
        )
    # The wait must end via the timeout path, not via an early return.
    assert result.get("timeout") is True
    assert result["in_progress"] is True
def test_wait_for_project_switch_times_out_when_in_progress() -> None:
"""If the controller stays in_progress past the timeout, return with timeout flag."""
"""If in_progress stays True and path never matches, return with timeout flag."""
client = ApiHookClient()
with patch.object(client, "_make_request") as mock_make:
mock_make.return_value = {"in_progress": True, "path": None, "error": None}
@@ -80,7 +102,7 @@ def test_wait_for_project_switch_polls_then_completes() -> None:
def fake_request(*args, **kwargs):
call_count[0] += 1
if call_count[0] < 3:
return {"in_progress": True, "path": None, "error": None}
return {"in_progress": True, "path": "C:/other.toml", "error": None}
return {"in_progress": False, "path": "C:/foo.toml", "error": None}
with patch.object(client, "_make_request", side_effect=fake_request):
@@ -77,3 +77,34 @@ def test_handle_reset_session_resets_project_to_valid_default(controller):
assert isinstance(controller.project, dict)
assert "project" in controller.project
def test_handle_reset_session_clears_project_switch_state(controller):
    """_handle_reset_session must return the project-switch state to idle.

    The three project-switch attributes are seeded with values that stand in
    for a switch left over from an earlier session. After the reset each one
    must be back at its cleared value and `is_project_stale()` must report
    False, so a stale switch cannot keep AI ops disabled.
    """
    # Seeded "left-over switch" value -> value expected after the reset.
    stale_and_cleared = {
        "_project_switch_in_progress": (True, False),
        "_project_switch_pending_path": ("/some/old/path.toml", None),
        "_project_switch_error": ("stale error from hung switch", None),
    }
    for attr, (stale_value, _) in stale_and_cleared.items():
        setattr(controller, attr, stale_value)
    assert controller.is_project_stale()  # precondition

    controller._handle_reset_session()

    for attr, (_, cleared_value) in stale_and_cleared.items():
        # Identity check: in_progress must be exactly False, the others exactly None.
        assert getattr(controller, attr) is cleared_value, (
            f"{attr} not cleared: {getattr(controller, attr)}"
        )
    assert not controller.is_project_stale(), (
        "is_project_stale() still True after reset: "
        f"in_progress={controller._project_switch_in_progress}, "
        f"pending={controller._project_switch_pending_path}"
    )
+17 -1
View File
@@ -40,7 +40,23 @@ def test_full_live_workflow(live_gui) -> None:
client = ApiHookClient()
assert client.wait_for_server(timeout=10)
client.post_session(session_entries=[])
# 0. Wait for any in-flight project switch to complete before starting.
# The session-scoped live_gui fixture shares the controller across all
# 48 live tests. Prior tests (especially test_extended_sims) may leave
# a project switch hanging in the io_pool. If we proceed without waiting,
# our new switch will be queued behind the hung one and is_project_stale()
# will return True, blocking AI ops.
pre_status = client.get_project_switch_status()
if pre_status.get("in_progress"):
print(f"\n[TEST] Waiting for prior project switch to complete: {pre_status}")
idle_status = client.wait_for_project_switch(timeout=60.0)
assert not idle_status.get("timeout"), (
f"Prior project switch did not complete in 60s. Aborting. "
f"Last status: {idle_status}"
)
print(f"[TEST] Prior switch done: {idle_status}")
# 1. Reset
print("\n[TEST] Clicking Reset...")
client.click("btn_reset")