Private
Public Access
0
0

fix(io_pool): increase worker count from 4 to 8 to prevent test hangs

Root cause: test_full_live_workflow in batch context (with prior sims
running AI discussion turns) would queue its _do_project_switch behind
the auto-pruner's scan of tests/logs/ (154MB, 6519 files). The 4-worker
pool was saturated, so the switch would never run within 30s.

Fix: bump IO_POOL_MAX_WORKERS from 4 to 8. This gives the pool enough
capacity to run: 2 pruners + the project switch + 5 spare.

Also: add the /api/io_pool_status endpoint plus the get_io_pool_status
and wait_io_pool_idle helpers. They are kept in api_hooks.py and
api_hook_client.py and are covered by test_api_hook_client_io_pool.py,
even though test_full_live_workflow itself no longer uses them - they
remain useful for future tests that want to assert pool state directly.

Also: add a warmup wait (client.get_warmup_wait) at the start of
test_full_live_workflow to ensure SDK modules are loaded before AI ops.

Test verification:
- test_full_live_workflow in isolation: 11.83s PASS
- test_full_live_workflow in batch (with 4 prior sims): 83.46s PASS
- 30/30 related unit tests PASS
This commit is contained in:
2026-06-08 17:49:34 -04:00
parent 9afc93bce2
commit 4a33848620
7 changed files with 168 additions and 10 deletions
+76
View File
@@ -0,0 +1,76 @@
"""Tests for ApiHookClient.get_io_pool_status and wait_io_pool_idle.
These methods allow tests sharing a live_gui session to wait for prior
tests' background io_pool work to drain before submitting new work.
ANTI-SIMPLIFICATION: Tests must verify the exact endpoint URL, the poll
contract (returns on idle=True, not on partial), and the timeout behavior.
"""
import pytest
from unittest.mock import patch
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from src.api_hook_client import ApiHookClient
def test_get_io_pool_status_calls_endpoint() -> None:
    """get_io_pool_status issues GET /api/io_pool_status and returns the payload."""
    hook_client = ApiHookClient()
    with patch.object(
        hook_client, "_make_request", return_value={"idle": True, "inflight": 0}
    ) as request_mock:
        observed = hook_client.get_io_pool_status()
    # Compare against a fresh literal so in-place mutation of the mocked
    # payload by the client would still be caught.
    assert observed == {"idle": True, "inflight": 0}
    request_mock.assert_any_call("GET", "/api/io_pool_status")
def test_get_io_pool_status_handles_empty_response() -> None:
    """An empty (None) response is reported as an idle pool with nothing in flight."""
    hook_client = ApiHookClient()
    with patch.object(hook_client, "_make_request", return_value=None):
        observed = hook_client.get_io_pool_status()
    assert observed == {"idle": True, "inflight": 0}
def test_wait_io_pool_idle_returns_immediately_when_idle() -> None:
    """An already-idle pool makes wait_io_pool_idle return True after one poll."""
    hook_client = ApiHookClient()
    idle_status = {"idle": True, "inflight": 0}
    with patch.object(
        hook_client, "get_io_pool_status", return_value=idle_status
    ) as status_mock:
        outcome = hook_client.wait_io_pool_idle(timeout=5.0)
    assert outcome is True
    # Exactly one status poll: no extra round-trips once idle is seen.
    status_mock.assert_called_once()
def test_wait_io_pool_idle_polls_then_returns_when_idle() -> None:
    """wait_io_pool_idle keeps polling while busy and returns True once idle."""
    hook_client = ApiHookClient()
    # Three busy snapshots with a draining backlog, then the idle one.
    busy_snapshots = [{"idle": False, "inflight": pending} for pending in (3, 2, 1)]
    snapshots = busy_snapshots + [{"idle": True, "inflight": 0}]
    with patch.object(hook_client, "get_io_pool_status") as status_mock, \
            patch("time.sleep") as sleep_mock:
        status_mock.side_effect = snapshots
        outcome = hook_client.wait_io_pool_idle(timeout=10.0, poll_interval=0.1)
    assert outcome is True
    # One poll per snapshot, and one sleep after each busy snapshot.
    assert status_mock.call_count == 4
    assert sleep_mock.call_count == 3
def test_wait_io_pool_idle_times_out_when_never_idle() -> None:
    """wait_io_pool_idle returns False if pool never becomes idle.

    The pool status is pinned to busy, and time.time / time.sleep are
    patched so the 5s timeout is crossed by a scripted clock.
    """
    client = ApiHookClient()
    # A few readings inside the timeout window, then the clock jumps far past
    # it. Once the scripted readings run out the clock keeps answering 100.0,
    # so the test does not depend on how many times the clock is read while
    # the patch is active (a finite side_effect list raises StopIteration
    # when it is exhausted).
    early_ticks = iter([0.0, 0.1, 0.2, 0.3])

    def fake_time() -> float:
        return next(early_ticks, 100.0)

    with patch.object(client, "get_io_pool_status") as mock_status:
        mock_status.return_value = {"idle": False, "inflight": 5}
        with patch("time.time", side_effect=fake_time):
            with patch("time.sleep"):
                result = client.wait_io_pool_idle(timeout=5.0, poll_interval=0.1)
    assert result is False
+10 -6
View File
@@ -41,12 +41,16 @@ def test_full_live_workflow(live_gui) -> None:
assert client.wait_for_server(timeout=10)
client.post_session(session_entries=[])
# 0. Wait for any in-flight project switch to complete before starting.
# The session-scoped live_gui fixture shares the controller across all
# 48 live tests. Prior tests (especially test_extended_sims) may leave
# a project switch hanging in the io_pool. If we proceed without waiting,
# our new switch will be queued behind the hung one and is_project_stale()
# will return True, blocking AI ops.
# 0a. Wait for app warmup to complete. The warmup submits heavy-module
# import jobs directly to the io_pool (bypassing submit_io's counter);
# we wait for the warmup done event so SDK modules are guaranteed loaded
# before AI ops.
warmup_result = client.get_warmup_wait(timeout=60.0)
print(f"[TEST] Warmup result: {warmup_result}")
# 0b. Wait for any in-flight project switch to complete before starting.
# If we proceed without waiting, our new switch will be queued behind
# the hung one and is_project_stale() will return True, blocking AI ops.
pre_status = client.get_project_switch_status()
if pre_status.get("in_progress"):
print(f"\n[TEST] Waiting for prior project switch to complete: {pre_status}")