test(stabilization): Resolve run_linear API drift and implement vlogger high-signal reporting

2026-02-28 20:18:05 -05:00
parent 2a2675e386
commit ece46f922c
5 changed files with 174 additions and 143 deletions
@@ -1,16 +1,17 @@
-from typing import Any
+from typing import Any
 import pytest
 from unittest.mock import MagicMock, patch, call
 from models import Ticket, Track, WorkerContext
+import multi_agent_conductor
 from multi_agent_conductor import ConductorEngine
 import ai_client
 import json

@pytest.mark.asyncio
-async def test_headless_verification_full_run() -> None:
+async def test_headless_verification_full_run(vlogger) -> None:
 """
    1. Initialize a ConductorEngine with a Track containing multiple dependent Tickets.
-    2. Simulate a full execution run using engine.run_linear().
+    2. Simulate a full execution run using engine.run().
    3. Mock ai_client.send to simulate successful tool calls and final responses.
    4. Specifically verify that 'Context Amnesia' is maintained.
    """
@@ -19,12 +20,22 @@ async def test_headless_verification_full_run() -> None:
 track = Track(id="track_verify", description="Verification Track", tickets=[t1, t2])
 from events import AsyncEventQueue
 queue = AsyncEventQueue()
- engine = ConductorEngine(track=track, event_queue=queue)
- with patch("ai_client.send") as mock_send, \
- patch("ai_client.reset_session") as mock_reset:
+ engine = ConductorEngine(track=track, event_queue=queue, auto_queue=True)
+ 
+ vlogger.log_state("T1 Status Initial", "todo", t1.status)
+ vlogger.log_state("T2 Status Initial", "todo", t2.status)
+
+ # We must patch where it is USED: multi_agent_conductor
+ with patch("multi_agent_conductor.ai_client.send") as mock_send, \
+ patch("multi_agent_conductor.ai_client.reset_session") as mock_reset, \
+ patch("multi_agent_conductor.confirm_spawn", return_value=(True, "mock_prompt", "mock_ctx")):
 # We need mock_send to return something that doesn't contain "BLOCKED"
  mock_send.return_value = "Task completed successfully."
-  await engine.run_linear()
+  await engine.run()
+  
+  vlogger.log_state("T1 Status Final", "todo", t1.status)
+  vlogger.log_state("T2 Status Final", "todo", t2.status)
+
  # Verify both tickets are completed
  assert t1.status == "completed"
  assert t2.status == "completed"
@@ -32,9 +43,10 @@ async def test_headless_verification_full_run() -> None:
  assert mock_send.call_count == 2
  # Verify Context Amnesia: reset_session should be called for each ticket
  assert mock_reset.call_count == 2
+  vlogger.finalize("Headless full run with Context Amnesia", "PASS", "Tickets completed and session reset twice.")

@pytest.mark.asyncio
-async def test_headless_verification_error_and_qa_interceptor() -> None:
+async def test_headless_verification_error_and_qa_interceptor(vlogger) -> None:
 """
    5. Simulate a shell error and verify that the Tier 4 QA interceptor is triggered 
       and its summary is injected into the worker's history for the next retry.
@@ -43,7 +55,7 @@ async def test_headless_verification_error_and_qa_interceptor() -> None:
 track = Track(id="track_error", description="Error Track", tickets=[t1])
 from events import AsyncEventQueue
 queue = AsyncEventQueue()
- engine = ConductorEngine(track=track, event_queue=queue)
+ engine = ConductorEngine(track=track, event_queue=queue, auto_queue=True)
 # We need to simulate the tool loop inside ai_client._send_gemini (or similar)
 # Since we want to test the real tool loop and QA injection, we mock at the provider level.
 with patch("ai_client._provider", "gemini"), \
@@ -51,7 +63,8 @@ async def test_headless_verification_error_and_qa_interceptor() -> None:
 patch("ai_client.confirm_and_run_callback") as mock_run, \
 patch("ai_client.run_tier4_analysis") as mock_qa, \
 patch("ai_client._ensure_gemini_client") as mock_ensure, \
- patch("ai_client._gemini_tool_declaration", return_value=None):
+ patch("ai_client._gemini_tool_declaration", return_value=None), \
+ patch("multi_agent_conductor.confirm_spawn", return_value=(True, "mock_prompt", "mock_ctx")):
 # Ensure _gemini_client is restored by the mock ensure function
  import ai_client

@@ -97,7 +110,15 @@ QA ANALYSIS:
   return "Error: file not found"
  mock_run.side_effect = run_side_effect
  mock_qa.return_value = "FIX: Check if path exists."
-  await engine.run_linear()
+  
+  vlogger.log_state("T1 Initial Status", "todo", t1.status)
+  
+  # Wrap run_worker_lifecycle in a pass-through mock (wraps=...) so the real worker logic still runs
+  with patch("multi_agent_conductor.run_worker_lifecycle", wraps=multi_agent_conductor.run_worker_lifecycle) as mock_worker_wrap:
+   await engine.run()
+  
+  vlogger.log_state("T1 Final Status", "todo", t1.status)
+  
  # Verify QA analysis was triggered
  mock_qa.assert_called_once_with("Error: file not found")
  # Verify the 2nd send_message call includes the QA ANALYSIS in its payload (f_resps)
@@ -105,17 +126,11 @@ QA ANALYSIS:
  assert mock_chat.send_message.call_count == 2
  args, kwargs = mock_chat.send_message.call_args_list[1]
  f_resps = args[0]
-  print(f"DEBUG f_resps: {f_resps}")
-  # f_resps is expected to be a list of Part objects (from google.genai.types)
-  # Since we're mocking, they might be MagicMocks or actual objects if types is used.
-  # In our case, ai_client.Part.from_function_response is used.
+  
  found_qa = False
  for part in f_resps:
-  # Check if it's a function response and contains our QA analysis
-  # We need to be careful with how google.genai.types.Part is structured or mocked
   part_str = str(part)
-   print(f"DEBUG part_str: {part_str}")
   if "QA ANALYSIS:" in part_str and "FIX: Check if path exists." in part_str:
    found_qa = True
  assert found_qa, "QA Analysis was not injected into the next round"
-
+  vlogger.finalize("Tier 4 QA Injection", "PASS", "QA summary injected into next worker round.")