import pytest
from unittest.mock import MagicMock, patch, call
from models import Ticket, Track, WorkerContext
from multi_agent_conductor import ConductorEngine
import ai_client
import json


def _mock_gemini_response(parts, prompt_tokens, candidate_tokens):
    """Build a mocked Gemini chat response with a single candidate.

    Pitfall fixed here: ``MagicMock(name="STOP")`` does NOT create a mock whose
    ``.name`` attribute is ``"STOP"`` — the ``name`` kwarg is consumed by the
    Mock constructor itself and only affects the repr. ``finish_reason.name``
    must therefore be assigned explicitly after construction.
    """
    finish_reason = MagicMock()
    finish_reason.name = "STOP"
    resp = MagicMock()
    resp.candidates = [
        MagicMock(content=MagicMock(parts=parts), finish_reason=finish_reason)
    ]
    resp.usage_metadata.prompt_token_count = prompt_tokens
    resp.usage_metadata.candidates_token_count = candidate_tokens
    return resp


@pytest.mark.asyncio
async def test_headless_verification_full_run():
    """
    1. Initialize a ConductorEngine with a Track containing multiple dependent Tickets.
    2. Simulate a full execution run using engine.run_linear().
    3. Mock ai_client.send to simulate successful tool calls and final responses.
    4. Specifically verify that 'Context Amnesia' is maintained.
    """
    t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
    t2 = Ticket(
        id="T2",
        description="Task 2",
        status="todo",
        assigned_to="worker1",
        depends_on=["T1"],
    )
    track = Track(id="track_verify", description="Verification Track", tickets=[t1, t2])

    from events import AsyncEventQueue

    queue = AsyncEventQueue()
    engine = ConductorEngine(track=track, event_queue=queue)

    with patch("ai_client.send") as mock_send, \
         patch("ai_client.reset_session") as mock_reset:
        # The reply must not contain "BLOCKED", otherwise the engine would
        # treat the ticket as stuck instead of completed.
        mock_send.return_value = "Task completed successfully."

        await engine.run_linear()

        # Both tickets run to completion (T2 depends on T1).
        assert t1.status == "completed"
        assert t2.status == "completed"

        # ai_client.send is called once per ticket.
        assert mock_send.call_count == 2

        # Context Amnesia: the worker session is reset for each ticket.
        assert mock_reset.call_count == 2


@pytest.mark.asyncio
async def test_headless_verification_error_and_qa_interceptor():
    """
    5. Simulate a shell error and verify that the Tier 4 QA interceptor is
    triggered and its summary is injected into the worker's history for the
    next retry.
    """
    t1 = Ticket(id="T1", description="Task with error", status="todo", assigned_to="worker1")
    track = Track(id="track_error", description="Error Track", tickets=[t1])

    from events import AsyncEventQueue

    queue = AsyncEventQueue()
    engine = ConductorEngine(track=track, event_queue=queue)

    # We need to simulate the tool loop inside ai_client._send_gemini (or
    # similar). Since we want to test the real tool loop and QA injection, we
    # mock at the provider level rather than patching ai_client.send wholesale.
    with patch("ai_client._provider", "gemini"), \
         patch("ai_client._gemini_client") as mock_genai_client, \
         patch("ai_client.confirm_and_run_callback") as mock_run, \
         patch("ai_client.run_tier4_analysis") as mock_qa, \
         patch("ai_client._ensure_gemini_client") as mock_ensure, \
         patch("ai_client._gemini_tool_declaration", return_value=None):

        # _ensure_gemini_client would normally (re)create the real client;
        # make it restore our mock instead so the tool loop keeps using it.
        def restore_client():
            ai_client._gemini_client = mock_genai_client

        mock_ensure.side_effect = restore_client
        ai_client._gemini_client = mock_genai_client

        # Mocked Gemini chat session.
        mock_chat = MagicMock()
        mock_genai_client.chats.create.return_value = mock_chat

        # count_tokens is consulted during chat creation; give it a number so
        # chat setup does not fail.
        mock_count_resp = MagicMock()
        mock_count_resp.total_tokens = 100
        mock_genai_client.models.count_tokens.return_value = mock_count_resp

        # Round 1: the model issues a run_powershell tool call.
        mock_part1 = MagicMock()
        mock_part1.text = "I will run a command."
        mock_part1.function_call = MagicMock()
        mock_part1.function_call.name = "run_powershell"
        mock_part1.function_call.args = {"script": "dir"}
        mock_resp1 = _mock_gemini_response(
            [mock_part1], prompt_tokens=10, candidate_tokens=5
        )

        # Round 2: final text after the tool result is fed back.
        mock_part2 = MagicMock()
        mock_part2.text = "The command failed but I understand why. Task done."
        mock_part2.function_call = None
        mock_resp2 = _mock_gemini_response(
            [mock_part2], prompt_tokens=20, candidate_tokens=10
        )

        mock_chat.send_message.side_effect = [mock_resp1, mock_resp2]

        # Mock run_powershell behavior: on error it should call the
        # qa_callback and append the analysis to the tool output.
        def run_side_effect(script, base_dir, qa_callback):
            if qa_callback:
                analysis = qa_callback("Error: file not found")
                return f"""STDERR: Error: file not found
QA ANALYSIS: {analysis}"""
            return "Error: file not found"

        mock_run.side_effect = run_side_effect
        mock_qa.return_value = "FIX: Check if path exists."

        await engine.run_linear()

        # Tier 4 QA analysis fired exactly once with the raw error text.
        mock_qa.assert_called_once_with("Error: file not found")

        # Verify the 2nd send_message call includes the QA ANALYSIS in its
        # payload (f_resps). The first call is the user message, the second is
        # the tool response.
        assert mock_chat.send_message.call_count == 2
        args, kwargs = mock_chat.send_message.call_args_list[1]
        f_resps = args[0]
        print(f"DEBUG f_resps: {f_resps}")

        # f_resps is expected to be a list of Part objects (google.genai.types)
        # built via ai_client.Part.from_function_response; under mocking they
        # may be MagicMocks, so inspect their string form.
        found_qa = False
        for part in f_resps:
            part_str = str(part)
            print(f"DEBUG part_str: {part_str}")
            if "QA ANALYSIS:" in part_str and "FIX: Check if path exists." in part_str:
                found_qa = True
                break
        assert found_qa, "QA Analysis was not injected into the next round"