checkpoint: massive refactor
This commit is contained in:
@@ -7,137 +7,113 @@ import json
|
||||
|
||||
@pytest.mark.asyncio
async def test_headless_verification_full_run():
    """Headless end-to-end run of the ConductorEngine.

    1. Initialize a ConductorEngine with a Track containing multiple dependent Tickets.
    2. Simulate a full execution run using engine.run_linear().
    3. Mock ai_client.send to simulate successful tool calls and final responses.
    4. Specifically verify that 'Context Amnesia' is maintained
       (ai_client.reset_session is invoked once per ticket).
    """
    # Two tickets on one track; T2 depends on T1 so run_linear must order them.
    t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
    t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker1", depends_on=["T1"])
    track = Track(id="track_verify", description="Verification Track", tickets=[t1, t2])

    from events import AsyncEventQueue
    queue = AsyncEventQueue()
    engine = ConductorEngine(track=track, event_queue=queue)

    with patch("ai_client.send") as mock_send, \
         patch("ai_client.reset_session") as mock_reset:
        # We need mock_send to return something that doesn't contain "BLOCKED",
        # otherwise the engine would treat the ticket as stuck.
        mock_send.return_value = "Task completed successfully."

        await engine.run_linear()

        # Verify both tickets are completed.
        assert t1.status == "completed"
        assert t2.status == "completed"

        # Verify that ai_client.send was called twice (once for each ticket).
        assert mock_send.call_count == 2

        # Verify Context Amnesia: reset_session should be called for each ticket,
        # proving no conversational state leaks between tickets.
        assert mock_reset.call_count == 2
|
||||
|
||||
@pytest.mark.asyncio
async def test_headless_verification_error_and_qa_interceptor():
    """Verify the Tier 4 QA interceptor fires on a shell error.

    5. Simulate a shell error and verify that the Tier 4 QA interceptor is triggered
       and its summary is injected into the worker's history for the next retry.
    """
    t1 = Ticket(id="T1", description="Task with error", status="todo", assigned_to="worker1")
    track = Track(id="track_error", description="Error Track", tickets=[t1])

    from events import AsyncEventQueue
    queue = AsyncEventQueue()
    engine = ConductorEngine(track=track, event_queue=queue)

    # We need to simulate the tool loop inside ai_client._send_gemini (or similar).
    # Since we want to test the real tool loop and QA injection, we mock at the provider level.
    with patch("ai_client._provider", "gemini"), \
         patch("ai_client._gemini_client") as mock_genai_client, \
         patch("ai_client.confirm_and_run_callback") as mock_run, \
         patch("ai_client.run_tier4_analysis") as mock_qa, \
         patch("ai_client._ensure_gemini_client") as mock_ensure, \
         patch("ai_client._gemini_tool_declaration", return_value=None):
        # Ensure _gemini_client is restored by the mocked ensure function:
        # reset_session may null it out between tickets, so _ensure_gemini_client
        # must put our mock back.
        import ai_client

        def restore_client():
            ai_client._gemini_client = mock_genai_client

        mock_ensure.side_effect = restore_client
        ai_client._gemini_client = mock_genai_client

        # Mocking the Gemini chat object returned by chats.create().
        mock_chat = MagicMock()
        mock_genai_client.chats.create.return_value = mock_chat

        # Mock count_tokens to avoid chat creation failure.
        mock_count_resp = MagicMock()
        mock_count_resp.total_tokens = 100
        mock_genai_client.models.count_tokens.return_value = mock_count_resp

        # 1st round: the model issues a tool call to run_powershell.
        mock_part1 = MagicMock()
        mock_part1.text = "I will run a command."
        mock_part1.function_call = MagicMock()
        mock_part1.function_call.name = "run_powershell"
        mock_part1.function_call.args = {"script": "dir"}

        mock_resp1 = MagicMock()
        mock_resp1.candidates = [MagicMock(content=MagicMock(parts=[mock_part1]), finish_reason=MagicMock(name="STOP"))]
        mock_resp1.usage_metadata.prompt_token_count = 10
        mock_resp1.usage_metadata.candidates_token_count = 5

        # 2nd round: final text after the tool result is fed back.
        mock_part2 = MagicMock()
        mock_part2.text = "The command failed but I understand why. Task done."
        mock_part2.function_call = None

        mock_resp2 = MagicMock()
        mock_resp2.candidates = [MagicMock(content=MagicMock(parts=[mock_part2]), finish_reason=MagicMock(name="STOP"))]
        mock_resp2.usage_metadata.prompt_token_count = 20
        mock_resp2.usage_metadata.candidates_token_count = 10

        mock_chat.send_message.side_effect = [mock_resp1, mock_resp2]

        # Mock run_powershell behavior: it should call the qa_callback on error
        # and append the QA analysis to the tool output it returns.
        def run_side_effect(script, base_dir, qa_callback):
            if qa_callback:
                analysis = qa_callback("Error: file not found")
                return f"""STDERR: Error: file not found

QA ANALYSIS:
{analysis}"""
            return "Error: file not found"

        mock_run.side_effect = run_side_effect
        mock_qa.return_value = "FIX: Check if path exists."

        await engine.run_linear()

        # Verify QA analysis was triggered with the raw error text.
        mock_qa.assert_called_once_with("Error: file not found")

        # Verify the 2nd send_message call includes the QA ANALYSIS in its payload (f_resps).
        # The first call is the user message, the second is the tool response.
        assert mock_chat.send_message.call_count == 2
        args, kwargs = mock_chat.send_message.call_args_list[1]
        f_resps = args[0]
        print(f"DEBUG f_resps: {f_resps}")

        # f_resps is expected to be a list of Part objects (from google.genai.types).
        # Since we're mocking, they might be MagicMocks or actual objects if types is used.
        # In our case, ai_client.Part.from_function_response is used, so inspect the
        # string form rather than attribute structure.
        found_qa = False
        for part in f_resps:
            # Check if it's a function response and contains our QA analysis.
            # We need to be careful with how google.genai.types.Part is structured or mocked.
            part_str = str(part)
            print(f"DEBUG part_str: {part_str}")
            if "QA ANALYSIS:" in part_str and "FIX: Check if path exists." in part_str:
                found_qa = True

        assert found_qa, "QA Analysis was not injected into the next round"
|
||||
|
||||
Reference in New Issue
Block a user