chore(conductor): Mark track 'MMA Core Engine Implementation' as complete and verify with Phase 6 tests
This commit is contained in:
@@ -679,7 +679,8 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str,
|
||||
# Only stable content (files + screenshots) goes in the cached system instruction.
|
||||
# Discussion history is sent as conversation messages so the cache isn't invalidated every turn.
|
||||
sys_instr = f"{_get_combined_system_prompt()}\n\n<context>\n{md_content}\n</context>"
|
||||
tools_decl = [_gemini_tool_declaration()]
|
||||
td = _gemini_tool_declaration()
|
||||
tools_decl = [td] if td else None
|
||||
|
||||
# DYNAMIC CONTEXT: Check if files/context changed mid-session
|
||||
current_md_hash = hashlib.md5(md_content.encode()).hexdigest()
|
||||
|
||||
@@ -45,4 +45,15 @@
|
||||
- [x] Task: The Dispatcher Loop (1dacd36)
|
||||
- [x] Read Tier 2 JSON flat-lists, construct Tickets, execute Stub resolution paths (1dacd36)
|
||||
- [x] Task: UI Component Update (68861c0)
|
||||
- [x] Refactor `gui_2.py` to push `UserRequestEvent` instead of blocking on API generation (68861c0)
|
||||
- [x] Refactor `gui_2.py` to push `UserRequestEvent` instead of blocking on API generation (68861c0)
|
||||
|
||||
## Phase 6: Live & Headless Verification
|
||||
- [x] Task: Headless Engine Verification
|
||||
- [x] Run a comprehensive headless test scenario (e.g., using a mock or dedicated test script).
|
||||
- [x] Verify Ticket execution, "Context Amnesia" (statelessness), and Tier 4 error interception.
|
||||
- [x] Task: Live GUI Integration Verification
|
||||
- [x] Launch `gui_2.py` and verify Event Bus responsiveness.
|
||||
- [x] Confirm UI updates and async event handling during multi-model generation.
|
||||
- [x] Task: Comprehensive Regression Suite
|
||||
- [x] Run all tests in `tests/` related to MMA, Conductor, and Async Events.
|
||||
- [x] Verify that no regressions were introduced in existing functionality.
|
||||
4
gui_2.py
4
gui_2.py
@@ -111,7 +111,7 @@ class ConfirmDialog:
|
||||
return self._approved, self._script
|
||||
|
||||
|
||||
class ManualSlopGUI:
|
||||
class App:
|
||||
"""The main ImGui interface orchestrator for Manual Slop."""
|
||||
|
||||
def __init__(self):
|
||||
@@ -2595,7 +2595,7 @@ class ManualSlopGUI:
|
||||
session_logger.close_session()
|
||||
|
||||
def main():
    """Create the application object and run it."""
    # The GUI class is named ``App``; the older ``ManualSlopGUI`` name is no
    # longer used here.
    app = App()
    app.run()
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -18,6 +18,7 @@ import sys
|
||||
import os
|
||||
from pathlib import Path
|
||||
from tkinter import filedialog, Tk
|
||||
from typing import Optional, Callable
|
||||
import aggregate
|
||||
import ai_client
|
||||
from ai_client import ProviderError
|
||||
|
||||
@@ -89,6 +89,9 @@ def run_worker_lifecycle(ticket: Ticket, context: WorkerContext, context_files:
|
||||
Simulates the lifecycle of a single agent working on a ticket.
|
||||
Calls the AI client and updates the ticket status based on the response.
|
||||
"""
|
||||
# Enforce Context Amnesia: each ticket starts with a clean slate.
|
||||
ai_client.reset_session()
|
||||
|
||||
context_injection = ""
|
||||
if context_files:
|
||||
parser = ASTParser(language="python")
|
||||
|
||||
@@ -69,6 +69,10 @@ def get_model_for_role(role: str) -> str:
|
||||
return 'gemini-3.1-pro-preview'
|
||||
elif role == 'tier2-tech-lead' or role == 'tier2':
|
||||
return 'gemini-3-flash-preview'
|
||||
elif role == 'tier3-worker' or role == 'tier3':
|
||||
return 'gemini-2.5-flash-lite'
|
||||
elif role == 'tier4-qa' or role == 'tier4':
|
||||
return 'gemini-2.5-flash-lite'
|
||||
else:
|
||||
return 'gemini-3-flash-preview'
|
||||
|
||||
|
||||
@@ -79,6 +79,7 @@ def live_gui():
|
||||
print(f"\n[Fixture] Finally block triggered: Shutting down {gui_script}...")
|
||||
# Reset the GUI state before shutting down
|
||||
try:
|
||||
client = ApiHookClient()
|
||||
client.reset_session()
|
||||
time.sleep(0.5)
|
||||
except: pass
|
||||
|
||||
@@ -1,24 +1,26 @@
|
||||
import pytest
|
||||
from unittest.mock import MagicMock, patch, AsyncMock
|
||||
import asyncio
|
||||
from gui_2 import ManualSlopGUI
|
||||
from gui_2 import App
|
||||
from events import UserRequestEvent
|
||||
|
||||
@pytest.fixture
|
||||
def mock_gui():
    """
    Build an ``App`` with its external collaborators patched out.

    While the instance is constructed, ``gui_2.load_config`` returns a minimal
    in-memory config, the ``project_manager`` load/migrate/save functions and
    ``session_logger.open_session`` are replaced by mocks, and the
    ``App._init_ai_and_hooks`` / ``App._fetch_models`` methods are replaced by mocks.
    """
    with (
        patch('gui_2.load_config', return_value={
            "ai": {"provider": "gemini", "model": "model-1"},
            "projects": {"paths": [], "active": ""},
            "gui": {"show_windows": {}}
        }),
        patch('gui_2.project_manager.load_project', return_value={}),
        patch('gui_2.project_manager.migrate_from_legacy_config', return_value={}),
        patch('gui_2.project_manager.save_project'),
        patch('gui_2.session_logger.open_session'),
        patch('gui_2.App._init_ai_and_hooks'),
        patch('gui_2.App._fetch_models')
    ):
        gui = App()
        return gui
|
||||
|
||||
def test_handle_generate_send_pushes_event(mock_gui):
|
||||
# Mock _do_generate to return sample data
|
||||
|
||||
136
tests/test_headless_verification.py
Normal file
136
tests/test_headless_verification.py
Normal file
@@ -0,0 +1,136 @@
|
||||
import pytest
|
||||
from unittest.mock import MagicMock, patch, call
|
||||
from models import Ticket, Track, WorkerContext
|
||||
from multi_agent_conductor import ConductorEngine
|
||||
import ai_client
|
||||
import json
|
||||
|
||||
def test_headless_verification_full_run():
    """
    Run a two-ticket track through ``ConductorEngine.run_linear()`` headlessly.

    Ticket T2 depends on T1. ``ai_client.send`` and ``ai_client.reset_session``
    are patched, and the test checks that both tickets end up completed, that
    ``send`` is called once per ticket, and that the session is reset once per
    ticket ("Context Amnesia").
    """
    first = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
    second = Ticket(
        id="T2",
        description="Task 2",
        status="todo",
        assigned_to="worker1",
        depends_on=["T1"],
    )
    verification_track = Track(
        id="track_verify",
        description="Verification Track",
        tickets=[first, second],
    )
    engine = ConductorEngine(track=verification_track)

    # The canned reply must not contain "BLOCKED".
    with patch("ai_client.send", return_value="Task completed successfully.") as mock_send:
        with patch("ai_client.reset_session") as mock_reset:
            engine.run_linear()

            # Both tickets ran to completion.
            assert first.status == "completed"
            assert second.status == "completed"

            # One send per ticket.
            assert mock_send.call_count == 2

            # Context Amnesia: one session reset per ticket, so every worker
            # call starts from a clean slate.
            assert mock_reset.call_count == 2
|
||||
|
||||
def test_headless_verification_error_and_qa_interceptor():
    """
    Simulate a shell error and check that the Tier 4 QA summary reaches the model.

    The Gemini provider is mocked at the client level so the tool loop inside
    ``ai_client`` is exercised: round one asks for ``run_powershell``, the
    patched runner reports an error and calls the QA callback, and round two
    returns plain text. The test then checks that the QA analysis is part of
    the payload of the second ``send_message`` call.
    """

    def make_response(part, prompt_tokens, candidate_tokens):
        """Return a mocked Gemini response holding one part and a STOP finish reason."""
        # ``MagicMock(name="STOP")`` would only set the mock's repr name; the
        # ``name`` attribute has to be assigned after construction for
        # ``finish_reason.name`` to read back as the string "STOP".
        finish_reason = MagicMock()
        finish_reason.name = "STOP"
        response = MagicMock()
        response.candidates = [
            MagicMock(content=MagicMock(parts=[part]), finish_reason=finish_reason)
        ]
        response.usage_metadata.prompt_token_count = prompt_tokens
        response.usage_metadata.candidates_token_count = candidate_tokens
        return response

    t1 = Ticket(id="T1", description="Task with error", status="todo", assigned_to="worker1")
    track = Track(id="track_error", description="Error Track", tickets=[t1])
    engine = ConductorEngine(track=track)

    # Mock at the provider level so the tool loop and QA injection in ai_client
    # are what is being tested.
    with patch("ai_client._provider", "gemini"), \
         patch("ai_client._gemini_client") as mock_genai_client, \
         patch("ai_client.confirm_and_run_callback") as mock_run, \
         patch("ai_client.run_tier4_analysis") as mock_qa, \
         patch("ai_client._ensure_gemini_client") as mock_ensure, \
         patch("ai_client._gemini_tool_declaration", return_value=None):

        # Whenever the client is "ensured", put the mocked client back in place.
        def restore_client():
            ai_client._gemini_client = mock_genai_client

        mock_ensure.side_effect = restore_client
        ai_client._gemini_client = mock_genai_client

        # Chat object handed out by the mocked client.
        mock_chat = MagicMock()
        mock_genai_client.chats.create.return_value = mock_chat

        # Give count_tokens a concrete integer so chat creation does not fail.
        mock_count_resp = MagicMock()
        mock_count_resp.total_tokens = 100
        mock_genai_client.models.count_tokens.return_value = mock_count_resp

        # Round 1: the model asks to run a PowerShell command.
        tool_call_part = MagicMock()
        tool_call_part.text = "I will run a command."
        tool_call_part.function_call = MagicMock()
        tool_call_part.function_call.name = "run_powershell"
        tool_call_part.function_call.args = {"script": "dir"}

        # Round 2: plain text after the tool result, no further tool call.
        final_part = MagicMock()
        final_part.text = "The command failed but I understand why. Task done."
        final_part.function_call = None

        mock_chat.send_message.side_effect = [
            make_response(tool_call_part, 10, 5),
            make_response(final_part, 20, 10),
        ]

        # Stand-in for the script runner: report an error and, when a QA
        # callback is supplied, append its analysis to the output.
        def run_side_effect(script, base_dir, qa_callback):
            if qa_callback:
                analysis = qa_callback("Error: file not found")
                return f"STDERR: Error: file not found\n\nQA ANALYSIS:\n{analysis}"
            return "Error: file not found"

        mock_run.side_effect = run_side_effect
        mock_qa.return_value = "FIX: Check if path exists."

        engine.run_linear()

        # The QA analysis was requested exactly once, with the error text.
        mock_qa.assert_called_once_with("Error: file not found")

        # First call is the user message, second call carries the tool response.
        assert mock_chat.send_message.call_count == 2
        args, _kwargs = mock_chat.send_message.call_args_list[1]
        f_resps = args[0]

        # The parts may be real google.genai Part objects or mocks, so they are
        # compared through their string rendering.
        rendered_parts = [str(part) for part in f_resps]
        found_qa = any(
            "QA ANALYSIS:" in text and "FIX: Check if path exists." in text
            for text in rendered_parts
        )
        assert found_qa, f"QA Analysis was not injected into the next round: {rendered_parts}"
|
||||
127
tests/test_live_gui_integration.py
Normal file
127
tests/test_live_gui_integration.py
Normal file
@@ -0,0 +1,127 @@
|
||||
import pytest
|
||||
from unittest.mock import MagicMock, patch, AsyncMock
|
||||
import asyncio
|
||||
import time
|
||||
from gui_2 import App
|
||||
from events import UserRequestEvent
|
||||
import ai_client
|
||||
|
||||
@pytest.fixture
|
||||
def mock_app():
    """
    Yield an ``App`` constructed with its loaders and start-up hooks patched.

    ``gui_2.load_config`` and ``project_manager.load_project`` return small
    in-memory dictionaries; project migration/saving, session logging and the
    ``App._init_ai_and_hooks`` / ``App._fetch_models`` methods are replaced by mocks.
    """
    fake_config = {
        "ai": {"provider": "gemini", "model": "model-1", "temperature": 0.0, "max_tokens": 100, "history_trunc_limit": 1000},
        "projects": {"paths": [], "active": ""},
        "gui": {"show_windows": {}}
    }
    fake_project = {
        "project": {"name": "test_proj"},
        "discussion": {"active": "main", "discussions": {"main": {"history": []}}},
        "files": {"paths": [], "base_dir": "."},
        "screenshots": {"paths": [], "base_dir": "."},
        "agent": {"tools": {}}
    }
    with (
        patch('gui_2.load_config', return_value=fake_config),
        patch('gui_2.project_manager.load_project', return_value=fake_project),
        patch('gui_2.project_manager.migrate_from_legacy_config', return_value={}),
        patch('gui_2.project_manager.save_project'),
        patch('gui_2.session_logger.open_session'),
        patch('gui_2.App._init_ai_and_hooks'),
        patch('gui_2.App._fetch_models')
    ):
        yield App()
        # gui_2.py's App offers no clean way to stop its loop thread, so it is
        # simply left to exit as a daemon.
|
||||
|
||||
@pytest.mark.timeout(10)
|
||||
def test_user_request_integration_flow(mock_app):
    """
    Check the round trip of a ``UserRequestEvent`` through the app's event queue.

    Pushing the event is expected to:
    1. Trigger ``ai_client.send`` with the event's fields.
    2. Produce a response that is queued as a pending GUI task.
    3. Update ``ai_response`` / ``ai_status`` once the pending GUI tasks are processed.
    """
    app = mock_app
    mock_response = "This is a test AI response"

    # Patch every ai_client function touched while the request is handled.
    with (
        patch('ai_client.send', return_value=mock_response) as mock_send,
        patch('ai_client.set_custom_system_prompt'),
        patch('ai_client.set_model_params'),
        patch('ai_client.set_agent_tools')
    ):
        # 1. Build the request event.
        event = UserRequestEvent(
            prompt="Hello AI",
            stable_md="Context",
            file_items=[],
            disc_text="History",
            base_dir="."
        )

        # 2. Hand it to the app's own event loop.
        enqueue = asyncio.run_coroutine_threadsafe(
            app.event_queue.put("user_request", event),
            app._loop
        )
        # Wait for the put itself so that a failure while enqueuing is raised
        # here rather than showing up later as an unexplained timeout.
        enqueue.result(timeout=2)

        # 3. Poll until the background thread has called ai_client.send.
        #    A monotonic clock keeps the deadline immune to wall-clock changes.
        deadline = time.monotonic() + 5
        while not mock_send.called and time.monotonic() < deadline:
            time.sleep(0.1)

        assert mock_send.called, "ai_client.send was not called within timeout"
        mock_send.assert_called_once_with("Context", "Hello AI", ".", [], "History")

        # 4. Process pending GUI tasks by hand (simulating frame updates) until
        #    the response has reached the UI state.
        deadline = time.monotonic() + 3
        success = False
        while time.monotonic() < deadline:
            app._process_pending_gui_tasks()
            if app.ai_response == mock_response and app.ai_status == "done":
                success = True
                break
            time.sleep(0.1)

        assert success, f"UI state was not updated. ai_response: '{app.ai_response}', status: '{app.ai_status}'"
|
||||
|
||||
@pytest.mark.timeout(10)
|
||||
def test_user_request_error_handling(mock_app):
    """
    Check that an exception raised by ``ai_client.send`` is reflected in the UI state.

    After the failing request has been processed, ``ai_status`` is expected to
    be "error" and ``ai_response`` to contain "ERROR: API Failure".
    """
    app = mock_app

    with (
        patch('ai_client.send', side_effect=Exception("API Failure")),
        patch('ai_client.set_custom_system_prompt'),
        patch('ai_client.set_model_params'),
        patch('ai_client.set_agent_tools')
    ):
        event = UserRequestEvent(
            prompt="Trigger Error",
            stable_md="",
            file_items=[],
            disc_text="",
            base_dir="."
        )

        enqueue = asyncio.run_coroutine_threadsafe(
            app.event_queue.put("user_request", event),
            app._loop
        )
        # Wait for the put itself so that a failure while enqueuing is raised
        # here rather than showing up later as an unexplained timeout.
        enqueue.result(timeout=2)

        # Process pending GUI tasks by hand until the error state shows up.
        # A monotonic clock keeps the deadline immune to wall-clock changes.
        deadline = time.monotonic() + 5
        success = False
        while time.monotonic() < deadline:
            app._process_pending_gui_tasks()
            if app.ai_status == "error" and "ERROR: API Failure" in app.ai_response:
                success = True
                break
            time.sleep(0.1)

        assert success, f"Error state was not reflected in UI. status: {app.ai_status}, response: {app.ai_response}"
|
||||
@@ -24,26 +24,26 @@ from gui_2 import App
|
||||
@pytest.fixture
|
||||
def mock_config(tmp_path):
    """
    Write a minimal ``config.toml`` into *tmp_path* and return its path.

    The file holds an empty ``[projects]`` table and an ``[ai]`` table that
    selects the "gemini" provider with the model name "model".
    """
    config_path = tmp_path / "config.toml"
    # One literal per TOML line: a single-quoted string cannot span lines, and
    # this keeps the file content independent of source indentation.
    config_path.write_text(
        '[projects]\n'
        'paths = []\n'
        'active = ""\n'
        '[ai]\n'
        'provider = "gemini"\n'
        'model = "model"\n',
        encoding="utf-8",
    )
    return config_path
|
||||
|
||||
@pytest.fixture
|
||||
def mock_project(tmp_path):
    """
    Write a minimal ``project.toml`` into *tmp_path* and return its path.

    The file names the project "test" and declares one discussion, "main",
    with the roles "User" and "AI" and an empty history.
    """
    project_path = tmp_path / "project.toml"
    # One literal per TOML line: a single-quoted string cannot span lines, and
    # this keeps the file content independent of source indentation.
    project_path.write_text(
        '[project]\n'
        'name = "test"\n'
        '[discussion]\n'
        'roles = ["User", "AI"]\n'
        'active = "main"\n'
        '[discussion.discussions.main]\n'
        'history = []\n',
        encoding="utf-8",
    )
    return project_path
|
||||
|
||||
def test_log_management_init(mock_config, mock_project, monkeypatch):
|
||||
|
||||
@@ -27,7 +27,7 @@ def test_redundant_calls_in_process_pending_gui_tasks(app_instance):
|
||||
{'action': 'set_value', 'item': 'current_provider', 'value': 'anthropic'}
|
||||
]
|
||||
|
||||
with patch('ai_client.set_provider') as mock_set_provider,
|
||||
with patch('ai_client.set_provider') as mock_set_provider, \
|
||||
patch('ai_client.reset_session') as mock_reset_session:
|
||||
|
||||
# We need to make sure the property setter's internal calls are also tracked or mocked.
|
||||
|
||||
Reference in New Issue
Block a user