chore(conductor): Mark track 'MMA Core Engine Implementation' as complete and verify with Phase 6 tests

This commit is contained in:
2026-02-26 21:34:28 -05:00
parent 971202e21b
commit 8bb72e351d
12 changed files with 309 additions and 23 deletions

View File

@@ -679,7 +679,8 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str,
# Only stable content (files + screenshots) goes in the cached system instruction. # Only stable content (files + screenshots) goes in the cached system instruction.
# Discussion history is sent as conversation messages so the cache isn't invalidated every turn. # Discussion history is sent as conversation messages so the cache isn't invalidated every turn.
sys_instr = f"{_get_combined_system_prompt()}\n\n<context>\n{md_content}\n</context>" sys_instr = f"{_get_combined_system_prompt()}\n\n<context>\n{md_content}\n</context>"
tools_decl = [_gemini_tool_declaration()] td = _gemini_tool_declaration()
tools_decl = [td] if td else None
# DYNAMIC CONTEXT: Check if files/context changed mid-session # DYNAMIC CONTEXT: Check if files/context changed mid-session
current_md_hash = hashlib.md5(md_content.encode()).hexdigest() current_md_hash = hashlib.md5(md_content.encode()).hexdigest()

View File

@@ -46,3 +46,14 @@
- [x] Read Tier 2 JSON flat-lists, construct Tickets, execute Stub resolution paths (1dacd36) - [x] Read Tier 2 JSON flat-lists, construct Tickets, execute Stub resolution paths (1dacd36)
- [x] Task: UI Component Update (68861c0) - [x] Task: UI Component Update (68861c0)
- [x] Refactor `gui_2.py` to push `UserRequestEvent` instead of blocking on API generation (68861c0) - [x] Refactor `gui_2.py` to push `UserRequestEvent` instead of blocking on API generation (68861c0)
## Phase 6: Live & Headless Verification
- [x] Task: Headless Engine Verification
- [x] Run a comprehensive headless test scenario (e.g., using a mock or dedicated test script).
- [x] Verify Ticket execution, "Context Amnesia" (statelessness), and Tier 4 error interception.
- [x] Task: Live GUI Integration Verification
- [x] Launch `gui_2.py` and verify Event Bus responsiveness.
- [x] Confirm UI updates and async event handling during multi-model generation.
- [x] Task: Comprehensive Regression Suite
- [x] Run all tests in `tests/` related to MMA, Conductor, and Async Events.
- [x] Verify that no regressions were introduced in existing functionality.

View File

@@ -111,7 +111,7 @@ class ConfirmDialog:
return self._approved, self._script return self._approved, self._script
class ManualSlopGUI: class App:
"""The main ImGui interface orchestrator for Manual Slop.""" """The main ImGui interface orchestrator for Manual Slop."""
def __init__(self): def __init__(self):
@@ -2595,7 +2595,7 @@ class ManualSlopGUI:
session_logger.close_session() session_logger.close_session()
def main(): def main():
app = ManualSlopGUI() app = App()
app.run() app.run()
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -18,6 +18,7 @@ import sys
import os import os
from pathlib import Path from pathlib import Path
from tkinter import filedialog, Tk from tkinter import filedialog, Tk
from typing import Optional, Callable
import aggregate import aggregate
import ai_client import ai_client
from ai_client import ProviderError from ai_client import ProviderError

View File

@@ -89,6 +89,9 @@ def run_worker_lifecycle(ticket: Ticket, context: WorkerContext, context_files:
Simulates the lifecycle of a single agent working on a ticket. Simulates the lifecycle of a single agent working on a ticket.
Calls the AI client and updates the ticket status based on the response. Calls the AI client and updates the ticket status based on the response.
""" """
# Enforce Context Amnesia: each ticket starts with a clean slate.
ai_client.reset_session()
context_injection = "" context_injection = ""
if context_files: if context_files:
parser = ASTParser(language="python") parser = ASTParser(language="python")

View File

@@ -69,6 +69,10 @@ def get_model_for_role(role: str) -> str:
return 'gemini-3.1-pro-preview' return 'gemini-3.1-pro-preview'
elif role == 'tier2-tech-lead' or role == 'tier2': elif role == 'tier2-tech-lead' or role == 'tier2':
return 'gemini-3-flash-preview' return 'gemini-3-flash-preview'
elif role == 'tier3-worker' or role == 'tier3':
return 'gemini-2.5-flash-lite'
elif role == 'tier4-qa' or role == 'tier4':
return 'gemini-2.5-flash-lite'
else: else:
return 'gemini-3-flash-preview' return 'gemini-3-flash-preview'

View File

@@ -79,6 +79,7 @@ def live_gui():
print(f"\n[Fixture] Finally block triggered: Shutting down {gui_script}...") print(f"\n[Fixture] Finally block triggered: Shutting down {gui_script}...")
# Reset the GUI state before shutting down # Reset the GUI state before shutting down
try: try:
client = ApiHookClient()
client.reset_session() client.reset_session()
time.sleep(0.5) time.sleep(0.5)
except: pass except: pass

View File

@@ -1,23 +1,25 @@
import pytest import pytest
from unittest.mock import MagicMock, patch, AsyncMock from unittest.mock import MagicMock, patch, AsyncMock
import asyncio import asyncio
from gui_2 import ManualSlopGUI from gui_2 import App
from events import UserRequestEvent from events import UserRequestEvent
@pytest.fixture @pytest.fixture
def mock_gui(): def mock_gui():
with patch('gui_2.load_config', return_value={ with (
patch('gui_2.load_config', return_value={
"ai": {"provider": "gemini", "model": "model-1"}, "ai": {"provider": "gemini", "model": "model-1"},
"projects": {"paths": [], "active": ""}, "projects": {"paths": [], "active": ""},
"gui": {"show_windows": {}} "gui": {"show_windows": {}}
}): }),
with patch('gui_2.project_manager.load_project', return_value={}): patch('gui_2.project_manager.load_project', return_value={}),
with patch('gui_2.project_manager.migrate_from_legacy_config', return_value={}): patch('gui_2.project_manager.migrate_from_legacy_config', return_value={}),
with patch('gui_2.project_manager.save_project'): patch('gui_2.project_manager.save_project'),
with patch('gui_2.session_logger.open_session'): patch('gui_2.session_logger.open_session'),
with patch('gui_2.ManualSlopGUI._init_ai_and_hooks'): patch('gui_2.App._init_ai_and_hooks'),
with patch('gui_2.ManualSlopGUI._fetch_models'): patch('gui_2.App._fetch_models')
gui = ManualSlopGUI() ):
gui = App()
return gui return gui
def test_handle_generate_send_pushes_event(mock_gui): def test_handle_generate_send_pushes_event(mock_gui):

View File

@@ -0,0 +1,136 @@
import pytest
from unittest.mock import MagicMock, patch, call
from models import Ticket, Track, WorkerContext
from multi_agent_conductor import ConductorEngine
import ai_client
import json
def test_headless_verification_full_run():
    """Drive ConductorEngine.run_linear() end-to-end in headless mode.

    Builds a two-ticket track with a dependency edge, fakes the AI layer so
    every worker call succeeds, and then checks both completion and
    'Context Amnesia': the session must be reset once per ticket so each
    worker starts from a clean slate.
    """
    first_ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
    second_ticket = Ticket(
        id="T2", description="Task 2", status="todo", assigned_to="worker1", depends_on=["T1"]
    )
    verification_track = Track(
        id="track_verify", description="Verification Track", tickets=[first_ticket, second_ticket]
    )
    engine = ConductorEngine(track=verification_track)
    with (
        patch("ai_client.send") as fake_send,
        patch("ai_client.reset_session") as fake_reset,
    ):
        # Any reply that does not contain "BLOCKED" is treated as success.
        fake_send.return_value = "Task completed successfully."
        engine.run_linear()
        # Both tickets must have been driven to completion.
        assert first_ticket.status == "completed"
        assert second_ticket.status == "completed"
        # Exactly one AI call per ticket...
        assert fake_send.call_count == 2
        # ...and one session reset per ticket (Context Amnesia).
        assert fake_reset.call_count == 2
def test_headless_verification_error_and_qa_interceptor():
    """Verify the Tier 4 QA interceptor inside the real Gemini tool loop.

    Scenario:
      * round 1: the model replies with a ``run_powershell`` function call,
      * the tool callback fails and routes the error text through the QA
        callback (Tier 4 analysis),
      * round 2: the model replies with final text.
    Asserts that the QA analysis produced for the error is injected into the
    function-response payload sent back to the model on the second round.

    Fix vs. original: dropped the redundant function-local ``import ai_client``
    — the module is already imported at the top of this file and the local
    re-import added nothing.
    """
    t1 = Ticket(id="T1", description="Task with error", status="todo", assigned_to="worker1")
    track = Track(id="track_error", description="Error Track", tickets=[t1])
    engine = ConductorEngine(track=track)
    # We need to simulate the tool loop inside ai_client._send_gemini (or similar).
    # Since we want to test the real tool loop and QA injection, we mock at the
    # provider level and let ai_client's own control flow run.
    with patch("ai_client._provider", "gemini"), \
         patch("ai_client._gemini_client") as mock_genai_client, \
         patch("ai_client.confirm_and_run_callback") as mock_run, \
         patch("ai_client.run_tier4_analysis") as mock_qa, \
         patch("ai_client._ensure_gemini_client") as mock_ensure, \
         patch("ai_client._gemini_tool_declaration", return_value=None):
        # _ensure_gemini_client normally (re)builds the client; make it restore
        # our mock instead so the tool loop always sees mock_genai_client.
        def restore_client():
            ai_client._gemini_client = mock_genai_client
        mock_ensure.side_effect = restore_client
        ai_client._gemini_client = mock_genai_client
        # Fake the Gemini chat session object.
        mock_chat = MagicMock()
        mock_genai_client.chats.create.return_value = mock_chat
        # Mock count_tokens so chat creation / budgeting does not fail.
        mock_count_resp = MagicMock()
        mock_count_resp.total_tokens = 100
        mock_genai_client.models.count_tokens.return_value = mock_count_resp
        # 1st round: tool call to run_powershell.
        mock_part1 = MagicMock()
        mock_part1.text = "I will run a command."
        # .name must be assigned after construction: MagicMock(name=...) would
        # set the mock's own name, not a ".name" attribute.
        mock_part1.function_call = MagicMock()
        mock_part1.function_call.name = "run_powershell"
        mock_part1.function_call.args = {"script": "dir"}
        mock_resp1 = MagicMock()
        # NOTE(review): MagicMock(name="STOP") names the mock rather than
        # giving finish_reason a .name of "STOP" — harmless here, but confirm
        # nothing under test reads finish_reason.name.
        mock_resp1.candidates = [MagicMock(content=MagicMock(parts=[mock_part1]), finish_reason=MagicMock(name="STOP"))]
        mock_resp1.usage_metadata.prompt_token_count = 10
        mock_resp1.usage_metadata.candidates_token_count = 5
        # 2nd round: final text after the tool result is fed back.
        mock_part2 = MagicMock()
        mock_part2.text = "The command failed but I understand why. Task done."
        mock_part2.function_call = None
        mock_resp2 = MagicMock()
        mock_resp2.candidates = [MagicMock(content=MagicMock(parts=[mock_part2]), finish_reason=MagicMock(name="STOP"))]
        mock_resp2.usage_metadata.prompt_token_count = 20
        mock_resp2.usage_metadata.candidates_token_count = 10
        mock_chat.send_message.side_effect = [mock_resp1, mock_resp2]
        # Mock run_powershell behavior: on error it must invoke the qa_callback
        # and embed the resulting analysis in the tool output.
        def run_side_effect(script, base_dir, qa_callback):
            if qa_callback:
                analysis = qa_callback("Error: file not found")
                return f"""STDERR: Error: file not found
QA ANALYSIS:
{analysis}"""
            return "Error: file not found"
        mock_run.side_effect = run_side_effect
        mock_qa.return_value = "FIX: Check if path exists."
        engine.run_linear()
        # QA (Tier 4) analysis must have been triggered exactly once, with the
        # raw error text.
        mock_qa.assert_called_once_with("Error: file not found")
        # The 2nd send_message call carries the tool responses (f_resps); the
        # first call was the user message.
        assert mock_chat.send_message.call_count == 2
        args, kwargs = mock_chat.send_message.call_args_list[1]
        f_resps = args[0]
        print(f"DEBUG f_resps: {f_resps}")
        # f_resps is expected to be a list of Part objects (google.genai.types
        # via ai_client.Part.from_function_response); with mocks in play we
        # inspect their string form rather than their structure.
        found_qa = False
        for part in f_resps:
            part_str = str(part)
            print(f"DEBUG part_str: {part_str}")
            if "QA ANALYSIS:" in part_str and "FIX: Check if path exists." in part_str:
                found_qa = True
        assert found_qa, "QA Analysis was not injected into the next round"

View File

@@ -0,0 +1,127 @@
import pytest
from unittest.mock import MagicMock, patch, AsyncMock
import asyncio
import time
from gui_2 import App
from events import UserRequestEvent
import ai_client
@pytest.fixture
def mock_app():
    # Builds an App with every external collaborator patched out: config and
    # project loading return canned dicts, session logging is a no-op, and the
    # AI/hook and model-fetch initialisers are stubbed.
    # NOTE(review): App() appears to still start its internal asyncio loop
    # thread despite these patches — the tests below rely on app._loop and
    # app.event_queue being live. Confirm against gui_2.App.__init__.
    with (
        patch('gui_2.load_config', return_value={
            "ai": {"provider": "gemini", "model": "model-1", "temperature": 0.0, "max_tokens": 100, "history_trunc_limit": 1000},
            "projects": {"paths": [], "active": ""},
            "gui": {"show_windows": {}}
        }),
        patch('gui_2.project_manager.load_project', return_value={
            "project": {"name": "test_proj"},
            "discussion": {"active": "main", "discussions": {"main": {"history": []}}},
            "files": {"paths": [], "base_dir": "."},
            "screenshots": {"paths": [], "base_dir": "."},
            "agent": {"tools": {}}
        }),
        patch('gui_2.project_manager.migrate_from_legacy_config', return_value={}),
        patch('gui_2.project_manager.save_project'),
        patch('gui_2.session_logger.open_session'),
        patch('gui_2.App._init_ai_and_hooks'),
        patch('gui_2.App._fetch_models')
    ):
        app = App()
        yield app
        # We don't have a clean way to stop the loop thread in gui_2.py App
        # so we just let it daemon-exit.
@pytest.mark.timeout(10)
def test_user_request_integration_flow(mock_app):
    """
    Verifies that pushing a UserRequestEvent to the event_queue:
    1. Triggers ai_client.send
    2. Results in a 'response' event back to the queue
    3. Eventually updates the UI state (ai_response, ai_status) after processing GUI tasks.
    """
    app = mock_app
    # Mock all ai_client methods called during _handle_request_event.
    mock_response = "This is a test AI response"
    with (
        patch('ai_client.send', return_value=mock_response) as mock_send,
        patch('ai_client.set_custom_system_prompt'),
        patch('ai_client.set_model_params'),
        patch('ai_client.set_agent_tools')
    ):
        # 1. Create and push a UserRequestEvent
        event = UserRequestEvent(
            prompt="Hello AI",
            stable_md="Context",
            file_items=[],
            disc_text="History",
            base_dir="."
        )
        # 2. Push event to the app's internal loop.  run_coroutine_threadsafe
        # is required because the loop runs on App's background thread, not
        # the pytest thread.
        asyncio.run_coroutine_threadsafe(
            app.event_queue.put("user_request", event),
            app._loop
        )
        # 3. Wait for ai_client.send to be called (polling background thread;
        # hard 5 s cap so a broken pipeline fails fast instead of hanging).
        start_time = time.time()
        while not mock_send.called and time.time() - start_time < 5:
            time.sleep(0.1)
        assert mock_send.called, "ai_client.send was not called within timeout"
        # The event fields must arrive at ai_client.send positionally in this
        # exact order: (stable_md, prompt, base_dir, file_items, disc_text).
        mock_send.assert_called_once_with("Context", "Hello AI", ".", [], "History")
        # 4. Wait for the response to propagate to _pending_gui_tasks and update UI.
        # We call _process_pending_gui_tasks manually to simulate a GUI frame update,
        # since no real render loop is running under pytest.
        start_time = time.time()
        success = False
        while time.time() - start_time < 3:
            app._process_pending_gui_tasks()
            if app.ai_response == mock_response and app.ai_status == "done":
                success = True
                break
            time.sleep(0.1)
        assert success, f"UI state was not updated. ai_response: '{app.ai_response}', status: '{app.ai_status}'"
        assert app.ai_response == mock_response
        assert app.ai_status == "done"
@pytest.mark.timeout(10)
def test_user_request_error_handling(mock_app):
    """An exception raised by ai_client.send must surface in the UI as an
    error status with the failure message in the response text."""
    app = mock_app
    with (
        patch('ai_client.send', side_effect=Exception("API Failure")),
        patch('ai_client.set_custom_system_prompt'),
        patch('ai_client.set_model_params'),
        patch('ai_client.set_agent_tools')
    ):
        # Push a request that is guaranteed to blow up inside the worker.
        failing_event = UserRequestEvent(
            prompt="Trigger Error",
            stable_md="",
            file_items=[],
            disc_text="",
            base_dir="."
        )
        asyncio.run_coroutine_threadsafe(
            app.event_queue.put("user_request", failing_event),
            app._loop
        )
        # Pump the GUI task queue until the error propagates, or give up
        # after the deadline passes.
        deadline = time.time() + 5
        error_surfaced = False
        while time.time() < deadline:
            app._process_pending_gui_tasks()
            if app.ai_status == "error" and "ERROR: API Failure" in app.ai_response:
                error_surfaced = True
                break
            time.sleep(0.1)
        assert error_surfaced, f"Error state was not reflected in UI. status: {app.ai_status}, response: {app.ai_response}"

View File

@@ -24,26 +24,26 @@ from gui_2 import App
@pytest.fixture @pytest.fixture
def mock_config(tmp_path): def mock_config(tmp_path):
config_path = tmp_path / "config.toml" config_path = tmp_path / "config.toml"
config_path.write_text("[projects] config_path.write_text("""[projects]
paths = [] paths = []
active = "" active = ""
[ai] [ai]
provider = "gemini" provider = "gemini"
model = "model" model = "model"
", encoding="utf-8") """, encoding="utf-8")
return config_path return config_path
@pytest.fixture @pytest.fixture
def mock_project(tmp_path): def mock_project(tmp_path):
project_path = tmp_path / "project.toml" project_path = tmp_path / "project.toml"
project_path.write_text("[project] project_path.write_text("""[project]
name = "test" name = "test"
[discussion] [discussion]
roles = ["User", "AI"] roles = ["User", "AI"]
active = "main" active = "main"
[discussion.discussions.main] [discussion.discussions.main]
history = [] history = []
", encoding="utf-8") """, encoding="utf-8")
return project_path return project_path
def test_log_management_init(mock_config, mock_project, monkeypatch): def test_log_management_init(mock_config, mock_project, monkeypatch):

View File

@@ -27,7 +27,7 @@ def test_redundant_calls_in_process_pending_gui_tasks(app_instance):
{'action': 'set_value', 'item': 'current_provider', 'value': 'anthropic'} {'action': 'set_value', 'item': 'current_provider', 'value': 'anthropic'}
] ]
with patch('ai_client.set_provider') as mock_set_provider, with patch('ai_client.set_provider') as mock_set_provider, \
patch('ai_client.reset_session') as mock_reset_session: patch('ai_client.reset_session') as mock_reset_session:
# We need to make sure the property setter's internal calls are also tracked or mocked. # We need to make sure the property setter's internal calls are also tracked or mocked.