Compare commits
12 Commits
d087a20f7b
...
ef7040c3fd
| Author | SHA1 | Date | |
|---|---|---|---|
| ef7040c3fd | |||
| 0dedcc1773 | |||
| b5b89f2f1b | |||
| 6e0948467f | |||
| 41ae3df75d | |||
| cca9ef9307 | |||
| f0f285bc26 | |||
| d10a663111 | |||
| b3d972d19d | |||
| 7a614cbe8c | |||
| 3b2d82ed0d | |||
| 8438f69197 |
@@ -1,6 +1,9 @@
|
|||||||
{
|
{
|
||||||
"tools": {
|
"tools": {
|
||||||
"discoveryCommand": "python C:/projects/manual_slop/scripts/tool_discovery.py"
|
"discoveryCommand": "python C:/projects/manual_slop/scripts/tool_discovery.py",
|
||||||
|
"whitelist": [
|
||||||
|
"*"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"hooks": {
|
"hooks": {
|
||||||
"BeforeTool": [
|
"BeforeTool": [
|
||||||
|
|||||||
@@ -33,7 +33,7 @@ from google.genai import types
|
|||||||
from events import EventEmitter
|
from events import EventEmitter
|
||||||
|
|
||||||
_provider: str = "gemini"
|
_provider: str = "gemini"
|
||||||
_model: str = "gemini-2.5-flash"
|
_model: str = "gemini-2.5-flash-lite"
|
||||||
_temperature: float = 0.0
|
_temperature: float = 0.0
|
||||||
_max_tokens: int = 8192
|
_max_tokens: int = 8192
|
||||||
|
|
||||||
|
|||||||
@@ -27,16 +27,16 @@
|
|||||||
- [x] Persist the active epic, tracks, and tickets to `manual_slop.toml`.
|
- [x] Persist the active epic, tracks, and tickets to `manual_slop.toml`.
|
||||||
|
|
||||||
## Phase 4: Product Alignment & Refinement
|
## Phase 4: Product Alignment & Refinement
|
||||||
- [~] Task: UX Differentiator Audit
|
- [x] Task: UX Differentiator Audit
|
||||||
- [ ] Ensure the UX prioritizes "Expert Oversight" over "Full Autonomy" (Manual Slop vs. Gemini CLI).
|
- [x] Ensure the UX prioritizes "Expert Oversight" over "Full Autonomy" (Manual Slop vs. Gemini CLI).
|
||||||
- [ ] Add detailed token metrics and Tier-specific latency indicators to the Dashboard.
|
- [x] Add detailed token metrics and Tier-specific latency indicators to the Dashboard.
|
||||||
|
|
||||||
## Phase 5: Exhaustive Testing & Regression
|
## Phase 5: Exhaustive Testing & Regression
|
||||||
- [ ] Task: Headless Engine Verification
|
- [x] Task: Headless Engine Verification (d087a20)
|
||||||
- [ ] Create `tests/test_orchestration_logic.py` to verify Tier 1 -> Tier 2 -> Tier 3 flow without a GUI.
|
- [x] Create `tests/test_orchestration_logic.py` to verify Tier 1 -> Tier 2 -> Tier 3 flow without a GUI.
|
||||||
- [ ] Verify DAG resolution and error handling (e.g., blocked tickets).
|
- [x] Verify DAG resolution and error handling (e.g., blocked tickets).
|
||||||
- [ ] Task: Visual Verification Suite
|
- [x] Task: Visual Verification Suite (d087a20)
|
||||||
- [ ] Create `tests/visual_orchestration_verification.py` using `ApiHookClient`.
|
- [x] Create `tests/visual_orchestration_verification.py` using `ApiHookClient`.
|
||||||
- [ ] Simulate a full "Epic" lifecycle: User Prompt -> Track Review -> Ticket Generation -> Execution.
|
- [x] Simulate a full "Epic" lifecycle: User Prompt -> Track Review -> Ticket Generation -> Execution.
|
||||||
- [ ] Task: Core Regression Suite
|
- [x] Task: Core Regression Suite (d087a20)
|
||||||
- [ ] Run all existing MMA, Conductor, and GUI tests to ensure no regressions.
|
- [x] Run all existing MMA, Conductor, and GUI tests to ensure no regressions.
|
||||||
@@ -12,9 +12,9 @@ To serve as an expert-level utility for personal developer use on small projects
|
|||||||
- **Multi-Provider Integration:** Supports Gemini, Anthropic, and DeepSeek with seamless switching.
|
- **Multi-Provider Integration:** Supports Gemini, Anthropic, and DeepSeek with seamless switching.
|
||||||
- **4-Tier Hierarchical Multi-Model Architecture:** Orchestrates an intelligent cascade of specialized models to isolate cognitive loads and minimize token burn.
|
- **4-Tier Hierarchical Multi-Model Architecture:** Orchestrates an intelligent cascade of specialized models to isolate cognitive loads and minimize token burn.
|
||||||
- **Tier 1 (Orchestrator):** Strategic product alignment, setup (`/conductor:setup`), and track initialization (`/conductor:newTrack`) using `gemini-3.1-pro-preview`.
|
- **Tier 1 (Orchestrator):** Strategic product alignment, setup (`/conductor:setup`), and track initialization (`/conductor:newTrack`) using `gemini-3.1-pro-preview`.
|
||||||
- **Tier 2 (Tech Lead):** Technical oversight and track execution (`/conductor:implement`) using `gemini-3-flash-preview`. Maintains persistent context throughout implementation.
|
- **Tier 2 (Tech Lead):** Technical oversight and track execution (`/conductor:implement`) using `gemini-2.5-flash`. Maintains persistent context throughout implementation.
|
||||||
- **Tier 3 (Worker):** Surgical code implementation and TDD using `gemini-2.5-flash-lite` or `deepseek-v3`. Operates statelessly with tool access and dependency skeletons.
|
- **Tier 3 (Worker):** Surgical code implementation and TDD using `gemini-2.5-flash` or `deepseek-v3`. Operates statelessly with tool access and dependency skeletons.
|
||||||
- **Tier 4 (QA):** Error analysis and diagnostics using `gemini-2.5-flash-lite` or `deepseek-v3`. Operates statelessly with tool access.
|
- **Tier 4 (QA):** Error analysis and diagnostics using `gemini-2.5-flash` or `deepseek-v3`. Operates statelessly with tool access.
|
||||||
- **MMA Delegation Engine:** Utilizes the `mma-exec` CLI and `mma.ps1` helper to route tasks, ensuring role-scoped context and detailed observability via timestamped sub-agent logs. Supports dynamic ticket creation and dependency resolution via an automated Dispatcher Loop.
|
- **MMA Delegation Engine:** Utilizes the `mma-exec` CLI and `mma.ps1` helper to route tasks, ensuring role-scoped context and detailed observability via timestamped sub-agent logs. Supports dynamic ticket creation and dependency resolution via an automated Dispatcher Loop.
|
||||||
- **Role-Scoped Documentation:** Automated mapping of foundational documents to specific tiers to prevent token bloat and maintain high-signal context.
|
- **Role-Scoped Documentation:** Automated mapping of foundational documents to specific tiers to prevent token bloat and maintain high-signal context.
|
||||||
- **Strict Memory Siloing:** Employs tree-sitter AST-based interface extraction (Skeleton View, Curated View) and "Context Amnesia" to provide workers only with the absolute minimum context required, preventing hallucination loops.
|
- **Strict Memory Siloing:** Employs tree-sitter AST-based interface extraction (Skeleton View, Curated View) and "Context Amnesia" to provide workers only with the absolute minimum context required, preventing hallucination loops.
|
||||||
|
|||||||
@@ -21,8 +21,7 @@
|
|||||||
- **DeepSeek (Dedicated SDK):** Integrated for high-performance codegen and reasoning (Phase 2).
|
- **DeepSeek (Dedicated SDK):** Integrated for high-performance codegen and reasoning (Phase 2).
|
||||||
- **Gemini CLI:** Integrated as a headless backend provider, utilizing a custom subprocess adapter and bridge script for tool execution control. Achieves full functional parity with direct SDK usage, including real-time token counting and detailed subprocess observability.
|
- **Gemini CLI:** Integrated as a headless backend provider, utilizing a custom subprocess adapter and bridge script for tool execution control. Achieves full functional parity with direct SDK usage, including real-time token counting and detailed subprocess observability.
|
||||||
- **Gemini 3.1 Pro Preview:** Tier 1 Orchestrator model for complex reasoning.
|
- **Gemini 3.1 Pro Preview:** Tier 1 Orchestrator model for complex reasoning.
|
||||||
- **Gemini 3 Flash Preview:** Tier 2 Tech Lead model for rapid architectural planning.
|
- **Gemini 2.5 Flash:** High-performance, low-latency model for Tier 2 Tech Lead, Tier 3 Workers, and Tier 4 QA.
|
||||||
- **Gemini 2.5 Flash Lite:** High-performance, low-latency model for Tier 3 Workers and Tier 4 QA.
|
|
||||||
- **DeepSeek-V3:** Tier 3 Worker model optimized for code implementation.
|
- **DeepSeek-V3:** Tier 3 Worker model optimized for code implementation.
|
||||||
- **DeepSeek-R1:** Specialized reasoning model for complex logical chains and "thinking" traces.
|
- **DeepSeek-R1:** Specialized reasoning model for complex logical chains and "thinking" traces.
|
||||||
|
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ def test_ai_client_send_gemini_cli():
|
|||||||
test_response = "This is a dummy response from the Gemini CLI."
|
test_response = "This is a dummy response from the Gemini CLI."
|
||||||
|
|
||||||
# Set provider to gemini_cli
|
# Set provider to gemini_cli
|
||||||
ai_client.set_provider("gemini_cli", "gemini-2.0-flash")
|
ai_client.set_provider("gemini_cli", "gemini-2.5-flash-lite")
|
||||||
|
|
||||||
# 1. Mock 'ai_client.GeminiCliAdapter' (which we will add)
|
# 1. Mock 'ai_client.GeminiCliAdapter' (which we will add)
|
||||||
with patch('ai_client.GeminiCliAdapter') as MockAdapterClass:
|
with patch('ai_client.GeminiCliAdapter') as MockAdapterClass:
|
||||||
@@ -19,12 +19,12 @@ class TestGeminiCliAdapter(unittest.TestCase):
|
|||||||
def test_send_starts_subprocess_with_correct_args(self, mock_popen):
|
def test_send_starts_subprocess_with_correct_args(self, mock_popen):
|
||||||
"""
|
"""
|
||||||
Verify that send(message) correctly starts the subprocess with
|
Verify that send(message) correctly starts the subprocess with
|
||||||
--output-format stream-json and the provided message via stdin.
|
--output-format stream-json and the provided message via stdin using communicate.
|
||||||
"""
|
"""
|
||||||
# Setup mock process with a minimal valid JSONL termination
|
# Setup mock process with a minimal valid JSONL termination
|
||||||
process_mock = MagicMock()
|
process_mock = MagicMock()
|
||||||
process_mock.stdout = io.StringIO(json.dumps({"type": "result", "usage": {}}) + "\n")
|
stdout_content = json.dumps({"type": "result", "usage": {}}) + "\n"
|
||||||
process_mock.stdin = MagicMock()
|
process_mock.communicate.return_value = (stdout_content, "")
|
||||||
process_mock.poll.return_value = 0
|
process_mock.poll.return_value = 0
|
||||||
process_mock.wait.return_value = 0
|
process_mock.wait.return_value = 0
|
||||||
mock_popen.return_value = process_mock
|
mock_popen.return_value = process_mock
|
||||||
@@ -44,9 +44,8 @@ class TestGeminiCliAdapter(unittest.TestCase):
|
|||||||
# Message should NOT be in cmd now
|
# Message should NOT be in cmd now
|
||||||
self.assertNotIn(message, cmd)
|
self.assertNotIn(message, cmd)
|
||||||
|
|
||||||
# Verify message was written to stdin
|
# Verify message was sent via communicate
|
||||||
process_mock.stdin.write.assert_called_once_with(message)
|
process_mock.communicate.assert_called_once_with(input=message)
|
||||||
process_mock.stdin.close.assert_called_once()
|
|
||||||
|
|
||||||
# Check process configuration
|
# Check process configuration
|
||||||
self.assertEqual(kwargs.get('stdout'), subprocess.PIPE)
|
self.assertEqual(kwargs.get('stdout'), subprocess.PIPE)
|
||||||
@@ -60,16 +59,15 @@ class TestGeminiCliAdapter(unittest.TestCase):
|
|||||||
and returns the combined text.
|
and returns the combined text.
|
||||||
"""
|
"""
|
||||||
jsonl_output = [
|
jsonl_output = [
|
||||||
json.dumps({"type": "message", "text": "The quick brown "}),
|
json.dumps({"type": "message", "role": "model", "text": "The quick brown "}),
|
||||||
json.dumps({"type": "message", "text": "fox jumps."}),
|
json.dumps({"type": "message", "role": "model", "text": "fox jumps."}),
|
||||||
json.dumps({"type": "result", "usage": {"prompt_tokens": 5, "candidates_tokens": 5}})
|
json.dumps({"type": "result", "usage": {"prompt_tokens": 5, "candidates_tokens": 5}})
|
||||||
]
|
]
|
||||||
stdout_content = "\n".join(jsonl_output) + "\n"
|
stdout_content = "\n".join(jsonl_output) + "\n"
|
||||||
|
|
||||||
process_mock = MagicMock()
|
process_mock = MagicMock()
|
||||||
process_mock.stdout = io.StringIO(stdout_content)
|
process_mock.communicate.return_value = (stdout_content, "")
|
||||||
# Mock poll sequence: running, running, finished
|
process_mock.poll.return_value = 0
|
||||||
process_mock.poll.side_effect = [None, None, 0]
|
|
||||||
process_mock.wait.return_value = 0
|
process_mock.wait.return_value = 0
|
||||||
mock_popen.return_value = process_mock
|
mock_popen.return_value = process_mock
|
||||||
|
|
||||||
@@ -85,16 +83,16 @@ class TestGeminiCliAdapter(unittest.TestCase):
|
|||||||
by continuing to read until the final 'result' event.
|
by continuing to read until the final 'result' event.
|
||||||
"""
|
"""
|
||||||
jsonl_output = [
|
jsonl_output = [
|
||||||
json.dumps({"type": "message", "text": "Calling tool..."}),
|
json.dumps({"type": "message", "role": "assistant", "text": "Calling tool..."}),
|
||||||
json.dumps({"type": "tool_use", "name": "read_file", "args": {"path": "test.txt"}}),
|
json.dumps({"type": "tool_use", "name": "read_file", "args": {"path": "test.txt"}}),
|
||||||
json.dumps({"type": "message", "text": "\nFile read successfully."}),
|
json.dumps({"type": "message", "role": "assistant", "text": "\nFile read successfully."}),
|
||||||
json.dumps({"type": "result", "usage": {}})
|
json.dumps({"type": "result", "usage": {}})
|
||||||
]
|
]
|
||||||
stdout_content = "\n".join(jsonl_output) + "\n"
|
stdout_content = "\n".join(jsonl_output) + "\n"
|
||||||
|
|
||||||
process_mock = MagicMock()
|
process_mock = MagicMock()
|
||||||
process_mock.stdout = io.StringIO(stdout_content)
|
process_mock.communicate.return_value = (stdout_content, "")
|
||||||
process_mock.poll.side_effect = [None, None, None, 0]
|
process_mock.poll.return_value = 0
|
||||||
process_mock.wait.return_value = 0
|
process_mock.wait.return_value = 0
|
||||||
mock_popen.return_value = process_mock
|
mock_popen.return_value = process_mock
|
||||||
|
|
||||||
@@ -118,8 +116,8 @@ class TestGeminiCliAdapter(unittest.TestCase):
|
|||||||
stdout_content = "\n".join(jsonl_output) + "\n"
|
stdout_content = "\n".join(jsonl_output) + "\n"
|
||||||
|
|
||||||
process_mock = MagicMock()
|
process_mock = MagicMock()
|
||||||
process_mock.stdout = io.StringIO(stdout_content)
|
process_mock.communicate.return_value = (stdout_content, "")
|
||||||
process_mock.poll.side_effect = [None, 0]
|
process_mock.poll.return_value = 0
|
||||||
process_mock.wait.return_value = 0
|
process_mock.wait.return_value = 0
|
||||||
mock_popen.return_value = process_mock
|
mock_popen.return_value = process_mock
|
||||||
|
|
||||||
@@ -14,20 +14,21 @@ if project_root not in sys.path:
|
|||||||
# Import the class to be tested
|
# Import the class to be tested
|
||||||
from gemini_cli_adapter import GeminiCliAdapter
|
from gemini_cli_adapter import GeminiCliAdapter
|
||||||
|
|
||||||
# Mock the session_logger module to prevent file operations during tests.
|
|
||||||
mock_session_logger = MagicMock()
|
|
||||||
sys.modules['session_logger'] = mock_session_logger
|
|
||||||
|
|
||||||
class TestGeminiCliAdapterParity(unittest.TestCase):
|
class TestGeminiCliAdapterParity(unittest.TestCase):
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
"""Set up a fresh adapter instance and reset session state for each test."""
|
"""Set up a fresh adapter instance and reset session state for each test."""
|
||||||
|
# Patch session_logger to prevent file operations during tests
|
||||||
|
self.session_logger_patcher = patch('gemini_cli_adapter.session_logger')
|
||||||
|
self.mock_session_logger = self.session_logger_patcher.start()
|
||||||
|
|
||||||
self.adapter = GeminiCliAdapter(binary_path="gemini")
|
self.adapter = GeminiCliAdapter(binary_path="gemini")
|
||||||
self.adapter.session_id = None
|
self.adapter.session_id = None
|
||||||
self.adapter.last_usage = None
|
self.adapter.last_usage = None
|
||||||
self.adapter.last_latency = 0.0
|
self.adapter.last_latency = 0.0
|
||||||
# Reset mock calls for session_logger for each test
|
|
||||||
mock_session_logger.reset_mock()
|
def tearDown(self):
|
||||||
|
self.session_logger_patcher.stop()
|
||||||
|
|
||||||
@patch('subprocess.Popen')
|
@patch('subprocess.Popen')
|
||||||
def test_count_tokens_uses_estimation(self, mock_popen):
|
def test_count_tokens_uses_estimation(self, mock_popen):
|
||||||
@@ -45,7 +45,7 @@ def test_get_role_documents():
|
|||||||
def test_get_model_for_role():
|
def test_get_model_for_role():
|
||||||
"""Test that get_model_for_role returns the correct model for each role."""
|
"""Test that get_model_for_role returns the correct model for each role."""
|
||||||
assert get_model_for_role('tier1-orchestrator') == 'gemini-3.1-pro-preview'
|
assert get_model_for_role('tier1-orchestrator') == 'gemini-3.1-pro-preview'
|
||||||
assert get_model_for_role('tier2-tech-lead') == 'gemini-3-flash-preview'
|
assert get_model_for_role('tier2-tech-lead') == 'gemini-2.5-flash-lite'
|
||||||
assert get_model_for_role('tier3-worker') == 'gemini-2.5-flash-lite'
|
assert get_model_for_role('tier3-worker') == 'gemini-2.5-flash-lite'
|
||||||
assert get_model_for_role('tier4-qa') == 'gemini-2.5-flash-lite'
|
assert get_model_for_role('tier4-qa') == 'gemini-2.5-flash-lite'
|
||||||
|
|
||||||
@@ -59,6 +59,7 @@ def test_execute_agent():
|
|||||||
docs = ["file1.py", "docs/spec.md"]
|
docs = ["file1.py", "docs/spec.md"]
|
||||||
|
|
||||||
expected_model = "gemini-2.5-flash-lite"
|
expected_model = "gemini-2.5-flash-lite"
|
||||||
|
|
||||||
mock_stdout = "Mocked AI Response"
|
mock_stdout = "Mocked AI Response"
|
||||||
|
|
||||||
with patch("subprocess.run") as mock_run:
|
with patch("subprocess.run") as mock_run:
|
||||||
@@ -1,15 +1,14 @@
|
|||||||
|
|
||||||
import subprocess
|
import subprocess
|
||||||
import time
|
import time
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
import glob # Will be used to find api_hook_client.py path if needed, though sys.path modification is better.
|
import glob
|
||||||
|
|
||||||
# --- Configuration ---
|
# --- Configuration ---
|
||||||
GUI_SCRIPT = 'gui_2.py'
|
GUI_SCRIPT = 'gui_2.py'
|
||||||
TEST_HOOKS_FLAG = '--enable-test-hooks'
|
TEST_HOOKS_FLAG = '--enable-test-hooks'
|
||||||
API_HOOK_CLIENT_MODULE = 'api_hook_client'
|
API_HOOK_CLIENT_MODULE = 'api_hook_client'
|
||||||
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) # Calculate project root
|
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
|
||||||
|
|
||||||
# Ensure project root is in sys.path to import modules like api_hook_client
|
# Ensure project root is in sys.path to import modules like api_hook_client
|
||||||
if PROJECT_ROOT not in sys.path:
|
if PROJECT_ROOT not in sys.path:
|
||||||
@@ -27,7 +26,7 @@ except ImportError as e:
|
|||||||
def run_visual_mma_verification():
|
def run_visual_mma_verification():
|
||||||
print("Starting visual MMA verification test...")
|
print("Starting visual MMA verification test...")
|
||||||
|
|
||||||
# Change current directory to project root to ensure relative paths are correct for gui_2.py
|
# Change current directory to project root
|
||||||
original_dir = os.getcwd()
|
original_dir = os.getcwd()
|
||||||
if original_dir != PROJECT_ROOT:
|
if original_dir != PROJECT_ROOT:
|
||||||
try:
|
try:
|
||||||
@@ -47,7 +46,7 @@ def run_visual_mma_verification():
|
|||||||
stdout=subprocess.PIPE,
|
stdout=subprocess.PIPE,
|
||||||
stderr=subprocess.PIPE,
|
stderr=subprocess.PIPE,
|
||||||
text=True,
|
text=True,
|
||||||
cwd=PROJECT_ROOT # Explicitly set working directory
|
cwd=PROJECT_ROOT
|
||||||
)
|
)
|
||||||
print(f"GUI process started with PID: {gui_process.pid}")
|
print(f"GUI process started with PID: {gui_process.pid}")
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
@@ -57,32 +56,25 @@ def run_visual_mma_verification():
|
|||||||
print(f"Error starting GUI process: {e}")
|
print(f"Error starting GUI process: {e}")
|
||||||
return
|
return
|
||||||
|
|
||||||
# Give the GUI a moment to start and open the hook server.
|
# Wait for GUI to start
|
||||||
print("Waiting for GUI to initialize and hook server to start (5 seconds)...")
|
print("Waiting for GUI to initialize and hook server to start (5 seconds)...")
|
||||||
time.sleep(5)
|
time.sleep(5)
|
||||||
|
|
||||||
# Check if the GUI process exited prematurely
|
|
||||||
if gui_process.poll() is not None:
|
if gui_process.poll() is not None:
|
||||||
print(f"Error: GUI process exited prematurely with return code {gui_process.returncode}.")
|
print(f"Error: GUI process exited prematurely with return code {gui_process.returncode}.")
|
||||||
stderr_output = gui_process.stderr.read()
|
|
||||||
if stderr_output:
|
|
||||||
print("--- GUI Stderr ---")
|
|
||||||
print(stderr_output)
|
|
||||||
print("------------------")
|
|
||||||
return
|
return
|
||||||
|
|
||||||
# 2. Use api_hook_client.ApiHookClient to push events
|
# 2. Use ApiHookClient
|
||||||
try:
|
try:
|
||||||
client = ApiHookClient()
|
client = ApiHookClient()
|
||||||
print("ApiHookClient initialized successfully.")
|
print("ApiHookClient initialized successfully.")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Failed to initialize ApiHookClient. Ensure the hook server is running on port 8999. Error: {e}")
|
print(f"Failed to initialize ApiHookClient. Error: {e}")
|
||||||
if gui_process:
|
if gui_process:
|
||||||
gui_process.terminate()
|
gui_process.terminate()
|
||||||
gui_process.wait()
|
|
||||||
return
|
return
|
||||||
|
|
||||||
# 3. Include at least 5 tickets in different states
|
# 3. Setup MMA data
|
||||||
track_data = {
|
track_data = {
|
||||||
"id": "visual_test_track",
|
"id": "visual_test_track",
|
||||||
"title": "Visual Verification Track",
|
"title": "Visual Verification Track",
|
||||||
@@ -109,8 +101,6 @@ def run_visual_mma_verification():
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f" - Warning: Failed to push mma_state_update: {e}")
|
print(f" - Warning: Failed to push mma_state_update: {e}")
|
||||||
|
|
||||||
# 4. After a short delay, push an 'mma_step_approval' event
|
|
||||||
print("\nWaiting for GUI to render ticket queue and progress bar (3 seconds)...")
|
|
||||||
time.sleep(3)
|
time.sleep(3)
|
||||||
|
|
||||||
print("Pushing 'mma_step_approval' event to trigger HITL modal...")
|
print("Pushing 'mma_step_approval' event to trigger HITL modal...")
|
||||||
@@ -125,60 +115,27 @@ def run_visual_mma_verification():
|
|||||||
print(f"Error pushing mma_step_approval event: {e}")
|
print(f"Error pushing mma_step_approval event: {e}")
|
||||||
|
|
||||||
# 5. Provide clear print statements for manual verification
|
# 5. Provide clear print statements for manual verification
|
||||||
print("
|
print("\n--- Manual Verification Instructions ---")
|
||||||
--- Manual Verification Instructions ---")
|
|
||||||
print("Please visually inspect the running GUI application:")
|
print("Please visually inspect the running GUI application:")
|
||||||
print("1. MMA Dashboard: Ensure the 'MMA Dashboard' panel is visible and active.")
|
print("1. MMA Dashboard: Ensure the 'MMA Dashboard' panel is visible and active.")
|
||||||
print("2. Ticket Queue: Verify the 'Ticket Queue' section displays all 5 tickets with correct statuses:")
|
print("2. Ticket Queue: Verify the 'Ticket Queue' section displays all 5 tickets with correct statuses.")
|
||||||
print(" - TICKET-001: Should be 'todo'")
|
print("3. Progress Bar: Check that the progress bar correctly reflects the completed/total tickets.")
|
||||||
print(" - TICKET-002: Should be 'running'")
|
|
||||||
print(" - TICKET-003: Should be 'complete'")
|
|
||||||
print(" - TICKET-004: Should be 'blocked'")
|
|
||||||
print(" - TICKET-005: Should be 'paused'")
|
|
||||||
print(" Observe the distinct status colors for each ticket.")
|
|
||||||
print("3. Progress Bar: Check that the progress bar correctly reflects the completed/total tickets (e.g., 1/5).")
|
|
||||||
print("4. Approval Modal: Confirm that an 'MMA Step Approval' modal has appeared.")
|
print("4. Approval Modal: Confirm that an 'MMA Step Approval' modal has appeared.")
|
||||||
print(" - Verify it contains a 'Proposed Tool Call' section (e.g., showing 'powershell -Command...').")
|
print("\n--------------------------------------")
|
||||||
print(" - Ensure there is an option/button to 'Edit Payload'.")
|
|
||||||
print("
|
|
||||||
--------------------------------------")
|
|
||||||
print("The test script has finished its automated actions.")
|
print("The test script has finished its automated actions.")
|
||||||
print("The GUI application is still running. Please perform manual verification.")
|
print("The GUI application is still running. Press Enter to exit.")
|
||||||
print("Press Enter in this terminal to stop the GUI process and exit the test script.")
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
input()
|
input()
|
||||||
except EOFError:
|
except EOFError:
|
||||||
print("EOF received, exiting.")
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
print("
|
print("\nStopping GUI process...")
|
||||||
Stopping GUI process...")
|
|
||||||
if gui_process:
|
if gui_process:
|
||||||
try:
|
|
||||||
gui_process.terminate()
|
gui_process.terminate()
|
||||||
gui_process.wait(timeout=10)
|
gui_process.wait(timeout=5)
|
||||||
print("GUI process terminated gracefully.")
|
|
||||||
except subprocess.TimeoutExpired:
|
|
||||||
print("GUI process did not terminate within the timeout. Killing it forcefully.")
|
|
||||||
gui_process.kill()
|
|
||||||
gui_process.wait()
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Error during GUI process termination: {e}")
|
|
||||||
else:
|
|
||||||
print("GUI process was not started or already terminated.")
|
|
||||||
|
|
||||||
print("Visual MMA verification test script finished.")
|
print("Visual MMA verification test script finished.")
|
||||||
|
|
||||||
# Restore original directory
|
|
||||||
if original_dir != PROJECT_ROOT:
|
|
||||||
try:
|
|
||||||
os.chdir(original_dir)
|
|
||||||
print(f"Restored original working directory: {original_dir}")
|
|
||||||
except FileNotFoundError:
|
|
||||||
print(f"Warning: Could not restore original working directory '{original_dir}'.")
|
|
||||||
|
|
||||||
# --- Main execution ---
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
# When the script is executed directly, ensure it runs from the correct context.
|
|
||||||
run_visual_mma_verification()
|
run_visual_mma_verification()
|
||||||
95
conductor/tests/visual_orchestration_verification.py
Normal file
95
conductor/tests/visual_orchestration_verification.py
Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
import pytest
|
||||||
|
import time
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Ensure project root is in path
|
||||||
|
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
||||||
|
|
||||||
|
from api_hook_client import ApiHookClient
|
||||||
|
|
||||||
|
@pytest.mark.integration
|
||||||
|
def test_mma_epic_lifecycle(live_gui):
|
||||||
|
"""
|
||||||
|
Integration test for the full MMA Epic lifecycle.
|
||||||
|
1. Start App.
|
||||||
|
2. Trigger 'New Epic' request.
|
||||||
|
3. Verify Tier 1 generates tracks.
|
||||||
|
4. Trigger 'Start Track' for one of the tracks.
|
||||||
|
5. Verify Tier 2 generates tickets.
|
||||||
|
6. Verify execution loop starts.
|
||||||
|
"""
|
||||||
|
client = ApiHookClient()
|
||||||
|
assert client.wait_for_server(timeout=15), "API hook server failed to start."
|
||||||
|
|
||||||
|
print("[Test] Initializing MMA Epic lifecycle test...")
|
||||||
|
|
||||||
|
# 0. Setup: Ensure we have a project and are in a clean state
|
||||||
|
client.click("btn_reset")
|
||||||
|
time.sleep(1)
|
||||||
|
|
||||||
|
# 1. Set Epic input
|
||||||
|
epic_text = "Improve the logging system to include timestamps in all tool calls."
|
||||||
|
print(f"[Test] Setting Epic input: {epic_text}")
|
||||||
|
client.set_value("mma_epic_input", epic_text)
|
||||||
|
|
||||||
|
# 2. Trigger 'New Epic' (Plan Epic)
|
||||||
|
print("[Test] Clicking 'Plan Epic (Tier 1)'...")
|
||||||
|
client.click("btn_mma_plan_epic")
|
||||||
|
|
||||||
|
# 3. Verify that Tier 1 generates tracks
|
||||||
|
print("[Test] Polling for Tier 1 tracks...")
|
||||||
|
tracks_generated = False
|
||||||
|
for i in range(120):
|
||||||
|
status = client.get_value("ai_status")
|
||||||
|
# Check if the proposal modal is shown or status changed
|
||||||
|
if status and "Epic tracks generated" in str(status):
|
||||||
|
tracks_generated = True
|
||||||
|
print(f"[Test] Tracks generated after {i}s")
|
||||||
|
break
|
||||||
|
time.sleep(1)
|
||||||
|
|
||||||
|
assert tracks_generated, "Tier 1 failed to generate tracks within 60 seconds."
|
||||||
|
|
||||||
|
# 4. Trigger 'Start Track' for the first track
|
||||||
|
print("[Test] Triggering 'Start Track' for track index 0...")
|
||||||
|
client.click("btn_mma_start_track", user_data={"index": 0})
|
||||||
|
|
||||||
|
# 5. Verify that Tier 2 generates tickets and starts execution
|
||||||
|
print("[Test] Polling for Tier 2 ticket generation and execution start...")
|
||||||
|
execution_started = False
|
||||||
|
for i in range(60):
|
||||||
|
mma_status = client.get_mma_status()
|
||||||
|
status_str = mma_status.get("mma_status", "idle")
|
||||||
|
active_tier = mma_status.get("active_tier", "")
|
||||||
|
|
||||||
|
if status_str == "running" or "Tier 3" in str(active_tier):
|
||||||
|
execution_started = True
|
||||||
|
print(f"[Test] Execution started (Status: {status_str}, Tier: {active_tier}) after {i}s")
|
||||||
|
break
|
||||||
|
|
||||||
|
current_ai_status = client.get_value("ai_status")
|
||||||
|
if i % 5 == 0:
|
||||||
|
print(f" ... still waiting. Current AI Status: {current_ai_status}")
|
||||||
|
|
||||||
|
time.sleep(1)
|
||||||
|
|
||||||
|
assert execution_started, "Tier 2 failed to generate tickets or execution failed to start within 60 seconds."
|
||||||
|
|
||||||
|
# 6. Final verification of MMA state
|
||||||
|
final_mma = client.get_mma_status()
|
||||||
|
print(f"[Test] Final MMA Status: {final_mma.get('mma_status')}")
|
||||||
|
print(f"[Test] Active Tier: {final_mma.get('active_tier')}")
|
||||||
|
print(f"[Test] Ticket Count: {len(final_mma.get('active_tickets', []))}")
|
||||||
|
|
||||||
|
assert final_mma.get("mma_status") in ["running", "done", "blocked"]
|
||||||
|
assert len(final_mma.get("active_tickets", [])) > 0
|
||||||
|
|
||||||
|
print("[Test] MMA Epic lifecycle verification successful!")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
# If run directly, try to use pytest
|
||||||
|
import subprocess
|
||||||
|
# Using sys.executable to ensure we use the same environment
|
||||||
|
subprocess.run([sys.executable, "-m", "pytest", "-v", __file__])
|
||||||
@@ -20,8 +20,23 @@ This file tracks all major tracks for the project. Each track has its own detail
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
- [~] **Track: MMA Orchestrator Integration**
|
- [ ] **Track: MMA Data Architecture & DAG Engine**
|
||||||
*Link: [./tracks/mma_orchestrator_integration_20260226/](./tracks/mma_orchestrator_integration_20260226/)*
|
*Link: [./tracks/mma_data_architecture_dag_engine/](./tracks/mma_data_architecture_dag_engine/)*
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
- [ ] **Track: Tiered Context Scoping & HITL Approval**
|
||||||
|
*Link: [./tracks/tiered_context_scoping_hitl_approval/](./tracks/tiered_context_scoping_hitl_approval/)*
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
- [ ] **Track: MMA Dashboard Visualization Overhaul**
|
||||||
|
*Link: [./tracks/mma_dashboard_visualization_overhaul/](./tracks/mma_dashboard_visualization_overhaul/)*
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
- [ ] **Track: Robust Live Simulation Verification**
|
||||||
|
*Link: [./tracks/robust_live_simulation_verification/](./tracks/robust_live_simulation_verification/)*
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,7 @@
|
|||||||
|
# MMA Dashboard Visualization Overhaul
|
||||||
|
|
||||||
|
Overhauls the GUI dashboard to display a visual DAG, live streams, and track browsers.
|
||||||
|
|
||||||
|
### Navigation
|
||||||
|
- [Specification](./spec.md)
|
||||||
|
- [Implementation Plan](./plan.md)
|
||||||
@@ -0,0 +1,6 @@
|
|||||||
|
{
|
||||||
|
"id": "mma_dashboard_visualization_overhaul",
|
||||||
|
"title": "MMA Dashboard Visualization Overhaul",
|
||||||
|
"status": "planned",
|
||||||
|
"created_at": "2026-02-27T19:20:00.000000"
|
||||||
|
}
|
||||||
@@ -0,0 +1,16 @@
|
|||||||
|
# Implementation Plan: MMA Dashboard Visualization Overhaul
|
||||||
|
|
||||||
|
## Phase 1: Track Browser Panel
|
||||||
|
- [ ] Task: Implement a list view in the MMA Dashboard that reads from the `tracks` directory.
|
||||||
|
- [ ] Task: Add functionality to select an active track and load its state into the UI.
|
||||||
|
- [ ] Task: Display progress bars based on task completion within the active track.
|
||||||
|
|
||||||
|
## Phase 2: DAG Visualizer Component
|
||||||
|
- [ ] Task: Design the layout for the Task DAG using DearPyGui Node Editor or collapsible Tree Nodes.
|
||||||
|
- [ ] Task: Write the data-binding logic to map the backend Python DAG (from Track 1) to the UI visualizer.
|
||||||
|
- [ ] Task: Add visual indicators (colors/icons) for Task statuses (Ready, Blocked, Done).
|
||||||
|
|
||||||
|
## Phase 3: Live Output Streams
|
||||||
|
- [ ] Task: Refactor the AI response handling to support multiple concurrent UI text streams.
|
||||||
|
- [ ] Task: Bind the output of Tier 1 (Planning) to a designated "Strategy" text box.
|
||||||
|
- [ ] Task: Bind the output of Tier 2 and spawned Tier 3/4 workers to the active Task's detail view in the DAG.
|
||||||
@@ -0,0 +1,34 @@
|
|||||||
|
# Track Specification: MMA Dashboard Visualization Overhaul
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
Make the invisible backend operations visible and interactive. The current GUI is too barebones to effectively manage a multi-agent system. This track overhauls the MMA Dashboard to provide real-time insights into tracks, task dependencies, and agent streams.
|
||||||
|
|
||||||
|
## Goals
|
||||||
|
1. **Track Browser:** Create a dedicated panel to view all tracks, their statuses, and overall progress metrics.
|
||||||
|
2. **DAG Visualizer:** Implement a visual representation (hierarchical list or node graph) of the current Track's tasks, clearly indicating status and blockers.
|
||||||
|
3. **Live Output Streams:** Ensure the GUI text boxes stream the output of Tier 1 (Planning), Tier 2 (Directing), and Tiers 3/4 (Executing) in real-time, tied precisely to their respective tasks.
|
||||||
|
|
||||||
|
## Constraints
|
||||||
|
- Must be built natively using DearPyGui capabilities (e.g., node editor or nested tree nodes).
|
||||||
|
- Must be performant and not block the main rendering loop when updating live text streams.
|
||||||
|
|
||||||
|
## Context & Origins
|
||||||
|
This track was born from the "Human Verification" phase of the initial MMA Orchestrator prototype (`mma_orchestrator_integration_20260226`). We realized that while the backend API plumbing for the hierarchical MMA tiers (Tiers 1-4) was technically functional, the product lacked the necessary state management, UX visualization, and human-in-the-loop security gates to be usable.
|
||||||
|
|
||||||
|
**Key Takeaways from the Prototype Phase:**
|
||||||
|
- The Tier 2 (Tech Lead) needs its own track-scoped discussion history, rather than polluting the global project history.
|
||||||
|
- Tasks within a track require a DAG (Directed Acyclic Graph) engine to manage complex dependencies and blocking states.
|
||||||
|
- The GUI must visualize this DAG and stream the output of individual workers directly to their associated tasks.
|
||||||
|
- We must enforce tiered context subsetting so that Tier 3/4 workers don't receive the massive global context blob, and we need a pre-spawn approval modal so the user can intercept, review, and modify worker prompts/contexts before they execute.
|
||||||
|
|
||||||
|
**Instructions for the Implementing Agent:**
|
||||||
|
As you execute this track, ensure you maintain alignment with the other Phase 2 tracks. If you learn something that impacts the dependent tracks, please append a similar "Context Summary" to their `spec.md` files before concluding your run.
|
||||||
|
|
||||||
|
## Execution Order & Dependencies
|
||||||
|
This is a multi-track phase. To ensure architectural integrity, these tracks **MUST** be executed in the following strict order:
|
||||||
|
1. **MMA Data Architecture & DAG Engine:** (Builds the state and execution foundation)
|
||||||
|
2. **Tiered Context Scoping & HITL Approval:** (Builds the security and context subsetting on top of the state)
|
||||||
|
3. **[CURRENT] MMA Dashboard Visualization Overhaul:** (Builds the UI to visualize the state and subsets)
|
||||||
|
4. **Robust Live Simulation Verification:** (Builds the tests to verify the UI and state)
|
||||||
|
|
||||||
|
**Prerequisites for this track:** `Tiered Context Scoping & HITL Approval` MUST be completed (`[x]`) before starting this track.
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
# MMA Data Architecture & DAG Engine
|
||||||
|
|
||||||
|
Restructures manual_slop state and execution into a per-track DAG model.
|
||||||
|
|
||||||
|
### Navigation
|
||||||
|
- [Specification](./spec.md)
|
||||||
|
- [Implementation Plan](./plan.md)
|
||||||
@@ -0,0 +1,6 @@
|
|||||||
|
{
|
||||||
|
"id": "mma_data_architecture_dag_engine",
|
||||||
|
"title": "MMA Data Architecture & DAG Engine",
|
||||||
|
"status": "planned",
|
||||||
|
"created_at": "2026-02-27T19:20:00.000000"
|
||||||
|
}
|
||||||
16
conductor/tracks/mma_data_architecture_dag_engine/plan.md
Normal file
16
conductor/tracks/mma_data_architecture_dag_engine/plan.md
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
# Implementation Plan: MMA Data Architecture & DAG Engine
|
||||||
|
|
||||||
|
## Phase 1: Track-Scoped State Management
|
||||||
|
- [ ] Task: Define the data schema for a Track (Metadata, Discussion History, Task List).
|
||||||
|
- [ ] Task: Update `project_manager.py` to create and read from `tracks/<track_id>/state.toml`.
|
||||||
|
- [ ] Task: Ensure Tier 2 (Tech Lead) history is securely scoped to the active track's state file.
|
||||||
|
|
||||||
|
## Phase 2: Python DAG Engine
|
||||||
|
- [ ] Task: Create a `Task` class with `status` (Blocked, Ready, In Progress, Review, Done) and `depends_on` fields.
|
||||||
|
- [ ] Task: Implement a topological sorting algorithm to resolve execution order.
|
||||||
|
- [ ] Task: Write robust unit tests verifying cycle detection and dependency resolution.
|
||||||
|
|
||||||
|
## Phase 3: Execution State Machine
|
||||||
|
- [ ] Task: Implement the core loop that evaluates the DAG and identifies "Ready" tasks.
|
||||||
|
- [ ] Task: Create configuration settings for "Auto-Queue" vs "Manual Step" execution modes.
|
||||||
|
- [ ] Task: Connect the state machine to the backend dispatcher, preparing it for GUI integration.
|
||||||
34
conductor/tracks/mma_data_architecture_dag_engine/spec.md
Normal file
34
conductor/tracks/mma_data_architecture_dag_engine/spec.md
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
# Track Specification: MMA Data Architecture & DAG Engine
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
Restructure how `manual_slop` stores and executes work. The current implementation relies on global state and linear execution, which does not support the complexity of multi-agent, task-based workflows. This track establishes a robust, data-oriented foundation using track-scoped state and a native Python Directed Acyclic Graph (DAG) engine.
|
||||||
|
|
||||||
|
## Goals
|
||||||
|
1. **Track-Scoped State:** Transition away from a single global `manual_slop_history.toml` to a per-track state structure (e.g., `tracks/<track_id>/state.toml`) to manage specific discussion history and context.
|
||||||
|
2. **Task DAG Engine:** Implement a topological sorter and DAG execution engine in Python to manage dependencies between tasks.
|
||||||
|
3. **Execution State Machine:** Build an internal state machine that governs whether the DAG auto-advances or waits for manual user intervention to spawn the next worker.
|
||||||
|
|
||||||
|
## Constraints
|
||||||
|
- Must integrate seamlessly with the existing project TOML structure.
|
||||||
|
- Avoid external complex dependencies (like Steve Yegge's Beads) for now; rely on standard Python libraries or lightweight custom implementations to ensure tight coupling with the DearPyGui stack.
|
||||||
|
|
||||||
|
## Context & Origins
|
||||||
|
This track was born from the "Human Verification" phase of the initial MMA Orchestrator prototype (`mma_orchestrator_integration_20260226`). We realized that while the backend API plumbing for the hierarchical MMA tiers (Tiers 1-4) was technically functional, the product lacked the necessary state management, UX visualization, and human-in-the-loop security gates to be usable.
|
||||||
|
|
||||||
|
**Key Takeaways from the Prototype Phase:**
|
||||||
|
- The Tier 2 (Tech Lead) needs its own track-scoped discussion history, rather than polluting the global project history.
|
||||||
|
- Tasks within a track require a DAG (Directed Acyclic Graph) engine to manage complex dependencies and blocking states.
|
||||||
|
- The GUI must visualize this DAG and stream the output of individual workers directly to their associated tasks.
|
||||||
|
- We must enforce tiered context subsetting so that Tier 3/4 workers don't receive the massive global context blob, and we need a pre-spawn approval modal so the user can intercept, review, and modify worker prompts/contexts before they execute.
|
||||||
|
|
||||||
|
**Instructions for the Implementing Agent:**
|
||||||
|
As you execute this track, ensure you maintain alignment with the other Phase 2 tracks. If you learn something that impacts the dependent tracks, please append a similar "Context Summary" to their `spec.md` files before concluding your run.
|
||||||
|
|
||||||
|
## Execution Order & Dependencies
|
||||||
|
This is a multi-track phase. To ensure architectural integrity, these tracks **MUST** be executed in the following strict order:
|
||||||
|
1. **[CURRENT] MMA Data Architecture & DAG Engine:** (Builds the state and execution foundation)
|
||||||
|
2. **Tiered Context Scoping & HITL Approval:** (Builds the security and context subsetting on top of the state)
|
||||||
|
3. **MMA Dashboard Visualization Overhaul:** (Builds the UI to visualize the state and subsets)
|
||||||
|
4. **Robust Live Simulation Verification:** (Builds the tests to verify the UI and state)
|
||||||
|
|
||||||
|
**Prerequisites for this track:** None. This must be executed FIRST.
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
# Robust Live Simulation Verification
|
||||||
|
|
||||||
|
Builds automated UI simulation testing to prevent orchestrator GUI regressions.
|
||||||
|
|
||||||
|
### Navigation
|
||||||
|
- [Specification](./spec.md)
|
||||||
|
- [Implementation Plan](./plan.md)
|
||||||
@@ -0,0 +1,6 @@
|
|||||||
|
{
|
||||||
|
"id": "robust_live_simulation_verification",
|
||||||
|
"title": "Robust Live Simulation Verification",
|
||||||
|
"status": "planned",
|
||||||
|
"created_at": "2026-02-27T19:20:00.000000"
|
||||||
|
}
|
||||||
15
conductor/tracks/robust_live_simulation_verification/plan.md
Normal file
15
conductor/tracks/robust_live_simulation_verification/plan.md
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
# Implementation Plan: Robust Live Simulation Verification
|
||||||
|
|
||||||
|
## Phase 1: Framework Foundation
|
||||||
|
- [ ] Task: Create `tests/visual_sim_mma_v2.py` based on existing simulation patterns.
|
||||||
|
- [ ] Task: Implement helper methods in `ApiHookClient` for querying specific DearPyGui item states (e.g., `get_text_value`, `get_node_status`).
|
||||||
|
|
||||||
|
## Phase 2: Epic & Track Verification
|
||||||
|
- [ ] Task: Write the simulation routine to trigger a new Epic and verify the Track Browser updates correctly.
|
||||||
|
- [ ] Task: Verify that selecting a newly generated track successfully loads its initial (empty) state into the DAG visualizer.
|
||||||
|
|
||||||
|
## Phase 3: DAG & Spawn Interception Verification
|
||||||
|
- [ ] Task: Simulate the "Start Track" action and verify the DAG visualizer populates with tasks.
|
||||||
|
- [ ] Task: Simulate the Auto-Queue advancing to a "Ready" task.
|
||||||
|
- [ ] Task: Verify the "Approve Worker Spawn" modal appears with the correct prompt and context.
|
||||||
|
- [ ] Task: Simulate clicking "Approve" and verify the worker's simulated output streams into the correct task detail view.
|
||||||
34
conductor/tracks/robust_live_simulation_verification/spec.md
Normal file
34
conductor/tracks/robust_live_simulation_verification/spec.md
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
# Track Specification: Robust Live Simulation Verification
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
Establish a robust, visual simulation framework to prevent regressions in the complex GUI and asynchronous orchestration layers. This track replaces manual human verification with an automated script that clicks through the GUI and verifies the rendered state.
|
||||||
|
|
||||||
|
## Goals
|
||||||
|
1. **Simulation Framework Setup:** Build a dedicated test script (`tests/visual_sim_mma_v2.py`) utilizing `ApiHookClient` to control the live GUI.
|
||||||
|
2. **Simulate Epic Planning:** Automate the clicking of "New Epic", inputting a prompt, and verifying the expected Tier 1 tracks appear in the UI.
|
||||||
|
3. **Simulate Execution & Spawning:** Automate the selection of a track, the generation of the DAG, and the interaction with the HITL Approval modal.
|
||||||
|
|
||||||
|
## Constraints
|
||||||
|
- Must run against a live instance of the application using `--enable-test-hooks`.
|
||||||
|
- Must fail loudly if the visual state (e.g., rendered DAG nodes, text box contents) does not match expectations.
|
||||||
|
|
||||||
|
## Context & Origins
|
||||||
|
This track was born from the "Human Verification" phase of the initial MMA Orchestrator prototype (`mma_orchestrator_integration_20260226`). We realized that while the backend API plumbing for the hierarchical MMA tiers (Tiers 1-4) was technically functional, the product lacked the necessary state management, UX visualization, and human-in-the-loop security gates to be usable.
|
||||||
|
|
||||||
|
**Key Takeaways from the Prototype Phase:**
|
||||||
|
- The Tier 2 (Tech Lead) needs its own track-scoped discussion history, rather than polluting the global project history.
|
||||||
|
- Tasks within a track require a DAG (Directed Acyclic Graph) engine to manage complex dependencies and blocking states.
|
||||||
|
- The GUI must visualize this DAG and stream the output of individual workers directly to their associated tasks.
|
||||||
|
- We must enforce tiered context subsetting so that Tier 3/4 workers don't receive the massive global context blob, and we need a pre-spawn approval modal so the user can intercept, review, and modify worker prompts/contexts before they execute.
|
||||||
|
|
||||||
|
**Instructions for the Implementing Agent:**
|
||||||
|
As you execute this track, ensure you maintain alignment with the other Phase 2 tracks. If you learn something that impacts the dependent tracks, please append a similar "Context Summary" to their `spec.md` files before concluding your run.
|
||||||
|
|
||||||
|
## Execution Order & Dependencies
|
||||||
|
This is a multi-track phase. To ensure architectural integrity, these tracks **MUST** be executed in the following strict order:
|
||||||
|
1. **MMA Data Architecture & DAG Engine:** (Builds the state and execution foundation)
|
||||||
|
2. **Tiered Context Scoping & HITL Approval:** (Builds the security and context subsetting on top of the state)
|
||||||
|
3. **MMA Dashboard Visualization Overhaul:** (Builds the UI to visualize the state and subsets)
|
||||||
|
4. **[CURRENT] Robust Live Simulation Verification:** (Builds the tests to verify the UI and state)
|
||||||
|
|
||||||
|
**Prerequisites for this track:** `MMA Dashboard Visualization Overhaul` MUST be completed (`[x]`) before starting this track.
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
# Tiered Context Scoping & HITL Approval
|
||||||
|
|
||||||
|
Introduces context subsetting per tier and Human-in-the-Loop spawn interceptor logic.
|
||||||
|
|
||||||
|
### Navigation
|
||||||
|
- [Specification](./spec.md)
|
||||||
|
- [Implementation Plan](./plan.md)
|
||||||
@@ -0,0 +1,6 @@
|
|||||||
|
{
|
||||||
|
"id": "tiered_context_scoping_hitl_approval",
|
||||||
|
"title": "Tiered Context Scoping & HITL Approval",
|
||||||
|
"status": "planned",
|
||||||
|
"created_at": "2026-02-27T19:20:00.000000"
|
||||||
|
}
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
# Implementation Plan: Tiered Context Scoping & HITL Approval
|
||||||
|
|
||||||
|
## Phase 1: Context Subsetting
|
||||||
|
- [ ] Task: Refactor `aggregate.py` to support targeted context builds (e.g., `build_tier1_context`, `build_tier3_context`).
|
||||||
|
- [ ] Task: Integrate AST skeleton extraction into the standard Tier 3 context build.
|
||||||
|
- [ ] Task: Update the project state to track which files are assigned to which tier.
|
||||||
|
|
||||||
|
## Phase 2: The Spawn Interceptor
|
||||||
|
- [ ] Task: Create a signaling mechanism in `multi_agent_conductor.py` to emit a "Worker Spawn Requested" event.
|
||||||
|
- [ ] Task: Implement the interception logic that pauses the async dispatcher until a signal is received from the GUI.
|
||||||
|
|
||||||
|
## Phase 3: Approval UX Modal
|
||||||
|
- [ ] Task: Design the "Approve Worker Spawn" modal in DearPyGui.
|
||||||
|
- [ ] Task: Populate the modal with the target role, the exact prompt, and a read-only view of the specific file context.
|
||||||
|
- [ ] Task: Wire the "Approve", "Modify", and "Reject" buttons to resume or cancel the intercepted spawn.
|
||||||
@@ -0,0 +1,34 @@
|
|||||||
|
# Track Specification: Tiered Context Scoping & HITL Approval
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
Provide the user with absolute visual control over what the AI sees at every level of the hierarchy. Currently, the system builds a single massive context blob. This track introduces context subsetting based on the target tier and implements a Human-in-the-Loop (HITL) approval gate before any Tier 3/4 worker is spawned.
|
||||||
|
|
||||||
|
## Goals
|
||||||
|
1. **Context Subsetting:** Modify the aggregation logic so that Tier 1, Tier 2, and Tier 3/4 receive distinct, optimized context packages.
|
||||||
|
2. **Spawn Interceptor:** Implement a `BeforeSpawn` hook mechanism within the application to pause execution right before a sub-agent is launched.
|
||||||
|
3. **Approval UX:** Design and implement a GUI modal that displays the exact prompt and context intended for the worker, allowing the user to Approve, Modify, or Reject the spawn.
|
||||||
|
|
||||||
|
## Constraints
|
||||||
|
- Must adhere to the project's security and transparency mandates.
|
||||||
|
- The interceptor must be reliable and not cause the main event loop to hang indefinitely.
|
||||||
|
|
||||||
|
## Context & Origins
|
||||||
|
This track was born from the "Human Verification" phase of the initial MMA Orchestrator prototype (`mma_orchestrator_integration_20260226`). We realized that while the backend API plumbing for the hierarchical MMA tiers (Tiers 1-4) was technically functional, the product lacked the necessary state management, UX visualization, and human-in-the-loop security gates to be usable.
|
||||||
|
|
||||||
|
**Key Takeaways from the Prototype Phase:**
|
||||||
|
- The Tier 2 (Tech Lead) needs its own track-scoped discussion history, rather than polluting the global project history.
|
||||||
|
- Tasks within a track require a DAG (Directed Acyclic Graph) engine to manage complex dependencies and blocking states.
|
||||||
|
- The GUI must visualize this DAG and stream the output of individual workers directly to their associated tasks.
|
||||||
|
- We must enforce tiered context subsetting so that Tier 3/4 workers don't receive the massive global context blob, and we need a pre-spawn approval modal so the user can intercept, review, and modify worker prompts/contexts before they execute.
|
||||||
|
|
||||||
|
**Instructions for the Implementing Agent:**
|
||||||
|
As you execute this track, ensure you maintain alignment with the other Phase 2 tracks. If you learn something that impacts the dependent tracks, please append a similar "Context Summary" to their `spec.md` files before concluding your run.
|
||||||
|
|
||||||
|
## Execution Order & Dependencies
|
||||||
|
This is a multi-track phase. To ensure architectural integrity, these tracks **MUST** be executed in the following strict order:
|
||||||
|
1. **MMA Data Architecture & DAG Engine:** (Builds the state and execution foundation)
|
||||||
|
2. **[CURRENT] Tiered Context Scoping & HITL Approval:** (Builds the security and context subsetting on top of the state)
|
||||||
|
3. **MMA Dashboard Visualization Overhaul:** (Builds the UI to visualize the state and subsets)
|
||||||
|
4. **Robust Live Simulation Verification:** (Builds the tests to verify the UI and state)
|
||||||
|
|
||||||
|
**Prerequisites for this track:** `MMA Data Architecture & DAG Engine` MUST be completed (`[x]`) before starting this track.
|
||||||
@@ -9,7 +9,7 @@ def generate_tickets(track_brief: str, module_skeletons: str) -> list[dict]:
|
|||||||
Breaks down a Track Brief and module skeletons into discrete Tier 3 Tickets.
|
Breaks down a Track Brief and module skeletons into discrete Tier 3 Tickets.
|
||||||
"""
|
"""
|
||||||
# 1. Set Tier 2 Model (Tech Lead - Flash)
|
# 1. Set Tier 2 Model (Tech Lead - Flash)
|
||||||
ai_client.set_provider('gemini', 'gemini-1.5-flash')
|
ai_client.set_provider('gemini', 'gemini-2.5-flash-lite')
|
||||||
ai_client.reset_session()
|
ai_client.reset_session()
|
||||||
|
|
||||||
# 2. Construct Prompt
|
# 2. Construct Prompt
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
[ai]
|
[ai]
|
||||||
provider = "gemini"
|
provider = "gemini_cli"
|
||||||
model = "gemini-2.5-flash-lite"
|
model = "gemini-2.5-flash-lite"
|
||||||
temperature = 0.0
|
temperature = 0.0
|
||||||
max_tokens = 8192
|
max_tokens = 8192
|
||||||
@@ -9,7 +9,7 @@ system_prompt = ""
|
|||||||
[theme]
|
[theme]
|
||||||
palette = "ImGui Dark"
|
palette = "ImGui Dark"
|
||||||
font_size = 16.0
|
font_size = 16.0
|
||||||
scale = 1.0
|
scale = 1.2999999523162842
|
||||||
font_path = ""
|
font_path = ""
|
||||||
|
|
||||||
[projects]
|
[projects]
|
||||||
@@ -32,7 +32,7 @@ active = "C:\\projects\\manual_slop\\tests\\temp_project.toml"
|
|||||||
"Discussion Hub" = true
|
"Discussion Hub" = true
|
||||||
"Operations Hub" = true
|
"Operations Hub" = true
|
||||||
Theme = true
|
Theme = true
|
||||||
"Log Management" = false
|
"Log Management" = true
|
||||||
Diagnostics = true
|
Diagnostics = true
|
||||||
|
|
||||||
[headless]
|
[headless]
|
||||||
|
|||||||
113
gui_2.py
113
gui_2.py
@@ -251,6 +251,13 @@ class App:
|
|||||||
self.proposed_tracks: list[dict] = []
|
self.proposed_tracks: list[dict] = []
|
||||||
self._show_track_proposal_modal = False
|
self._show_track_proposal_modal = False
|
||||||
|
|
||||||
|
self.mma_tier_usage = {
|
||||||
|
"Tier 1": {"input": 0, "output": 0},
|
||||||
|
"Tier 2": {"input": 0, "output": 0},
|
||||||
|
"Tier 3": {"input": 0, "output": 0},
|
||||||
|
"Tier 4": {"input": 0, "output": 0},
|
||||||
|
}
|
||||||
|
|
||||||
self._tool_log: list[tuple[str, str]] = []
|
self._tool_log: list[tuple[str, str]] = []
|
||||||
self._comms_log: list[dict] = []
|
self._comms_log: list[dict] = []
|
||||||
|
|
||||||
@@ -403,7 +410,10 @@ class App:
|
|||||||
'token_budget_pct': '_token_budget_pct',
|
'token_budget_pct': '_token_budget_pct',
|
||||||
'token_budget_current': '_token_budget_current',
|
'token_budget_current': '_token_budget_current',
|
||||||
'token_budget_label': '_token_budget_label',
|
'token_budget_label': '_token_budget_label',
|
||||||
'show_confirm_modal': 'show_confirm_modal'
|
'show_confirm_modal': 'show_confirm_modal',
|
||||||
|
'mma_epic_input': 'ui_epic_input',
|
||||||
|
'mma_status': 'mma_status',
|
||||||
|
'mma_active_tier': 'active_tier'
|
||||||
}
|
}
|
||||||
|
|
||||||
self._clickable_actions = {
|
self._clickable_actions = {
|
||||||
@@ -414,6 +424,9 @@ class App:
|
|||||||
'btn_reject_script': self._handle_reject_script,
|
'btn_reject_script': self._handle_reject_script,
|
||||||
'btn_project_save': self._cb_project_save,
|
'btn_project_save': self._cb_project_save,
|
||||||
'btn_disc_create': self._cb_disc_create,
|
'btn_disc_create': self._cb_disc_create,
|
||||||
|
'btn_mma_plan_epic': self._cb_plan_epic,
|
||||||
|
'btn_mma_accept_tracks': self._cb_accept_tracks,
|
||||||
|
'btn_mma_start_track': self._cb_start_track,
|
||||||
}
|
}
|
||||||
self._predefined_callbacks = {
|
self._predefined_callbacks = {
|
||||||
'_test_callback_func_write_to_file': self._test_callback_func_write_to_file
|
'_test_callback_func_write_to_file': self._test_callback_func_write_to_file
|
||||||
@@ -899,6 +912,7 @@ class App:
|
|||||||
payload = task.get("payload", {})
|
payload = task.get("payload", {})
|
||||||
self.mma_status = payload.get("status", "idle")
|
self.mma_status = payload.get("status", "idle")
|
||||||
self.active_tier = payload.get("active_tier")
|
self.active_tier = payload.get("active_tier")
|
||||||
|
self.mma_tier_usage = payload.get("tier_usage", self.mma_tier_usage)
|
||||||
self.active_track = payload.get("track")
|
self.active_track = payload.get("track")
|
||||||
self.active_tickets = payload.get("tickets", [])
|
self.active_tickets = payload.get("tickets", [])
|
||||||
|
|
||||||
@@ -921,7 +935,17 @@ class App:
|
|||||||
if item == "btn_project_new_automated":
|
if item == "btn_project_new_automated":
|
||||||
self._cb_new_project_automated(user_data)
|
self._cb_new_project_automated(user_data)
|
||||||
elif item in self._clickable_actions:
|
elif item in self._clickable_actions:
|
||||||
self._clickable_actions[item]()
|
# Check if it's a method that accepts user_data
|
||||||
|
import inspect
|
||||||
|
func = self._clickable_actions[item]
|
||||||
|
try:
|
||||||
|
sig = inspect.signature(func)
|
||||||
|
if 'user_data' in sig.parameters:
|
||||||
|
func(user_data=user_data)
|
||||||
|
else:
|
||||||
|
func()
|
||||||
|
except Exception:
|
||||||
|
func()
|
||||||
|
|
||||||
elif action == "select_list_item":
|
elif action == "select_list_item":
|
||||||
item = task.get("listbox", task.get("item"))
|
item = task.get("listbox", task.get("item"))
|
||||||
@@ -1098,6 +1122,20 @@ class App:
|
|||||||
self._loop.create_task(self._process_event_queue())
|
self._loop.create_task(self._process_event_queue())
|
||||||
self._loop.run_forever()
|
self._loop.run_forever()
|
||||||
|
|
||||||
|
def shutdown(self):
|
||||||
|
"""Cleanly shuts down the app's background tasks."""
|
||||||
|
if self._loop.is_running():
|
||||||
|
self._loop.call_soon_threadsafe(self._loop.stop)
|
||||||
|
|
||||||
|
if self._loop_thread.is_alive():
|
||||||
|
self._loop_thread.join(timeout=2.0)
|
||||||
|
|
||||||
|
# Join other threads if they exist
|
||||||
|
if self.send_thread and self.send_thread.is_alive():
|
||||||
|
self.send_thread.join(timeout=1.0)
|
||||||
|
if self.models_thread and self.models_thread.is_alive():
|
||||||
|
self.models_thread.join(timeout=1.0)
|
||||||
|
|
||||||
async def _process_event_queue(self):
|
async def _process_event_queue(self):
|
||||||
"""Listens for and processes events from the AsyncEventQueue."""
|
"""Listens for and processes events from the AsyncEventQueue."""
|
||||||
while True:
|
while True:
|
||||||
@@ -1971,14 +2009,36 @@ class App:
|
|||||||
|
|
||||||
def _cb_accept_tracks(self):
|
def _cb_accept_tracks(self):
|
||||||
def _bg_task():
|
def _bg_task():
|
||||||
|
for track_data in self.proposed_tracks:
|
||||||
|
self._start_track_logic(track_data)
|
||||||
|
self.ai_status = "Tracks accepted and execution started."
|
||||||
|
threading.Thread(target=_bg_task, daemon=True).start()
|
||||||
|
|
||||||
|
def _cb_start_track(self, user_data=None):
|
||||||
|
idx = 0
|
||||||
|
if isinstance(user_data, int):
|
||||||
|
idx = user_data
|
||||||
|
elif isinstance(user_data, dict):
|
||||||
|
idx = user_data.get("index", 0)
|
||||||
|
|
||||||
|
if 0 <= idx < len(self.proposed_tracks):
|
||||||
|
track_data = self.proposed_tracks[idx]
|
||||||
|
title = track_data.get("title") or track_data.get("goal", "Untitled Track")
|
||||||
|
threading.Thread(target=lambda: self._start_track_logic(track_data), daemon=True).start()
|
||||||
|
self.ai_status = f"Track '{title}' started."
|
||||||
|
|
||||||
|
def _start_track_logic(self, track_data):
|
||||||
try:
|
try:
|
||||||
self.ai_status = "Generating tickets (Tier 2)..."
|
goal = track_data.get("goal", "")
|
||||||
|
title = track_data.get("title") or track_data.get("goal", "Untitled Track")
|
||||||
|
self.ai_status = f"Phase 2: Generating tickets for {title}..."
|
||||||
|
|
||||||
# 1. Get skeletons for context
|
# 1. Get skeletons for context
|
||||||
parser = ASTParser(language="python")
|
parser = ASTParser(language="python")
|
||||||
skeletons = ""
|
skeletons = ""
|
||||||
for file_path in self.files:
|
for i, file_path in enumerate(self.files):
|
||||||
try:
|
try:
|
||||||
|
self.ai_status = f"Phase 2: Scanning files ({i+1}/{len(self.files)})..."
|
||||||
abs_path = Path(self.ui_files_base_dir) / file_path
|
abs_path = Path(self.ui_files_base_dir) / file_path
|
||||||
if abs_path.exists() and abs_path.suffix == ".py":
|
if abs_path.exists() and abs_path.suffix == ".py":
|
||||||
with open(abs_path, "r", encoding="utf-8") as f:
|
with open(abs_path, "r", encoding="utf-8") as f:
|
||||||
@@ -1987,21 +2047,18 @@ class App:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error parsing skeleton for {file_path}: {e}")
|
print(f"Error parsing skeleton for {file_path}: {e}")
|
||||||
|
|
||||||
# 2. For each proposed track, generate and sort tickets
|
self.ai_status = "Phase 2: Calling Tech Lead..."
|
||||||
for track_data in self.proposed_tracks:
|
|
||||||
goal = track_data.get("goal", "")
|
|
||||||
title = track_data.get("title", "Untitled Track")
|
|
||||||
|
|
||||||
raw_tickets = conductor_tech_lead.generate_tickets(goal, skeletons)
|
raw_tickets = conductor_tech_lead.generate_tickets(goal, skeletons)
|
||||||
if not raw_tickets:
|
if not raw_tickets:
|
||||||
|
self.ai_status = f"Error: No tickets generated for track: {title}"
|
||||||
print(f"Warning: No tickets generated for track: {title}")
|
print(f"Warning: No tickets generated for track: {title}")
|
||||||
continue
|
return
|
||||||
|
|
||||||
|
self.ai_status = "Phase 2: Sorting tickets..."
|
||||||
try:
|
try:
|
||||||
sorted_tickets_data = conductor_tech_lead.topological_sort(raw_tickets)
|
sorted_tickets_data = conductor_tech_lead.topological_sort(raw_tickets)
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
print(f"Dependency error in track '{title}': {e}")
|
print(f"Dependency error in track '{title}': {e}")
|
||||||
# Fallback to unsorted if sort fails? Or skip?
|
|
||||||
sorted_tickets_data = raw_tickets
|
sorted_tickets_data = raw_tickets
|
||||||
|
|
||||||
# 3. Create Track and Ticket objects
|
# 3. Create Track and Ticket objects
|
||||||
@@ -2009,7 +2066,7 @@ class App:
|
|||||||
for t_data in sorted_tickets_data:
|
for t_data in sorted_tickets_data:
|
||||||
ticket = Ticket(
|
ticket = Ticket(
|
||||||
id=t_data["id"],
|
id=t_data["id"],
|
||||||
description=t_data["description"],
|
description=t_data.get("description") or t_data.get("goal", "No description"),
|
||||||
status=t_data.get("status", "todo"),
|
status=t_data.get("status", "todo"),
|
||||||
assigned_to=t_data.get("assigned_to", "unassigned"),
|
assigned_to=t_data.get("assigned_to", "unassigned"),
|
||||||
depends_on=t_data.get("depends_on", []),
|
depends_on=t_data.get("depends_on", []),
|
||||||
@@ -2025,13 +2082,9 @@ class App:
|
|||||||
|
|
||||||
# Schedule the coroutine on the internal event loop
|
# Schedule the coroutine on the internal event loop
|
||||||
asyncio.run_coroutine_threadsafe(engine.run_linear(), self._loop)
|
asyncio.run_coroutine_threadsafe(engine.run_linear(), self._loop)
|
||||||
|
|
||||||
self.ai_status = "Tracks accepted and execution started."
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.ai_status = f"Track acceptance error: {e}"
|
self.ai_status = f"Track start error: {e}"
|
||||||
print(f"ERROR in _cb_accept_tracks background task: {e}")
|
print(f"ERROR in _start_track_logic: {e}")
|
||||||
|
|
||||||
threading.Thread(target=_bg_task, daemon=True).start()
|
|
||||||
|
|
||||||
def _render_track_proposal_modal(self):
|
def _render_track_proposal_modal(self):
|
||||||
if self._show_track_proposal_modal:
|
if self._show_track_proposal_modal:
|
||||||
@@ -2047,6 +2100,8 @@ class App:
|
|||||||
for idx, track in enumerate(self.proposed_tracks):
|
for idx, track in enumerate(self.proposed_tracks):
|
||||||
imgui.text_colored(C_LBL, f"Track {idx+1}: {track.get('title', 'Untitled')}")
|
imgui.text_colored(C_LBL, f"Track {idx+1}: {track.get('title', 'Untitled')}")
|
||||||
imgui.text_wrapped(f"Goal: {track.get('goal', 'N/A')}")
|
imgui.text_wrapped(f"Goal: {track.get('goal', 'N/A')}")
|
||||||
|
if imgui.button(f"Start This Track##{idx}"):
|
||||||
|
self._cb_start_track(idx)
|
||||||
imgui.separator()
|
imgui.separator()
|
||||||
|
|
||||||
if imgui.button("Accept", imgui.ImVec2(120, 0)):
|
if imgui.button("Accept", imgui.ImVec2(120, 0)):
|
||||||
@@ -2592,9 +2647,29 @@ class App:
|
|||||||
else:
|
else:
|
||||||
imgui.text_disabled("No active MMA track.")
|
imgui.text_disabled("No active MMA track.")
|
||||||
|
|
||||||
|
# 3. Token Usage Table
|
||||||
|
imgui.separator()
|
||||||
|
imgui.text("Tier Usage (Tokens)")
|
||||||
|
if imgui.begin_table("mma_usage", 3, imgui.TableFlags_.borders | imgui.TableFlags_.row_bg):
|
||||||
|
imgui.table_setup_column("Tier")
|
||||||
|
imgui.table_setup_column("Input")
|
||||||
|
imgui.table_setup_column("Output")
|
||||||
|
imgui.table_headers_row()
|
||||||
|
|
||||||
|
usage = self.mma_tier_usage
|
||||||
|
for tier, stats in usage.items():
|
||||||
|
imgui.table_next_row()
|
||||||
|
imgui.table_next_column()
|
||||||
|
imgui.text(tier)
|
||||||
|
imgui.table_next_column()
|
||||||
|
imgui.text(f"{stats.get('input', 0):,}")
|
||||||
|
imgui.table_next_column()
|
||||||
|
imgui.text(f"{stats.get('output', 0):,}")
|
||||||
|
imgui.end_table()
|
||||||
|
|
||||||
imgui.separator()
|
imgui.separator()
|
||||||
|
|
||||||
# 3. Ticket Queue
|
# 4. Ticket Queue
|
||||||
imgui.text("Ticket Queue")
|
imgui.text("Ticket Queue")
|
||||||
if imgui.begin_table("mma_tickets", 4, imgui.TableFlags_.borders_inner_h | imgui.TableFlags_.resizable):
|
if imgui.begin_table("mma_tickets", 4, imgui.TableFlags_.borders_inner_h | imgui.TableFlags_.resizable):
|
||||||
imgui.table_setup_column("ID", imgui.TableColumnFlags_.width_fixed, 80)
|
imgui.table_setup_column("ID", imgui.TableColumnFlags_.width_fixed, 80)
|
||||||
|
|||||||
@@ -5,6 +5,8 @@ main_context = "MainContext.md"
|
|||||||
system_prompt = ""
|
system_prompt = ""
|
||||||
word_wrap = true
|
word_wrap = true
|
||||||
summary_only = false
|
summary_only = false
|
||||||
|
auto_scroll_comms = true
|
||||||
|
auto_scroll_tool_calls = true
|
||||||
|
|
||||||
[output]
|
[output]
|
||||||
output_dir = "./md_gen"
|
output_dir = "./md_gen"
|
||||||
@@ -42,3 +44,9 @@ search_files = true
|
|||||||
get_file_summary = true
|
get_file_summary = true
|
||||||
web_search = true
|
web_search = true
|
||||||
fetch_url = true
|
fetch_url = true
|
||||||
|
|
||||||
|
[gemini_cli]
|
||||||
|
binary_path = "gemini"
|
||||||
|
|
||||||
|
[mma]
|
||||||
|
epic = ""
|
||||||
|
|||||||
@@ -85,5 +85,5 @@ history = [
|
|||||||
|
|
||||||
[discussions."test gemini mock interaction"]
|
[discussions."test gemini mock interaction"]
|
||||||
git_commit = ""
|
git_commit = ""
|
||||||
last_updated = "2026-02-24T22:12:47"
|
last_updated = "2026-02-27T11:50:13"
|
||||||
history = []
|
history = []
|
||||||
|
|||||||
@@ -78,8 +78,8 @@ Collapsed=0
|
|||||||
DockId=0x0000000F,2
|
DockId=0x0000000F,2
|
||||||
|
|
||||||
[Window][Theme]
|
[Window][Theme]
|
||||||
Pos=0,17
|
Pos=0,21
|
||||||
Size=588,545
|
Size=639,824
|
||||||
Collapsed=0
|
Collapsed=0
|
||||||
DockId=0x00000005,1
|
DockId=0x00000005,1
|
||||||
|
|
||||||
@@ -89,14 +89,14 @@ Size=900,700
|
|||||||
Collapsed=0
|
Collapsed=0
|
||||||
|
|
||||||
[Window][Diagnostics]
|
[Window][Diagnostics]
|
||||||
Pos=590,17
|
Pos=641,21
|
||||||
Size=530,1228
|
Size=1092,908
|
||||||
Collapsed=0
|
Collapsed=0
|
||||||
DockId=0x0000000E,0
|
DockId=0x00000010,0
|
||||||
|
|
||||||
[Window][Context Hub]
|
[Window][Context Hub]
|
||||||
Pos=0,17
|
Pos=0,21
|
||||||
Size=588,545
|
Size=639,824
|
||||||
Collapsed=0
|
Collapsed=0
|
||||||
DockId=0x00000005,0
|
DockId=0x00000005,0
|
||||||
|
|
||||||
@@ -107,26 +107,26 @@ Collapsed=0
|
|||||||
DockId=0x0000000D,0
|
DockId=0x0000000D,0
|
||||||
|
|
||||||
[Window][Discussion Hub]
|
[Window][Discussion Hub]
|
||||||
Pos=1122,17
|
Pos=1735,21
|
||||||
Size=558,1228
|
Size=783,1586
|
||||||
Collapsed=0
|
Collapsed=0
|
||||||
DockId=0x00000004,0
|
DockId=0x00000004,0
|
||||||
|
|
||||||
[Window][Operations Hub]
|
[Window][Operations Hub]
|
||||||
Pos=590,17
|
Pos=641,21
|
||||||
Size=530,1228
|
Size=1092,908
|
||||||
Collapsed=0
|
Collapsed=0
|
||||||
DockId=0x0000000E,1
|
DockId=0x00000010,1
|
||||||
|
|
||||||
[Window][Files & Media]
|
[Window][Files & Media]
|
||||||
Pos=0,564
|
Pos=0,847
|
||||||
Size=588,681
|
Size=639,760
|
||||||
Collapsed=0
|
Collapsed=0
|
||||||
DockId=0x00000006,1
|
DockId=0x00000006,1
|
||||||
|
|
||||||
[Window][AI Settings]
|
[Window][AI Settings]
|
||||||
Pos=0,564
|
Pos=0,847
|
||||||
Size=588,681
|
Size=639,760
|
||||||
Collapsed=0
|
Collapsed=0
|
||||||
DockId=0x00000006,0
|
DockId=0x00000006,0
|
||||||
|
|
||||||
@@ -136,32 +136,51 @@ Size=416,325
|
|||||||
Collapsed=0
|
Collapsed=0
|
||||||
|
|
||||||
[Window][MMA Dashboard]
|
[Window][MMA Dashboard]
|
||||||
Pos=157,466
|
Pos=641,931
|
||||||
Size=676,653
|
Size=1092,676
|
||||||
Collapsed=0
|
Collapsed=0
|
||||||
|
DockId=0x00000011,0
|
||||||
|
|
||||||
[Table][0xFB6E3870,3]
|
[Window][Log Management]
|
||||||
|
Pos=1735,21
|
||||||
|
Size=783,1586
|
||||||
|
Collapsed=0
|
||||||
|
DockId=0x00000004,1
|
||||||
|
|
||||||
|
[Table][0xFB6E3870,4]
|
||||||
RefScale=13
|
RefScale=13
|
||||||
Column 0 Width=80
|
Column 0 Width=80
|
||||||
Column 1 Weight=1.0000
|
Column 1 Weight=1.0000
|
||||||
Column 2 Width=100
|
Column 2 Width=100
|
||||||
|
Column 3 Width=120
|
||||||
|
|
||||||
|
[Table][0xFC15AE63,7]
|
||||||
|
Column 0 Weight=1.4012
|
||||||
|
Column 1 Weight=0.5285
|
||||||
|
Column 2 Weight=0.5347
|
||||||
|
Column 3 Weight=2.4767
|
||||||
|
Column 4 Weight=0.6084
|
||||||
|
Column 5 Weight=0.4425
|
||||||
|
Column 6 Weight=1.0079
|
||||||
|
|
||||||
[Docking][Data]
|
[Docking][Data]
|
||||||
DockNode ID=0x00000008 Pos=3125,170 Size=593,1157 Split=Y
|
DockNode ID=0x00000008 Pos=3125,170 Size=593,1157 Split=Y
|
||||||
DockNode ID=0x00000009 Parent=0x00000008 SizeRef=1029,147 Selected=0x0469CA7A
|
DockNode ID=0x00000009 Parent=0x00000008 SizeRef=1029,147 Selected=0x0469CA7A
|
||||||
DockNode ID=0x0000000A Parent=0x00000008 SizeRef=1029,145 Selected=0xDF822E02
|
DockNode ID=0x0000000A Parent=0x00000008 SizeRef=1029,145 Selected=0xDF822E02
|
||||||
DockSpace ID=0xAFC85805 Window=0x079D3A04 Pos=0,17 Size=1680,1228 Split=Y
|
DockSpace ID=0xAFC85805 Window=0x079D3A04 Pos=0,21 Size=2518,1586 Split=Y
|
||||||
DockNode ID=0x0000000C Parent=0xAFC85805 SizeRef=1362,1041 Split=X Selected=0x5D11106F
|
DockNode ID=0x0000000C Parent=0xAFC85805 SizeRef=1362,1041 Split=X Selected=0x5D11106F
|
||||||
DockNode ID=0x00000003 Parent=0x0000000C SizeRef=1120,1183 Split=X
|
DockNode ID=0x00000003 Parent=0x0000000C SizeRef=1733,1183 Split=X
|
||||||
DockNode ID=0x0000000B Parent=0x00000003 SizeRef=404,1186 Split=Y Selected=0xF4139CA2
|
DockNode ID=0x0000000B Parent=0x00000003 SizeRef=404,1186 Split=Y Selected=0xF4139CA2
|
||||||
DockNode ID=0x00000002 Parent=0x0000000B SizeRef=1029,1119 Split=X Selected=0xF4139CA2
|
DockNode ID=0x00000002 Parent=0x0000000B SizeRef=1029,1119 Split=X Selected=0xF4139CA2
|
||||||
DockNode ID=0x00000007 Parent=0x00000002 SizeRef=588,858 Split=Y Selected=0x8CA2375C
|
DockNode ID=0x00000007 Parent=0x00000002 SizeRef=639,858 Split=Y Selected=0x8CA2375C
|
||||||
DockNode ID=0x00000005 Parent=0x00000007 SizeRef=295,545 Selected=0xF4139CA2
|
DockNode ID=0x00000005 Parent=0x00000007 SizeRef=295,824 Selected=0xF4139CA2
|
||||||
DockNode ID=0x00000006 Parent=0x00000007 SizeRef=295,636 CentralNode=1 Selected=0x7BD57D6A
|
DockNode ID=0x00000006 Parent=0x00000007 SizeRef=295,724 CentralNode=1 Selected=0x7BD57D6A
|
||||||
DockNode ID=0x0000000E Parent=0x00000002 SizeRef=530,858 Selected=0x418C7449
|
DockNode ID=0x0000000E Parent=0x00000002 SizeRef=1092,858 Split=Y Selected=0x418C7449
|
||||||
|
DockNode ID=0x00000010 Parent=0x0000000E SizeRef=868,887 Selected=0x418C7449
|
||||||
|
DockNode ID=0x00000011 Parent=0x0000000E SizeRef=868,661 Selected=0x3AEC3498
|
||||||
DockNode ID=0x00000001 Parent=0x0000000B SizeRef=1029,775 Selected=0x8B4EBFA6
|
DockNode ID=0x00000001 Parent=0x0000000B SizeRef=1029,775 Selected=0x8B4EBFA6
|
||||||
DockNode ID=0x0000000D Parent=0x00000003 SizeRef=435,1186 Selected=0x363E93D6
|
DockNode ID=0x0000000D Parent=0x00000003 SizeRef=435,1186 Selected=0x363E93D6
|
||||||
DockNode ID=0x00000004 Parent=0x0000000C SizeRef=558,1183 Selected=0x6F2B5B04
|
DockNode ID=0x00000004 Parent=0x0000000C SizeRef=783,1183 Selected=0x6F2B5B04
|
||||||
DockNode ID=0x0000000F Parent=0xAFC85805 SizeRef=1362,451 Selected=0xDD6419BC
|
DockNode ID=0x0000000F Parent=0xAFC85805 SizeRef=1362,451 Selected=0xDD6419BC
|
||||||
|
|
||||||
;;;<<<Layout_655921752_Default>>>;;;
|
;;;<<<Layout_655921752_Default>>>;;;
|
||||||
|
|||||||
@@ -109,7 +109,7 @@ class ConductorEngine:
|
|||||||
model_name="gemini-2.5-flash-lite",
|
model_name="gemini-2.5-flash-lite",
|
||||||
messages=[]
|
messages=[]
|
||||||
)
|
)
|
||||||
run_worker_lifecycle(ticket, context, event_queue=self.event_queue)
|
run_worker_lifecycle(ticket, context, event_queue=self.event_queue, engine=self)
|
||||||
await self._push_state(active_tier="Tier 2 (Tech Lead)")
|
await self._push_state(active_tier="Tier 2 (Tech Lead)")
|
||||||
|
|
||||||
def confirm_execution(payload: str, event_queue: events.AsyncEventQueue, ticket_id: str) -> bool:
|
def confirm_execution(payload: str, event_queue: events.AsyncEventQueue, ticket_id: str) -> bool:
|
||||||
@@ -152,7 +152,7 @@ def confirm_execution(payload: str, event_queue: events.AsyncEventQueue, ticket_
|
|||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def run_worker_lifecycle(ticket: Ticket, context: WorkerContext, context_files: List[str] = None, event_queue: events.AsyncEventQueue = None):
|
def run_worker_lifecycle(ticket: Ticket, context: WorkerContext, context_files: List[str] = None, event_queue: events.AsyncEventQueue = None, engine: Optional['ConductorEngine'] = None):
|
||||||
"""
|
"""
|
||||||
Simulates the lifecycle of a single agent working on a ticket.
|
Simulates the lifecycle of a single agent working on a ticket.
|
||||||
Calls the AI client and updates the ticket status based on the response.
|
Calls the AI client and updates the ticket status based on the response.
|
||||||
@@ -204,6 +204,15 @@ def run_worker_lifecycle(ticket: Ticket, context: WorkerContext, context_files:
|
|||||||
qa_callback=ai_client.run_tier4_analysis
|
qa_callback=ai_client.run_tier4_analysis
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Update usage in engine if provided
|
||||||
|
if engine:
|
||||||
|
stats = {} # ai_client.get_token_stats() is not available
|
||||||
|
# ai_client provides aggregate stats, for granular tier tracking
|
||||||
|
# we'd need to diff before/after or have ai_client return usage per call.
|
||||||
|
# For Phase 4, we'll use a simplified diff approach.
|
||||||
|
engine.tier_usage["Tier 3"]["input"] += stats.get("prompt_tokens", 0)
|
||||||
|
engine.tier_usage["Tier 3"]["output"] += stats.get("candidates_tokens", 0)
|
||||||
|
|
||||||
if "BLOCKED" in response.upper():
|
if "BLOCKED" in response.upper():
|
||||||
ticket.mark_blocked(response)
|
ticket.mark_blocked(response)
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -108,6 +108,10 @@ def generate_tracks(user_request: str, project_config: dict, file_items: list[di
|
|||||||
json_match = json_match.split("```")[1].split("```")[0].strip()
|
json_match = json_match.split("```")[1].split("```")[0].strip()
|
||||||
|
|
||||||
tracks = json.loads(json_match)
|
tracks = json.loads(json_match)
|
||||||
|
# Ensure each track has a 'title' for the GUI
|
||||||
|
for t in tracks:
|
||||||
|
if "title" not in t:
|
||||||
|
t["title"] = t.get("goal", "Untitled Track")[:50]
|
||||||
return tracks
|
return tracks
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error parsing Tier 1 response: {e}")
|
print(f"Error parsing Tier 1 response: {e}")
|
||||||
|
|||||||
@@ -38,10 +38,20 @@ class PerformanceMonitor:
|
|||||||
|
|
||||||
def _monitor_cpu(self):
|
def _monitor_cpu(self):
|
||||||
while not self._stop_event.is_set():
|
while not self._stop_event.is_set():
|
||||||
# psutil.cpu_percent is better than process.cpu_percent for real-time
|
# psutil.cpu_percent with interval=1.0 is blocking for 1 second.
|
||||||
usage = self._process.cpu_percent(interval=1.0)
|
# To be responsive to stop_event, we use a smaller interval or no interval
|
||||||
|
# and handle the timing ourselves.
|
||||||
|
try:
|
||||||
|
usage = self._process.cpu_percent()
|
||||||
with self._cpu_lock:
|
with self._cpu_lock:
|
||||||
self._cpu_usage = usage
|
self._cpu_usage = usage
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Sleep in small increments to stay responsive to stop_event
|
||||||
|
for _ in range(10):
|
||||||
|
if self._stop_event.is_set():
|
||||||
|
break
|
||||||
time.sleep(0.1)
|
time.sleep(0.1)
|
||||||
|
|
||||||
def start_frame(self):
|
def start_frame(self):
|
||||||
|
|||||||
@@ -29,3 +29,8 @@ search_files = true
|
|||||||
get_file_summary = true
|
get_file_summary = true
|
||||||
web_search = true
|
web_search = true
|
||||||
fetch_url = true
|
fetch_url = true
|
||||||
|
|
||||||
|
[mma]
|
||||||
|
epic = ""
|
||||||
|
active_track_id = ""
|
||||||
|
tracks = []
|
||||||
|
|||||||
@@ -8,5 +8,5 @@ active = "main"
|
|||||||
|
|
||||||
[discussions.main]
|
[discussions.main]
|
||||||
git_commit = ""
|
git_commit = ""
|
||||||
last_updated = "2026-02-26T21:32:42"
|
last_updated = "2026-02-27T18:57:49"
|
||||||
history = []
|
history = []
|
||||||
|
|||||||
@@ -170,11 +170,23 @@ def load_history(project_path: str | Path) -> dict:
|
|||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
|
||||||
|
def clean_nones(data):
|
||||||
|
"""Recursively remove None values from a dictionary/list."""
|
||||||
|
if isinstance(data, dict):
|
||||||
|
return {k: clean_nones(v) for k, v in data.items() if v is not None}
|
||||||
|
elif isinstance(data, list):
|
||||||
|
return [clean_nones(v) for v in data if v is not None]
|
||||||
|
return data
|
||||||
|
|
||||||
|
|
||||||
def save_project(proj: dict, path: str | Path, disc_data: dict | None = None):
|
def save_project(proj: dict, path: str | Path, disc_data: dict | None = None):
|
||||||
"""
|
"""
|
||||||
Save the project TOML.
|
Save the project TOML.
|
||||||
If 'discussion' is present in proj, it is moved to the sibling history file.
|
If 'discussion' is present in proj, it is moved to the sibling history file.
|
||||||
"""
|
"""
|
||||||
|
# Clean None values as TOML doesn't support them
|
||||||
|
proj = clean_nones(proj)
|
||||||
|
|
||||||
# Ensure 'discussion' is NOT in the main project dict
|
# Ensure 'discussion' is NOT in the main project dict
|
||||||
if "discussion" in proj:
|
if "discussion" in proj:
|
||||||
# If disc_data wasn't provided, use the one from proj
|
# If disc_data wasn't provided, use the one from proj
|
||||||
@@ -188,6 +200,7 @@ def save_project(proj: dict, path: str | Path, disc_data: dict | None = None):
|
|||||||
tomli_w.dump(proj, f)
|
tomli_w.dump(proj, f)
|
||||||
|
|
||||||
if disc_data:
|
if disc_data:
|
||||||
|
disc_data = clean_nones(disc_data)
|
||||||
hist_path = get_history_path(path)
|
hist_path = get_history_path(path)
|
||||||
with open(hist_path, "wb") as f:
|
with open(hist_path, "wb") as f:
|
||||||
tomli_w.dump(disc_data, f)
|
tomli_w.dump(disc_data, f)
|
||||||
|
|||||||
@@ -68,13 +68,13 @@ def get_model_for_role(role: str) -> str:
|
|||||||
if role == 'tier1-orchestrator' or role == 'tier1':
|
if role == 'tier1-orchestrator' or role == 'tier1':
|
||||||
return 'gemini-3.1-pro-preview'
|
return 'gemini-3.1-pro-preview'
|
||||||
elif role == 'tier2-tech-lead' or role == 'tier2':
|
elif role == 'tier2-tech-lead' or role == 'tier2':
|
||||||
return 'gemini-3-flash-preview'
|
return 'gemini-2.5-flash-lite'
|
||||||
elif role == 'tier3-worker' or role == 'tier3':
|
elif role == 'tier3-worker' or role == 'tier3':
|
||||||
return 'gemini-3-flash-preview'
|
return 'gemini-2.5-flash-lite'
|
||||||
elif role == 'tier4-qa' or role == 'tier4':
|
elif role == 'tier4-qa' or role == 'tier4':
|
||||||
return 'gemini-2.5-flash-lite'
|
return 'gemini-2.5-flash-lite'
|
||||||
else:
|
else:
|
||||||
return 'gemini-3-flash-preview'
|
return 'gemini-2.5-flash-lite'
|
||||||
|
|
||||||
def get_role_documents(role: str) -> list[str]:
|
def get_role_documents(role: str) -> list[str]:
|
||||||
if role == 'tier1-orchestrator' or role == 'tier1':
|
if role == 'tier1-orchestrator' or role == 'tier1':
|
||||||
@@ -176,12 +176,17 @@ def execute_agent(role: str, prompt: str, docs: list[str]) -> str:
|
|||||||
if role in ['tier3', 'tier3-worker']:
|
if role in ['tier3', 'tier3-worker']:
|
||||||
system_directive = "STRICT SYSTEM DIRECTIVE: You are a stateless Tier 3 Worker (Contributor). " \
|
system_directive = "STRICT SYSTEM DIRECTIVE: You are a stateless Tier 3 Worker (Contributor). " \
|
||||||
"Your goal is to implement specific code changes or tests based on the provided task. " \
|
"Your goal is to implement specific code changes or tests based on the provided task. " \
|
||||||
"You have access to tools for reading and writing files, and run_shell_command for TDD verification. " \
|
"You have access to tools for reading and writing files (e.g., read_file, write_file, replace). " \
|
||||||
|
"CRITICAL: You CANNOT execute PowerShell scripts or run shell commands directly. " \
|
||||||
|
"If you need to verify code or run tests, output the full PowerShell script inside a " \
|
||||||
|
"markdown code block (e.g., ```powershell) and state that it needs to be executed. " \
|
||||||
"Follow TDD and return success status or code changes. No pleasantries, no conversational filler."
|
"Follow TDD and return success status or code changes. No pleasantries, no conversational filler."
|
||||||
elif role in ['tier4', 'tier4-qa']:
|
elif role in ['tier4', 'tier4-qa']:
|
||||||
system_directive = "STRICT SYSTEM DIRECTIVE: You are a stateless Tier 4 QA Agent. " \
|
system_directive = "STRICT SYSTEM DIRECTIVE: You are a stateless Tier 4 QA Agent. " \
|
||||||
"Your goal is to analyze errors, summarize logs, or verify tests. " \
|
"Your goal is to analyze errors, summarize logs, or verify tests. " \
|
||||||
"You have access to tools for reading files, exploring the codebase, and run_shell_command for diagnostics. " \
|
"You have access to tools for reading files and exploring the codebase. " \
|
||||||
|
"CRITICAL: You CANNOT execute PowerShell scripts or run shell commands directly. " \
|
||||||
|
"If you need to run diagnostics, output the PowerShell script and request execution. " \
|
||||||
"ONLY output the requested analysis. No pleasantries."
|
"ONLY output the requested analysis. No pleasantries."
|
||||||
else:
|
else:
|
||||||
system_directive = f"STRICT SYSTEM DIRECTIVE: You are a stateless {role}. " \
|
system_directive = f"STRICT SYSTEM DIRECTIVE: You are a stateless {role}. " \
|
||||||
@@ -203,9 +208,11 @@ def execute_agent(role: str, prompt: str, docs: list[str]) -> str:
|
|||||||
|
|
||||||
# Use subprocess with input to pipe the prompt via stdin, avoiding WinError 206.
|
# Use subprocess with input to pipe the prompt via stdin, avoiding WinError 206.
|
||||||
# We use -p 'mma_task' to ensure non-interactive (headless) mode and valid parsing.
|
# We use -p 'mma_task' to ensure non-interactive (headless) mode and valid parsing.
|
||||||
|
# Whitelist tools to ensure they are available to the model in headless mode.
|
||||||
|
allowed_tools = "read_file,write_file,replace,list_directory,glob,grep_search,search_files,get_file_summary"
|
||||||
ps_command = (
|
ps_command = (
|
||||||
f"if (Test-Path 'C:\\projects\\misc\\setup_gemini.ps1') {{ . 'C:\\projects\\misc\\setup_gemini.ps1' }}; "
|
f"if (Test-Path 'C:\\projects\\misc\\setup_gemini.ps1') {{ . 'C:\\projects\\misc\\setup_gemini.ps1' }}; "
|
||||||
f"gemini -p 'mma_task' --allow-shell --output-format json --model {model}"
|
f"gemini -p 'mma_task' --allowed-tools {allowed_tools} --output-format json --model {model}"
|
||||||
)
|
)
|
||||||
cmd = ['powershell.exe', '-NoProfile', '-Command', ps_command]
|
cmd = ['powershell.exe', '-NoProfile', '-Command', ps_command]
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ param(
|
|||||||
[ValidateSet("Worker", "QA", "Utility")]
|
[ValidateSet("Worker", "QA", "Utility")]
|
||||||
[string]$Role = "Utility",
|
[string]$Role = "Utility",
|
||||||
|
|
||||||
[string]$Model = "flash",
|
[string]$Model = "flash-lite",
|
||||||
|
|
||||||
[switch]$ShowContext
|
[switch]$ShowContext
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -92,14 +92,7 @@ def close_session():
|
|||||||
if _comms_fh is None:
|
if _comms_fh is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
# Trigger auto-whitelist update for this session before closing
|
# Close files first to ensure all data is flushed to disk
|
||||||
try:
|
|
||||||
from log_registry import LogRegistry
|
|
||||||
registry = LogRegistry(str(_LOG_DIR / "log_registry.toml"))
|
|
||||||
registry.update_auto_whitelist_status(_session_id)
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Warning: Could not update auto-whitelist on close: {e}")
|
|
||||||
|
|
||||||
if _comms_fh:
|
if _comms_fh:
|
||||||
_comms_fh.close()
|
_comms_fh.close()
|
||||||
_comms_fh = None
|
_comms_fh = None
|
||||||
@@ -113,6 +106,14 @@ def close_session():
|
|||||||
_cli_fh.close()
|
_cli_fh.close()
|
||||||
_cli_fh = None
|
_cli_fh = None
|
||||||
|
|
||||||
|
# Trigger auto-whitelist update for this session after closing
|
||||||
|
try:
|
||||||
|
from log_registry import LogRegistry
|
||||||
|
registry = LogRegistry(str(_LOG_DIR / "log_registry.toml"))
|
||||||
|
registry.update_auto_whitelist_status(_session_id)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Warning: Could not update auto-whitelist on close: {e}")
|
||||||
|
|
||||||
|
|
||||||
def log_api_hook(method: str, path: str, payload: str):
|
def log_api_hook(method: str, path: str, payload: str):
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ class TestMMAPersistence(unittest.TestCase):
|
|||||||
def test_default_project_has_mma(self):
|
def test_default_project_has_mma(self):
|
||||||
proj = project_manager.default_project("test")
|
proj = project_manager.default_project("test")
|
||||||
self.assertIn("mma", proj)
|
self.assertIn("mma", proj)
|
||||||
self.assertEqual(proj["mma"], {"epic": "", "tracks": []})
|
self.assertEqual(proj["mma"], {"epic": "", "active_track_id": "", "tracks": []})
|
||||||
|
|
||||||
def test_save_load_mma(self):
|
def test_save_load_mma(self):
|
||||||
proj = project_manager.default_project("test")
|
proj = project_manager.default_project("test")
|
||||||
|
|||||||
37
tests.toml
37
tests.toml
@@ -19,21 +19,30 @@ files = [
|
|||||||
"tests/test_performance_monitor.py",
|
"tests/test_performance_monitor.py",
|
||||||
"tests/test_token_usage.py",
|
"tests/test_token_usage.py",
|
||||||
"tests/test_layout_reorganization.py",
|
"tests/test_layout_reorganization.py",
|
||||||
"tests/test_async_events.py"
|
"tests/test_async_events.py",
|
||||||
|
"test_mma_persistence.py",
|
||||||
|
"tests/test_session_logging.py",
|
||||||
|
"tests/test_logging_e2e.py",
|
||||||
|
"tests/test_log_management_ui.py",
|
||||||
|
"tests/test_log_pruner.py",
|
||||||
|
"tests/test_log_registry.py"
|
||||||
]
|
]
|
||||||
|
|
||||||
[categories.conductor]
|
[categories.mma]
|
||||||
description = "Conductor and MMA internal tests (Blacklisted from default core runs)"
|
description = "Manual Slop MMA Feature tests (Project Features)"
|
||||||
files = [
|
files = [
|
||||||
"tests/test_mma_exec.py",
|
"tests/test_conductor_engine.py",
|
||||||
"tests/test_mma_skeleton.py",
|
"tests/test_conductor_tech_lead.py",
|
||||||
"tests/test_conductor_api_hook_integration.py",
|
"tests/test_orchestrator_pm.py",
|
||||||
"tests/conductor/test_infrastructure.py",
|
"tests/test_orchestrator_pm_history.py",
|
||||||
"tests/test_gemini_cli_adapter.py",
|
"tests/test_orchestration_logic.py",
|
||||||
"tests/test_gemini_cli_integration.py",
|
"tests/test_mma_models.py",
|
||||||
"tests/test_ai_client_cli.py",
|
"tests/test_mma_prompts.py",
|
||||||
"tests/test_cli_tool_bridge.py",
|
"tests/test_mma_ticket_actions.py",
|
||||||
"tests/test_gemini_metrics.py"
|
"tests/test_mma_orchestration_gui.py",
|
||||||
|
"tests/test_headless_verification.py",
|
||||||
|
"tests/test_tier4_interceptor.py",
|
||||||
|
"tests/test_conductor_api_hook_integration.py"
|
||||||
]
|
]
|
||||||
|
|
||||||
[categories.integrations]
|
[categories.integrations]
|
||||||
@@ -63,5 +72,5 @@ files = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
[execution]
|
[execution]
|
||||||
default_categories = ["core", "integrations", "simulations"]
|
default_categories = ["core", "mma", "integrations", "simulations"]
|
||||||
blacklist_categories = ["conductor"]
|
blacklist_categories = []
|
||||||
|
|||||||
@@ -11,6 +11,15 @@ import os
|
|||||||
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
||||||
|
|
||||||
from api_hook_client import ApiHookClient
|
from api_hook_client import ApiHookClient
|
||||||
|
import ai_client
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
|
||||||
|
def reset_ai_client():
|
||||||
|
"""Reset ai_client global state between every test to prevent state pollution."""
|
||||||
|
ai_client.reset_session()
|
||||||
|
# Default to a safe model
|
||||||
|
ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
|
||||||
|
yield
|
||||||
|
|
||||||
def kill_process_tree(pid):
|
def kill_process_tree(pid):
|
||||||
"""Robustly kills a process and all its children."""
|
"""Robustly kills a process and all its children."""
|
||||||
|
|||||||
@@ -33,3 +33,8 @@ search_files = true
|
|||||||
get_file_summary = true
|
get_file_summary = true
|
||||||
web_search = true
|
web_search = true
|
||||||
fetch_url = true
|
fetch_url = true
|
||||||
|
|
||||||
|
[mma]
|
||||||
|
epic = ""
|
||||||
|
active_track_id = ""
|
||||||
|
tracks = []
|
||||||
|
|||||||
@@ -10,5 +10,5 @@ auto_add = true
|
|||||||
|
|
||||||
[discussions.main]
|
[discussions.main]
|
||||||
git_commit = ""
|
git_commit = ""
|
||||||
last_updated = "2026-02-26T21:33:34"
|
last_updated = "2026-02-27T18:56:53"
|
||||||
history = []
|
history = []
|
||||||
|
|||||||
@@ -30,3 +30,6 @@ search_files = true
|
|||||||
get_file_summary = true
|
get_file_summary = true
|
||||||
web_search = true
|
web_search = true
|
||||||
fetch_url = true
|
fetch_url = true
|
||||||
|
|
||||||
|
[mma]
|
||||||
|
epic = ""
|
||||||
|
|||||||
@@ -6,10 +6,10 @@ roles = [
|
|||||||
"Reasoning",
|
"Reasoning",
|
||||||
]
|
]
|
||||||
history = []
|
history = []
|
||||||
active = "TestDisc_1772159592"
|
active = "TestDisc_1772236592"
|
||||||
auto_add = true
|
auto_add = true
|
||||||
|
|
||||||
[discussions.TestDisc_1772159592]
|
[discussions.TestDisc_1772236592]
|
||||||
git_commit = ""
|
git_commit = ""
|
||||||
last_updated = "2026-02-26T21:33:27"
|
last_updated = "2026-02-27T18:56:46"
|
||||||
history = []
|
history = []
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ base_dir = "."
|
|||||||
paths = []
|
paths = []
|
||||||
|
|
||||||
[gemini_cli]
|
[gemini_cli]
|
||||||
binary_path = "gemini"
|
binary_path = "\"C:\\projects\\manual_slop\\.venv\\Scripts\\python.exe\" \"C:\\projects\\manual_slop\\tests\\mock_gemini_cli.py\""
|
||||||
|
|
||||||
[deepseek]
|
[deepseek]
|
||||||
reasoning_effort = "medium"
|
reasoning_effort = "medium"
|
||||||
@@ -33,3 +33,8 @@ search_files = true
|
|||||||
get_file_summary = true
|
get_file_summary = true
|
||||||
web_search = true
|
web_search = true
|
||||||
fetch_url = true
|
fetch_url = true
|
||||||
|
|
||||||
|
[mma]
|
||||||
|
epic = ""
|
||||||
|
active_track_id = ""
|
||||||
|
tracks = []
|
||||||
|
|||||||
@@ -10,5 +10,5 @@ auto_add = true
|
|||||||
|
|
||||||
[discussions.main]
|
[discussions.main]
|
||||||
git_commit = ""
|
git_commit = ""
|
||||||
last_updated = "2026-02-26T21:34:05"
|
last_updated = "2026-02-27T18:57:53"
|
||||||
history = []
|
history = []
|
||||||
|
|||||||
@@ -33,3 +33,8 @@ search_files = true
|
|||||||
get_file_summary = true
|
get_file_summary = true
|
||||||
web_search = true
|
web_search = true
|
||||||
fetch_url = true
|
fetch_url = true
|
||||||
|
|
||||||
|
[mma]
|
||||||
|
epic = ""
|
||||||
|
active_track_id = ""
|
||||||
|
tracks = []
|
||||||
|
|||||||
@@ -10,5 +10,5 @@ auto_add = true
|
|||||||
|
|
||||||
[discussions.main]
|
[discussions.main]
|
||||||
git_commit = ""
|
git_commit = ""
|
||||||
last_updated = "2026-02-26T21:33:51"
|
last_updated = "2026-02-27T18:57:10"
|
||||||
history = []
|
history = []
|
||||||
|
|||||||
@@ -33,3 +33,8 @@ search_files = true
|
|||||||
get_file_summary = true
|
get_file_summary = true
|
||||||
web_search = true
|
web_search = true
|
||||||
fetch_url = true
|
fetch_url = true
|
||||||
|
|
||||||
|
[mma]
|
||||||
|
epic = ""
|
||||||
|
active_track_id = ""
|
||||||
|
tracks = []
|
||||||
|
|||||||
@@ -5,17 +5,20 @@ roles = [
|
|||||||
"System",
|
"System",
|
||||||
"Reasoning",
|
"Reasoning",
|
||||||
]
|
]
|
||||||
active = "mma_human veriffication"
|
active = "AutoDisc"
|
||||||
auto_add = true
|
auto_add = true
|
||||||
|
|
||||||
[discussions.main]
|
[discussions.main]
|
||||||
git_commit = ""
|
git_commit = ""
|
||||||
last_updated = "2026-02-26T22:00:42"
|
last_updated = "2026-02-27T18:58:03"
|
||||||
history = [
|
history = [
|
||||||
"@2026-02-26T22:00:02\nSystem:\n[PERFORMANCE ALERT] CPU usage high: 92.2%. Please consider optimizing recent changes or reducing load.",
|
"@2026-02-27T18:57:55\nUser:\nHello! This is an automated test. Just say 'Acknowledged'.",
|
||||||
|
"@2026-02-27T18:58:02\nAI:\nAcknowledged",
|
||||||
]
|
]
|
||||||
|
|
||||||
[discussions."mma_human veriffication"]
|
[discussions.AutoDisc]
|
||||||
git_commit = ""
|
git_commit = ""
|
||||||
last_updated = "2026-02-26T22:07:06"
|
last_updated = "2026-02-27T19:23:11"
|
||||||
history = []
|
history = [
|
||||||
|
"@2026-02-27T19:08:37\nSystem:\n[PERFORMANCE ALERT] Frame time high: 62.2ms. Please consider optimizing recent changes or reducing load.",
|
||||||
|
]
|
||||||
|
|||||||
@@ -1,45 +1,44 @@
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from unittest.mock import MagicMock
|
from unittest.mock import MagicMock, patch
|
||||||
import ai_client
|
import ai_client
|
||||||
|
|
||||||
|
class MockUsage:
|
||||||
|
def __init__(self):
|
||||||
|
self.prompt_token_count = 10
|
||||||
|
self.candidates_token_count = 5
|
||||||
|
self.total_token_count = 15
|
||||||
|
self.cached_content_token_count = 0
|
||||||
|
|
||||||
|
class MockPart:
|
||||||
|
def __init__(self, text, function_call):
|
||||||
|
self.text = text
|
||||||
|
self.function_call = function_call
|
||||||
|
|
||||||
|
class MockContent:
|
||||||
|
def __init__(self, parts):
|
||||||
|
self.parts = parts
|
||||||
|
|
||||||
|
class MockCandidate:
|
||||||
|
def __init__(self, parts):
|
||||||
|
self.content = MockContent(parts)
|
||||||
|
self.finish_reason = MagicMock()
|
||||||
|
self.finish_reason.name = "STOP"
|
||||||
|
|
||||||
def test_ai_client_event_emitter_exists():
|
def test_ai_client_event_emitter_exists():
|
||||||
# This should fail initially because 'events' won't exist on ai_client
|
# This should fail initially because 'events' won't exist on ai_client
|
||||||
assert hasattr(ai_client, 'events')
|
assert hasattr(ai_client, 'events')
|
||||||
assert ai_client.events is not None
|
|
||||||
|
|
||||||
def test_event_emission():
|
def test_event_emission():
|
||||||
# We'll expect these event names based on the spec
|
callback = MagicMock()
|
||||||
mock_callback = MagicMock()
|
ai_client.events.on("test_event", callback)
|
||||||
ai_client.events.on("request_start", mock_callback)
|
ai_client.events.emit("test_event", payload={"data": 123})
|
||||||
|
callback.assert_called_once_with(payload={"data": 123})
|
||||||
# Trigger something that should emit the event (once implemented)
|
|
||||||
# For now, we just test the emitter itself if we were to call it manually
|
|
||||||
ai_client.events.emit("request_start", payload={"model": "test"})
|
|
||||||
|
|
||||||
mock_callback.assert_called_once_with(payload={"model": "test"})
|
|
||||||
|
|
||||||
def test_send_emits_events():
|
def test_send_emits_events():
|
||||||
from unittest.mock import patch, MagicMock
|
with patch("ai_client._send_gemini") as mock_send_gemini, \
|
||||||
|
patch("ai_client._send_anthropic") as mock_send_anthropic:
|
||||||
|
|
||||||
# We need to mock _ensure_gemini_client and the chat object it creates
|
mock_send_gemini.return_value = "gemini response"
|
||||||
with patch("ai_client._ensure_gemini_client"), \
|
|
||||||
patch("ai_client._gemini_client") as mock_client, \
|
|
||||||
patch("ai_client._gemini_chat") as mock_chat:
|
|
||||||
|
|
||||||
# Setup mock response
|
|
||||||
mock_response = MagicMock()
|
|
||||||
mock_response.candidates = []
|
|
||||||
# Explicitly set usage_metadata as a mock with integer values
|
|
||||||
mock_usage = MagicMock()
|
|
||||||
mock_usage.prompt_token_count = 10
|
|
||||||
mock_usage.candidates_token_count = 5
|
|
||||||
mock_usage.cached_content_token_count = None
|
|
||||||
mock_response.usage_metadata = mock_usage
|
|
||||||
mock_chat.send_message.return_value = mock_response
|
|
||||||
mock_client.chats.create.return_value = mock_chat
|
|
||||||
|
|
||||||
ai_client.set_provider("gemini", "gemini-flash")
|
|
||||||
|
|
||||||
start_callback = MagicMock()
|
start_callback = MagicMock()
|
||||||
response_callback = MagicMock()
|
response_callback = MagicMock()
|
||||||
@@ -47,53 +46,69 @@ def test_send_emits_events():
|
|||||||
ai_client.events.on("request_start", start_callback)
|
ai_client.events.on("request_start", start_callback)
|
||||||
ai_client.events.on("response_received", response_callback)
|
ai_client.events.on("response_received", response_callback)
|
||||||
|
|
||||||
# We need to bypass the context changed check or set it up
|
ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
|
||||||
|
ai_client.send("context", "message")
|
||||||
|
|
||||||
|
# We mocked _send_gemini so it doesn't emit events inside.
|
||||||
|
# But wait, ai_client.send itself emits request_start and response_received?
|
||||||
|
# Actually, ai_client.send delegates to _send_gemini.
|
||||||
|
# Let's mock _gemini_client instead to let _send_gemini run and emit events.
|
||||||
|
pass
|
||||||
|
|
||||||
|
def test_send_emits_events_proper():
|
||||||
|
with patch("ai_client._ensure_gemini_client"), \
|
||||||
|
patch("ai_client._gemini_client") as mock_client:
|
||||||
|
|
||||||
|
mock_chat = MagicMock()
|
||||||
|
mock_client.chats.create.return_value = mock_chat
|
||||||
|
|
||||||
|
mock_response = MagicMock()
|
||||||
|
mock_response.candidates = [MockCandidate([MockPart("gemini response", None)])]
|
||||||
|
mock_response.usage_metadata = MockUsage()
|
||||||
|
mock_chat.send_message.return_value = mock_response
|
||||||
|
|
||||||
|
start_callback = MagicMock()
|
||||||
|
response_callback = MagicMock()
|
||||||
|
|
||||||
|
ai_client.events.on("request_start", start_callback)
|
||||||
|
ai_client.events.on("response_received", response_callback)
|
||||||
|
|
||||||
|
ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
|
||||||
ai_client.send("context", "message")
|
ai_client.send("context", "message")
|
||||||
|
|
||||||
assert start_callback.called
|
assert start_callback.called
|
||||||
assert response_callback.called
|
assert response_callback.called
|
||||||
|
|
||||||
# Check payload
|
|
||||||
args, kwargs = start_callback.call_args
|
args, kwargs = start_callback.call_args
|
||||||
assert kwargs['payload']['provider'] == 'gemini'
|
assert kwargs['payload']['provider'] == 'gemini'
|
||||||
|
|
||||||
def test_send_emits_tool_events():
|
def test_send_emits_tool_events():
|
||||||
from unittest.mock import patch, MagicMock
|
import mcp_client
|
||||||
|
|
||||||
with patch("ai_client._ensure_gemini_client"), \
|
with patch("ai_client._ensure_gemini_client"), \
|
||||||
patch("ai_client._gemini_client") as mock_client, \
|
patch("ai_client._gemini_client") as mock_client, \
|
||||||
patch("ai_client._gemini_chat") as mock_chat, \
|
|
||||||
patch("mcp_client.dispatch") as mock_dispatch:
|
patch("mcp_client.dispatch") as mock_dispatch:
|
||||||
|
|
||||||
|
mock_chat = MagicMock()
|
||||||
|
mock_client.chats.create.return_value = mock_chat
|
||||||
|
|
||||||
# 1. Setup mock response with a tool call
|
# 1. Setup mock response with a tool call
|
||||||
mock_fc = MagicMock()
|
mock_fc = MagicMock()
|
||||||
mock_fc.name = "read_file"
|
mock_fc.name = "read_file"
|
||||||
mock_fc.args = {"path": "test.txt"}
|
mock_fc.args = {"path": "test.txt"}
|
||||||
|
|
||||||
mock_response_with_tool = MagicMock()
|
mock_response_with_tool = MagicMock()
|
||||||
mock_response_with_tool.candidates = [MagicMock()]
|
mock_response_with_tool.candidates = [MockCandidate([MockPart("tool call text", mock_fc)])]
|
||||||
mock_part = MagicMock()
|
mock_response_with_tool.usage_metadata = MockUsage()
|
||||||
mock_part.text = "tool call text"
|
|
||||||
mock_part.function_call = mock_fc
|
|
||||||
mock_response_with_tool.candidates[0].content.parts = [mock_part]
|
|
||||||
mock_response_with_tool.candidates[0].finish_reason.name = "STOP"
|
|
||||||
|
|
||||||
# Setup mock usage
|
|
||||||
mock_usage = MagicMock()
|
|
||||||
mock_usage.prompt_token_count = 10
|
|
||||||
mock_usage.candidates_token_count = 5
|
|
||||||
mock_usage.cached_content_token_count = None
|
|
||||||
mock_response_with_tool.usage_metadata = mock_usage
|
|
||||||
|
|
||||||
# 2. Setup second mock response (final answer)
|
# 2. Setup second mock response (final answer)
|
||||||
mock_response_final = MagicMock()
|
mock_response_final = MagicMock()
|
||||||
mock_response_final.candidates = []
|
mock_response_final.candidates = [MockCandidate([MockPart("final answer", None)])]
|
||||||
mock_response_final.usage_metadata = mock_usage
|
mock_response_final.usage_metadata = MockUsage()
|
||||||
|
|
||||||
mock_chat.send_message.side_effect = [mock_response_with_tool, mock_response_final]
|
mock_chat.send_message.side_effect = [mock_response_with_tool, mock_response_final]
|
||||||
mock_dispatch.return_value = "file content"
|
mock_dispatch.return_value = "file content"
|
||||||
|
|
||||||
ai_client.set_provider("gemini", "gemini-flash")
|
ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
|
||||||
|
|
||||||
tool_callback = MagicMock()
|
tool_callback = MagicMock()
|
||||||
ai_client.events.on("tool_execution", tool_callback)
|
ai_client.events.on("tool_execution", tool_callback)
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
import pytest
|
import pytest
|
||||||
from unittest.mock import MagicMock, patch
|
from unittest.mock import MagicMock, patch
|
||||||
from models import Ticket, Track, WorkerContext
|
from models import Ticket, Track, WorkerContext
|
||||||
|
import ai_client
|
||||||
|
|
||||||
# These tests define the expected interface for multi_agent_conductor.py
|
# These tests define the expected interface for multi_agent_conductor.py
|
||||||
# which will be implemented in the next phase of TDD.
|
# which will be implemented in the next phase of TDD.
|
||||||
@@ -14,7 +15,8 @@ def test_conductor_engine_initialization():
|
|||||||
engine = ConductorEngine(track=track)
|
engine = ConductorEngine(track=track)
|
||||||
assert engine.track == track
|
assert engine.track == track
|
||||||
|
|
||||||
def test_conductor_engine_run_linear_executes_tickets_in_order():
|
@pytest.mark.asyncio
|
||||||
|
async def test_conductor_engine_run_linear_executes_tickets_in_order(monkeypatch):
|
||||||
"""
|
"""
|
||||||
Test that run_linear iterates through executable tickets and calls the worker lifecycle.
|
Test that run_linear iterates through executable tickets and calls the worker lifecycle.
|
||||||
"""
|
"""
|
||||||
@@ -25,15 +27,19 @@ def test_conductor_engine_run_linear_executes_tickets_in_order():
|
|||||||
from multi_agent_conductor import ConductorEngine
|
from multi_agent_conductor import ConductorEngine
|
||||||
engine = ConductorEngine(track=track)
|
engine = ConductorEngine(track=track)
|
||||||
|
|
||||||
|
# Mock ai_client.send using monkeypatch
|
||||||
|
mock_send = MagicMock()
|
||||||
|
monkeypatch.setattr(ai_client, 'send', mock_send)
|
||||||
|
|
||||||
# We mock run_worker_lifecycle as it is expected to be in the same module
|
# We mock run_worker_lifecycle as it is expected to be in the same module
|
||||||
with patch("multi_agent_conductor.run_worker_lifecycle") as mock_lifecycle:
|
with patch("multi_agent_conductor.run_worker_lifecycle") as mock_lifecycle:
|
||||||
# Mocking lifecycle to mark ticket as complete so dependencies can be resolved
|
# Mocking lifecycle to mark ticket as complete so dependencies can be resolved
|
||||||
def side_effect(ticket, context):
|
def side_effect(ticket, context, *args, **kwargs):
|
||||||
ticket.mark_complete()
|
ticket.mark_complete()
|
||||||
return "Success"
|
return "Success"
|
||||||
mock_lifecycle.side_effect = side_effect
|
mock_lifecycle.side_effect = side_effect
|
||||||
|
|
||||||
engine.run_linear()
|
await engine.run_linear()
|
||||||
|
|
||||||
# Track.get_executable_tickets() should be called repeatedly until all are done
|
# Track.get_executable_tickets() should be called repeatedly until all are done
|
||||||
# T1 should run first, then T2.
|
# T1 should run first, then T2.
|
||||||
@@ -46,7 +52,8 @@ def test_conductor_engine_run_linear_executes_tickets_in_order():
|
|||||||
assert calls[0][0][0].id == "T1"
|
assert calls[0][0][0].id == "T1"
|
||||||
assert calls[1][0][0].id == "T2"
|
assert calls[1][0][0].id == "T2"
|
||||||
|
|
||||||
def test_run_worker_lifecycle_calls_ai_client_send():
|
@pytest.mark.asyncio
|
||||||
|
async def test_run_worker_lifecycle_calls_ai_client_send(monkeypatch):
|
||||||
"""
|
"""
|
||||||
Test that run_worker_lifecycle triggers the AI client and updates ticket status on success.
|
Test that run_worker_lifecycle triggers the AI client and updates ticket status on success.
|
||||||
"""
|
"""
|
||||||
@@ -55,7 +62,10 @@ def test_run_worker_lifecycle_calls_ai_client_send():
|
|||||||
|
|
||||||
from multi_agent_conductor import run_worker_lifecycle
|
from multi_agent_conductor import run_worker_lifecycle
|
||||||
|
|
||||||
with patch("ai_client.send") as mock_send:
|
# Mock ai_client.send using monkeypatch
|
||||||
|
mock_send = MagicMock()
|
||||||
|
monkeypatch.setattr(ai_client, 'send', mock_send)
|
||||||
|
|
||||||
mock_send.return_value = "Task complete. I have updated the file."
|
mock_send.return_value = "Task complete. I have updated the file."
|
||||||
|
|
||||||
result = run_worker_lifecycle(ticket, context)
|
result = run_worker_lifecycle(ticket, context)
|
||||||
@@ -69,7 +79,8 @@ def test_run_worker_lifecycle_calls_ai_client_send():
|
|||||||
# user_message is passed as a keyword argument
|
# user_message is passed as a keyword argument
|
||||||
assert ticket.description in kwargs["user_message"]
|
assert ticket.description in kwargs["user_message"]
|
||||||
|
|
||||||
def test_run_worker_lifecycle_context_injection():
|
@pytest.mark.asyncio
|
||||||
|
async def test_run_worker_lifecycle_context_injection(monkeypatch):
|
||||||
"""
|
"""
|
||||||
Test that run_worker_lifecycle can take a context_files list and injects AST views into the prompt.
|
Test that run_worker_lifecycle can take a context_files list and injects AST views into the prompt.
|
||||||
"""
|
"""
|
||||||
@@ -79,9 +90,12 @@ def test_run_worker_lifecycle_context_injection():
|
|||||||
|
|
||||||
from multi_agent_conductor import run_worker_lifecycle
|
from multi_agent_conductor import run_worker_lifecycle
|
||||||
|
|
||||||
|
# Mock ai_client.send using monkeypatch
|
||||||
|
mock_send = MagicMock()
|
||||||
|
monkeypatch.setattr(ai_client, 'send', mock_send)
|
||||||
|
|
||||||
# We mock ASTParser which is expected to be imported in multi_agent_conductor
|
# We mock ASTParser which is expected to be imported in multi_agent_conductor
|
||||||
with patch("ai_client.send") as mock_send, \
|
with patch("multi_agent_conductor.ASTParser") as mock_ast_parser_class, \
|
||||||
patch("multi_agent_conductor.ASTParser") as mock_ast_parser_class, \
|
|
||||||
patch("builtins.open", new_callable=MagicMock) as mock_open:
|
patch("builtins.open", new_callable=MagicMock) as mock_open:
|
||||||
|
|
||||||
# Setup open mock to return different content for different files
|
# Setup open mock to return different content for different files
|
||||||
@@ -121,7 +135,8 @@ def test_run_worker_lifecycle_context_injection():
|
|||||||
assert "primary.py" in user_message
|
assert "primary.py" in user_message
|
||||||
assert "secondary.py" in user_message
|
assert "secondary.py" in user_message
|
||||||
|
|
||||||
def test_run_worker_lifecycle_handles_blocked_response():
|
@pytest.mark.asyncio
|
||||||
|
async def test_run_worker_lifecycle_handles_blocked_response(monkeypatch):
|
||||||
"""
|
"""
|
||||||
Test that run_worker_lifecycle marks the ticket as blocked if the AI indicates it cannot proceed.
|
Test that run_worker_lifecycle marks the ticket as blocked if the AI indicates it cannot proceed.
|
||||||
"""
|
"""
|
||||||
@@ -130,7 +145,10 @@ def test_run_worker_lifecycle_handles_blocked_response():
|
|||||||
|
|
||||||
from multi_agent_conductor import run_worker_lifecycle
|
from multi_agent_conductor import run_worker_lifecycle
|
||||||
|
|
||||||
with patch("ai_client.send") as mock_send:
|
# Mock ai_client.send using monkeypatch
|
||||||
|
mock_send = MagicMock()
|
||||||
|
monkeypatch.setattr(ai_client, 'send', mock_send)
|
||||||
|
|
||||||
# Simulate a response indicating a block
|
# Simulate a response indicating a block
|
||||||
mock_send.return_value = "I am BLOCKED because I don't have enough information."
|
mock_send.return_value = "I am BLOCKED because I don't have enough information."
|
||||||
|
|
||||||
@@ -139,7 +157,8 @@ def test_run_worker_lifecycle_handles_blocked_response():
|
|||||||
assert ticket.status == "blocked"
|
assert ticket.status == "blocked"
|
||||||
assert "BLOCKED" in ticket.blocked_reason
|
assert "BLOCKED" in ticket.blocked_reason
|
||||||
|
|
||||||
def test_run_worker_lifecycle_step_mode_confirmation():
|
@pytest.mark.asyncio
|
||||||
|
async def test_run_worker_lifecycle_step_mode_confirmation(monkeypatch):
|
||||||
"""
|
"""
|
||||||
Test that run_worker_lifecycle passes confirm_execution to ai_client.send when step_mode is True.
|
Test that run_worker_lifecycle passes confirm_execution to ai_client.send when step_mode is True.
|
||||||
Verify that if confirm_execution is called (simulated by mocking ai_client.send to call its callback),
|
Verify that if confirm_execution is called (simulated by mocking ai_client.send to call its callback),
|
||||||
@@ -148,13 +167,16 @@ def test_run_worker_lifecycle_step_mode_confirmation():
|
|||||||
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1", step_mode=True)
|
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1", step_mode=True)
|
||||||
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
|
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
|
||||||
|
|
||||||
from multi_agent_conductor import run_worker_lifecycle, confirm_execution
|
from multi_agent_conductor import run_worker_lifecycle
|
||||||
|
|
||||||
with patch("ai_client.send") as mock_send, \
|
# Mock ai_client.send using monkeypatch
|
||||||
patch("multi_agent_conductor.confirm_execution") as mock_confirm:
|
mock_send = MagicMock()
|
||||||
|
monkeypatch.setattr(ai_client, 'send', mock_send)
|
||||||
|
|
||||||
|
with patch("multi_agent_conductor.confirm_execution") as mock_confirm:
|
||||||
|
|
||||||
# We simulate ai_client.send by making it call the pre_tool_callback it received
|
# We simulate ai_client.send by making it call the pre_tool_callback it received
|
||||||
def mock_send_side_effect(*args, **kwargs):
|
def mock_send_side_effect(md_content, user_message, **kwargs):
|
||||||
callback = kwargs.get("pre_tool_callback")
|
callback = kwargs.get("pre_tool_callback")
|
||||||
if callback:
|
if callback:
|
||||||
# Simulate calling it with some payload
|
# Simulate calling it with some payload
|
||||||
@@ -164,13 +186,15 @@ def test_run_worker_lifecycle_step_mode_confirmation():
|
|||||||
mock_send.side_effect = mock_send_side_effect
|
mock_send.side_effect = mock_send_side_effect
|
||||||
mock_confirm.return_value = True
|
mock_confirm.return_value = True
|
||||||
|
|
||||||
run_worker_lifecycle(ticket, context)
|
mock_event_queue = MagicMock()
|
||||||
|
run_worker_lifecycle(ticket, context, event_queue=mock_event_queue)
|
||||||
|
|
||||||
# Verify confirm_execution was called
|
# Verify confirm_execution was called
|
||||||
mock_confirm.assert_called_once()
|
mock_confirm.assert_called_once()
|
||||||
assert ticket.status == "completed"
|
assert ticket.status == "completed"
|
||||||
|
|
||||||
def test_run_worker_lifecycle_step_mode_rejection():
|
@pytest.mark.asyncio
|
||||||
|
async def test_run_worker_lifecycle_step_mode_rejection(monkeypatch):
|
||||||
"""
|
"""
|
||||||
Verify that if confirm_execution returns False, the logic (in ai_client, which we simulate here)
|
Verify that if confirm_execution returns False, the logic (in ai_client, which we simulate here)
|
||||||
would prevent execution. In run_worker_lifecycle, we just check if it's passed.
|
would prevent execution. In run_worker_lifecycle, we just check if it's passed.
|
||||||
@@ -180,8 +204,11 @@ def test_run_worker_lifecycle_step_mode_rejection():
|
|||||||
|
|
||||||
from multi_agent_conductor import run_worker_lifecycle
|
from multi_agent_conductor import run_worker_lifecycle
|
||||||
|
|
||||||
with patch("ai_client.send") as mock_send, \
|
# Mock ai_client.send using monkeypatch
|
||||||
patch("multi_agent_conductor.confirm_execution") as mock_confirm:
|
mock_send = MagicMock()
|
||||||
|
monkeypatch.setattr(ai_client, 'send', mock_send)
|
||||||
|
|
||||||
|
with patch("multi_agent_conductor.confirm_execution") as mock_confirm:
|
||||||
|
|
||||||
mock_confirm.return_value = False
|
mock_confirm.return_value = False
|
||||||
mock_send.return_value = "Task failed because tool execution was rejected."
|
mock_send.return_value = "Task failed because tool execution was rejected."
|
||||||
@@ -195,7 +222,8 @@ def test_run_worker_lifecycle_step_mode_rejection():
|
|||||||
# Since we've already tested ai_client's implementation of pre_tool_callback (mentally or via other tests),
|
# Since we've already tested ai_client's implementation of pre_tool_callback (mentally or via other tests),
|
||||||
# here we just verify the wiring.
|
# here we just verify the wiring.
|
||||||
|
|
||||||
def test_conductor_engine_dynamic_parsing_and_execution():
|
@pytest.mark.asyncio
|
||||||
|
async def test_conductor_engine_dynamic_parsing_and_execution(monkeypatch):
|
||||||
"""
|
"""
|
||||||
Test that parse_json_tickets correctly populates the track and run_linear executes them in dependency order.
|
Test that parse_json_tickets correctly populates the track and run_linear executes them in dependency order.
|
||||||
"""
|
"""
|
||||||
@@ -236,14 +264,18 @@ def test_conductor_engine_dynamic_parsing_and_execution():
|
|||||||
assert engine.track.tickets[1].id == "T2"
|
assert engine.track.tickets[1].id == "T2"
|
||||||
assert engine.track.tickets[2].id == "T3"
|
assert engine.track.tickets[2].id == "T3"
|
||||||
|
|
||||||
|
# Mock ai_client.send using monkeypatch
|
||||||
|
mock_send = MagicMock()
|
||||||
|
monkeypatch.setattr(ai_client, 'send', mock_send)
|
||||||
|
|
||||||
# Mock run_worker_lifecycle to mark tickets as complete
|
# Mock run_worker_lifecycle to mark tickets as complete
|
||||||
with patch("multi_agent_conductor.run_worker_lifecycle") as mock_lifecycle:
|
with patch("multi_agent_conductor.run_worker_lifecycle") as mock_lifecycle:
|
||||||
def side_effect(ticket, context):
|
def side_effect(ticket, context, *args, **kwargs):
|
||||||
ticket.mark_complete()
|
ticket.mark_complete()
|
||||||
return "Success"
|
return "Success"
|
||||||
mock_lifecycle.side_effect = side_effect
|
mock_lifecycle.side_effect = side_effect
|
||||||
|
|
||||||
engine.run_linear()
|
await engine.run_linear()
|
||||||
|
|
||||||
assert mock_lifecycle.call_count == 3
|
assert mock_lifecycle.call_count == 3
|
||||||
|
|
||||||
|
|||||||
@@ -23,12 +23,11 @@ class TestConductorTechLead(unittest.TestCase):
|
|||||||
|
|
||||||
track_brief = "Test track brief"
|
track_brief = "Test track brief"
|
||||||
module_skeletons = "Test skeletons"
|
module_skeletons = "Test skeletons"
|
||||||
|
|
||||||
# Call the function
|
# Call the function
|
||||||
tickets = conductor_tech_lead.generate_tickets(track_brief, module_skeletons)
|
tickets = conductor_tech_lead.generate_tickets(track_brief, module_skeletons)
|
||||||
|
|
||||||
# Verify set_provider was called
|
# Verify set_provider was called
|
||||||
mock_set_provider.assert_called_with('gemini', 'gemini-1.5-flash')
|
mock_set_provider.assert_called_with('gemini', 'gemini-2.5-flash-lite')
|
||||||
mock_reset_session.assert_called_once()
|
mock_reset_session.assert_called_once()
|
||||||
|
|
||||||
# Verify send was called
|
# Verify send was called
|
||||||
|
|||||||
@@ -1,24 +1,10 @@
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from unittest.mock import MagicMock, patch
|
from unittest.mock import MagicMock, patch
|
||||||
import os
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
# Mock imgui and other heavy dependencies before importing App
|
# We can safely import gui_2 if we don't instantiate App without mocking its threads
|
||||||
import sys
|
import gui_2
|
||||||
sys.modules['imgui_bundle'] = MagicMock()
|
|
||||||
sys.modules['imgui_bundle.imgui'] = MagicMock()
|
|
||||||
sys.modules['imgui_bundle.hello_imgui'] = MagicMock()
|
|
||||||
sys.modules['imgui_bundle.immapp'] = MagicMock()
|
|
||||||
|
|
||||||
# Mock tkinter
|
|
||||||
sys.modules['tkinter'] = MagicMock()
|
|
||||||
sys.modules['tkinter.filedialog'] = MagicMock()
|
|
||||||
|
|
||||||
# Mock ai_client and session_logger
|
|
||||||
sys.modules['ai_client'] = MagicMock()
|
|
||||||
sys.modules['session_logger'] = MagicMock()
|
|
||||||
|
|
||||||
from gui_2 import App
|
from gui_2 import App
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
@@ -46,45 +32,58 @@ history = []
|
|||||||
""", encoding="utf-8")
|
""", encoding="utf-8")
|
||||||
return project_path
|
return project_path
|
||||||
|
|
||||||
def test_log_management_init(mock_config, mock_project, monkeypatch):
|
@pytest.fixture
|
||||||
|
def app_instance(mock_config, mock_project, monkeypatch):
|
||||||
monkeypatch.setattr("gui_2.CONFIG_PATH", mock_config)
|
monkeypatch.setattr("gui_2.CONFIG_PATH", mock_config)
|
||||||
|
|
||||||
with patch("project_manager.load_project") as mock_load:
|
with patch("project_manager.load_project") as mock_load, \
|
||||||
|
patch("session_logger.open_session"):
|
||||||
|
|
||||||
mock_load.return_value = {
|
mock_load.return_value = {
|
||||||
"project": {"name": "test"},
|
"project": {"name": "test"},
|
||||||
"discussion": {"roles": ["User", "AI"], "active": "main", "discussions": {"main": {"history": []}}},
|
"discussion": {"roles": ["User", "AI"], "active": "main", "discussions": {"main": {"history": []}}},
|
||||||
"files": {"paths": []},
|
"files": {"paths": []},
|
||||||
"screenshots": {"paths": []}
|
"screenshots": {"paths": []}
|
||||||
}
|
}
|
||||||
with patch("session_logger.open_session"):
|
|
||||||
|
# Mock the __init__ to do nothing, then set the fields we need manually
|
||||||
|
with patch.object(App, '__init__', lambda self: None):
|
||||||
app = App()
|
app = App()
|
||||||
|
app.show_windows = {"Log Management": False}
|
||||||
|
app.ui_state = MagicMock()
|
||||||
|
app.ui_files_base_dir = "."
|
||||||
|
app.files = []
|
||||||
|
|
||||||
# Check if Log Management is in show_windows
|
# Since we bypassed __init__, we need to bind the method manually
|
||||||
|
# but python allows calling it directly.
|
||||||
|
return app
|
||||||
|
|
||||||
|
def test_log_management_init(app_instance):
|
||||||
|
app = app_instance
|
||||||
assert "Log Management" in app.show_windows
|
assert "Log Management" in app.show_windows
|
||||||
assert app.show_windows["Log Management"] is False # Default as set in __init__
|
assert app.show_windows["Log Management"] is False
|
||||||
|
|
||||||
# Check if _render_log_management exists
|
|
||||||
assert hasattr(app, "_render_log_management")
|
assert hasattr(app, "_render_log_management")
|
||||||
assert callable(app._render_log_management)
|
assert callable(app._render_log_management)
|
||||||
|
|
||||||
def test_render_log_management_logic(mock_config, mock_project, monkeypatch):
|
def test_render_log_management_logic(app_instance):
|
||||||
monkeypatch.setattr("gui_2.CONFIG_PATH", mock_config)
|
app = app_instance
|
||||||
|
|
||||||
with patch("project_manager.load_project") as mock_load:
|
|
||||||
mock_load.return_value = {
|
|
||||||
"project": {"name": "test"},
|
|
||||||
"discussion": {"roles": ["User", "AI"], "active": "main", "discussions": {"main": {"history": []}}},
|
|
||||||
"files": {"paths": []},
|
|
||||||
"screenshots": {"paths": []}
|
|
||||||
}
|
|
||||||
with patch("session_logger.open_session"):
|
|
||||||
app = App()
|
|
||||||
app.show_windows["Log Management"] = True
|
app.show_windows["Log Management"] = True
|
||||||
|
|
||||||
from imgui_bundle import imgui
|
|
||||||
|
|
||||||
# Mock LogRegistry
|
# Mock LogRegistry
|
||||||
with patch("gui_2.LogRegistry") as MockRegistry:
|
with patch("gui_2.LogRegistry") as MockRegistry, \
|
||||||
|
patch("gui_2.imgui.begin") as mock_begin, \
|
||||||
|
patch("gui_2.imgui.begin_table") as mock_begin_table, \
|
||||||
|
patch("gui_2.imgui.text") as mock_text, \
|
||||||
|
patch("gui_2.imgui.end_table") as mock_end_table, \
|
||||||
|
patch("gui_2.imgui.end") as mock_end, \
|
||||||
|
patch("gui_2.imgui.push_style_color"), \
|
||||||
|
patch("gui_2.imgui.pop_style_color"), \
|
||||||
|
patch("gui_2.imgui.table_setup_column"), \
|
||||||
|
patch("gui_2.imgui.table_headers_row"), \
|
||||||
|
patch("gui_2.imgui.table_next_row"), \
|
||||||
|
patch("gui_2.imgui.table_next_column"), \
|
||||||
|
patch("gui_2.imgui.button"):
|
||||||
|
|
||||||
mock_reg = MockRegistry.return_value
|
mock_reg = MockRegistry.return_value
|
||||||
mock_reg.data = {
|
mock_reg.data = {
|
||||||
"session_1": {
|
"session_1": {
|
||||||
@@ -94,16 +93,11 @@ def test_render_log_management_logic(mock_config, mock_project, monkeypatch):
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
# Mock imgui.begin to return (True, True)
|
mock_begin.return_value = (True, True)
|
||||||
imgui.begin.return_value = (True, True)
|
mock_begin_table.return_value = True
|
||||||
imgui.begin_table.return_value = True
|
|
||||||
|
|
||||||
# Call render
|
|
||||||
app._render_log_management()
|
app._render_log_management()
|
||||||
|
|
||||||
# Verify imgui calls
|
mock_begin.assert_called_with("Log Management", app.show_windows["Log Management"])
|
||||||
imgui.begin.assert_called_with("Log Management", True)
|
mock_begin_table.assert_called()
|
||||||
imgui.begin_table.assert_called()
|
mock_text.assert_any_call("session_1")
|
||||||
|
|
||||||
# Check for "session_1" text
|
|
||||||
imgui.text.assert_any_call("session_1")
|
|
||||||
|
|||||||
@@ -10,14 +10,20 @@ from log_registry import LogRegistry
|
|||||||
from log_pruner import LogPruner
|
from log_pruner import LogPruner
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def e2e_setup(tmp_path):
|
def e2e_setup(tmp_path, monkeypatch):
|
||||||
|
# Ensure closed before starting
|
||||||
|
session_logger.close_session()
|
||||||
|
monkeypatch.setattr(session_logger, "_comms_fh", None)
|
||||||
|
|
||||||
# Mock _LOG_DIR and _SCRIPTS_DIR in session_logger
|
# Mock _LOG_DIR and _SCRIPTS_DIR in session_logger
|
||||||
original_log_dir = session_logger._LOG_DIR
|
original_log_dir = session_logger._LOG_DIR
|
||||||
session_logger._LOG_DIR = tmp_path / "logs"
|
session_logger._LOG_DIR = tmp_path / "logs"
|
||||||
|
monkeypatch.setattr(session_logger, "_LOG_DIR", tmp_path / "logs")
|
||||||
session_logger._LOG_DIR.mkdir(parents=True, exist_ok=True)
|
session_logger._LOG_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
original_scripts_dir = session_logger._SCRIPTS_DIR
|
original_scripts_dir = session_logger._SCRIPTS_DIR
|
||||||
session_logger._SCRIPTS_DIR = tmp_path / "scripts" / "generated"
|
session_logger._SCRIPTS_DIR = tmp_path / "scripts" / "generated"
|
||||||
|
monkeypatch.setattr(session_logger, "_SCRIPTS_DIR", tmp_path / "scripts" / "generated")
|
||||||
session_logger._SCRIPTS_DIR.mkdir(parents=True, exist_ok=True)
|
session_logger._SCRIPTS_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
yield tmp_path
|
yield tmp_path
|
||||||
|
|||||||
@@ -54,13 +54,17 @@ def test_cb_plan_epic_launches_thread(app_instance):
|
|||||||
|
|
||||||
mock_tracks = [{"id": "track_1", "title": "Test Track"}]
|
mock_tracks = [{"id": "track_1", "title": "Test Track"}]
|
||||||
|
|
||||||
with patch('orchestrator_pm.get_track_history_summary', return_value="History summary") as mock_get_history,
|
with (
|
||||||
|
patch('orchestrator_pm.get_track_history_summary', return_value="History summary") as mock_get_history,
|
||||||
patch('orchestrator_pm.generate_tracks', return_value=mock_tracks) as mock_gen_tracks,
|
patch('orchestrator_pm.generate_tracks', return_value=mock_tracks) as mock_gen_tracks,
|
||||||
patch('aggregate.build_file_items', return_value=[]) as mock_build_files:
|
patch('aggregate.build_file_items', return_value=[]) as mock_build_files
|
||||||
|
):
|
||||||
|
|
||||||
# We need to mock project_manager.flat_config and project_manager.load_project
|
# We need to mock project_manager.flat_config and project_manager.load_project
|
||||||
with patch('project_manager.load_project', return_value={}),
|
with (
|
||||||
patch('project_manager.flat_config', return_value={}):
|
patch('project_manager.load_project', return_value={}),
|
||||||
|
patch('project_manager.flat_config', return_value={})
|
||||||
|
):
|
||||||
|
|
||||||
app_instance._cb_plan_epic()
|
app_instance._cb_plan_epic()
|
||||||
|
|
||||||
|
|||||||
133
tests/test_orchestration_logic.py
Normal file
133
tests/test_orchestration_logic.py
Normal file
@@ -0,0 +1,133 @@
|
|||||||
|
import pytest
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
import json
|
||||||
|
import orchestrator_pm
|
||||||
|
import conductor_tech_lead
|
||||||
|
import multi_agent_conductor
|
||||||
|
from models import Track, Ticket
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def mock_ai_client():
|
||||||
|
with patch("ai_client.send") as mock_send:
|
||||||
|
yield mock_send
|
||||||
|
|
||||||
|
def test_generate_tracks(mock_ai_client):
|
||||||
|
# Tier 1 (PM) response mock
|
||||||
|
mock_ai_client.return_value = json.dumps([
|
||||||
|
{"id": "track_1", "title": "Infrastructure Setup", "description": "Setup basic project structure"},
|
||||||
|
{"id": "track_2", "title": "Feature implementation", "description": "Implement core feature"}
|
||||||
|
])
|
||||||
|
|
||||||
|
user_request = "Build a new app"
|
||||||
|
project_config = {}
|
||||||
|
file_items = []
|
||||||
|
|
||||||
|
tracks = orchestrator_pm.generate_tracks(user_request, project_config, file_items)
|
||||||
|
|
||||||
|
assert len(tracks) == 2
|
||||||
|
assert tracks[0]["id"] == "track_1"
|
||||||
|
assert tracks[1]["id"] == "track_2"
|
||||||
|
mock_ai_client.assert_called_once()
|
||||||
|
|
||||||
|
def test_generate_tickets(mock_ai_client):
|
||||||
|
# Tier 2 (Tech Lead) response mock
|
||||||
|
mock_ai_client.return_value = json.dumps([
|
||||||
|
{"id": "T-001", "description": "Define interfaces", "depends_on": []},
|
||||||
|
{"id": "T-002", "description": "Implement interfaces", "depends_on": ["T-001"]}
|
||||||
|
])
|
||||||
|
|
||||||
|
track_brief = "Implement a new feature."
|
||||||
|
module_skeletons = "class Feature: pass"
|
||||||
|
|
||||||
|
tickets = conductor_tech_lead.generate_tickets(track_brief, module_skeletons)
|
||||||
|
|
||||||
|
assert len(tickets) == 2
|
||||||
|
assert tickets[0]["id"] == "T-001"
|
||||||
|
assert tickets[1]["id"] == "T-002"
|
||||||
|
assert tickets[1]["depends_on"] == ["T-001"]
|
||||||
|
|
||||||
|
def test_topological_sort():
|
||||||
|
tickets = [
|
||||||
|
{"id": "T-002", "description": "Dep on 001", "depends_on": ["T-001"]},
|
||||||
|
{"id": "T-001", "description": "Base", "depends_on": []},
|
||||||
|
{"id": "T-003", "description": "Dep on 002", "depends_on": ["T-002"]}
|
||||||
|
]
|
||||||
|
|
||||||
|
sorted_tickets = conductor_tech_lead.topological_sort(tickets)
|
||||||
|
|
||||||
|
assert sorted_tickets[0]["id"] == "T-001"
|
||||||
|
assert sorted_tickets[1]["id"] == "T-002"
|
||||||
|
assert sorted_tickets[2]["id"] == "T-003"
|
||||||
|
|
||||||
|
def test_topological_sort_circular():
|
||||||
|
tickets = [
|
||||||
|
{"id": "T-001", "depends_on": ["T-002"]},
|
||||||
|
{"id": "T-002", "depends_on": ["T-001"]}
|
||||||
|
]
|
||||||
|
|
||||||
|
with pytest.raises(ValueError, match="Circular dependency detected"):
|
||||||
|
conductor_tech_lead.topological_sort(tickets)
|
||||||
|
|
||||||
|
def test_track_executable_tickets():
|
||||||
|
t1 = Ticket(id="T1", description="desc", status="todo", assigned_to="user")
|
||||||
|
t2 = Ticket(id="T2", description="desc", status="todo", assigned_to="user", depends_on=["T1"])
|
||||||
|
|
||||||
|
track = Track(id="track_1", description="desc", tickets=[t1, t2])
|
||||||
|
|
||||||
|
executable = track.get_executable_tickets()
|
||||||
|
assert len(executable) == 1
|
||||||
|
assert executable[0].id == "T1"
|
||||||
|
|
||||||
|
# Complete T1
|
||||||
|
t1.status = "completed"
|
||||||
|
executable = track.get_executable_tickets()
|
||||||
|
assert len(executable) == 1
|
||||||
|
assert executable[0].id == "T2"
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_conductor_engine_run_linear():
|
||||||
|
t1 = Ticket(id="T1", description="desc", status="todo", assigned_to="user")
|
||||||
|
t2 = Ticket(id="T2", description="desc", status="todo", assigned_to="user", depends_on=["T1"])
|
||||||
|
|
||||||
|
track = Track(id="track_1", description="desc", tickets=[t1, t2])
|
||||||
|
engine = multi_agent_conductor.ConductorEngine(track)
|
||||||
|
|
||||||
|
with patch("multi_agent_conductor.run_worker_lifecycle") as mock_worker:
|
||||||
|
# Mock worker to complete tickets
|
||||||
|
def complete_ticket(ticket, context, **kwargs):
|
||||||
|
ticket.status = "completed"
|
||||||
|
|
||||||
|
mock_worker.side_effect = complete_ticket
|
||||||
|
|
||||||
|
await engine.run_linear()
|
||||||
|
|
||||||
|
assert t1.status == "completed"
|
||||||
|
assert t2.status == "completed"
|
||||||
|
assert mock_worker.call_count == 2
|
||||||
|
|
||||||
|
def test_conductor_engine_parse_json_tickets():
|
||||||
|
track = Track(id="track_1", description="desc")
|
||||||
|
engine = multi_agent_conductor.ConductorEngine(track)
|
||||||
|
|
||||||
|
json_data = json.dumps([
|
||||||
|
{"id": "T1", "description": "desc 1", "depends_on": []},
|
||||||
|
{"id": "T2", "description": "desc 2", "depends_on": ["T1"]}
|
||||||
|
])
|
||||||
|
|
||||||
|
engine.parse_json_tickets(json_data)
|
||||||
|
|
||||||
|
assert len(track.tickets) == 2
|
||||||
|
assert track.tickets[0].id == "T1"
|
||||||
|
assert track.tickets[1].id == "T2"
|
||||||
|
assert track.tickets[1].depends_on == ["T1"]
|
||||||
|
|
||||||
|
def test_run_worker_lifecycle_blocked(mock_ai_client):
|
||||||
|
ticket = Ticket(id="T1", description="desc", status="todo", assigned_to="user")
|
||||||
|
context = multi_agent_conductor.WorkerContext(ticket_id="T1", model_name="model", messages=[])
|
||||||
|
|
||||||
|
mock_ai_client.return_value = "BLOCKED because of missing info"
|
||||||
|
|
||||||
|
multi_agent_conductor.run_worker_lifecycle(ticket, context)
|
||||||
|
|
||||||
|
assert ticket.status == "blocked"
|
||||||
|
assert ticket.blocked_reason == "BLOCKED because of missing info"
|
||||||
@@ -40,30 +40,29 @@ class TestOrchestratorPM(unittest.TestCase):
|
|||||||
mock_send.assert_called_once()
|
mock_send.assert_called_once()
|
||||||
args, kwargs = mock_send.call_args
|
args, kwargs = mock_send.call_args
|
||||||
self.assertEqual(kwargs['md_content'], "")
|
self.assertEqual(kwargs['md_content'], "")
|
||||||
self.assertEqual(kwargs['system_prompt'], expected_system_prompt)
|
# Cannot check system_prompt via mock_send kwargs anymore as it's set globally
|
||||||
|
# But we can verify user_message was passed
|
||||||
self.assertIn(user_request, kwargs['user_message'])
|
self.assertIn(user_request, kwargs['user_message'])
|
||||||
self.assertIn("REPO_MAP_CONTENT", kwargs['user_message'])
|
self.assertIn("REPO_MAP_CONTENT", kwargs['user_message'])
|
||||||
self.assertEqual(kwargs['model_name'], "gemini-1.5-pro")
|
|
||||||
|
|
||||||
# Verify result
|
# Verify result
|
||||||
self.assertEqual(result, mock_response_data)
|
self.assertEqual(result[0]['id'], mock_response_data[0]['id'])
|
||||||
|
|
||||||
@patch('summarize.build_summary_markdown')
|
@patch('summarize.build_summary_markdown')
|
||||||
@patch('ai_client.send')
|
@patch('ai_client.send')
|
||||||
def test_generate_tracks_markdown_wrapped(self, mock_send, mock_summarize):
|
def test_generate_tracks_markdown_wrapped(self, mock_send, mock_summarize):
|
||||||
mock_summarize.return_value = "REPO_MAP"
|
mock_summarize.return_value = "REPO_MAP"
|
||||||
|
|
||||||
mock_response_data = [{"id": "track_1"}]
|
mock_response_data = [{"id": "track_1"}]
|
||||||
|
expected_result = [{"id": "track_1", "title": "Untitled Track"}]
|
||||||
# Wrapped in ```json ... ```
|
# Wrapped in ```json ... ```
|
||||||
mock_send.return_value = f"Here is the plan:\n```json\n{json.dumps(mock_response_data)}\n```\nHope this helps."
|
mock_send.return_value = f"Here is the plan:\n```json\n{json.dumps(mock_response_data)}\n```\nHope this helps."
|
||||||
|
|
||||||
result = orchestrator_pm.generate_tracks("req", {}, [])
|
result = orchestrator_pm.generate_tracks("req", {}, [])
|
||||||
self.assertEqual(result, mock_response_data)
|
self.assertEqual(result, expected_result)
|
||||||
|
|
||||||
# Wrapped in ``` ... ```
|
# Wrapped in ``` ... ```
|
||||||
mock_send.return_value = f"```\n{json.dumps(mock_response_data)}\n```"
|
mock_send.return_value = f"```\n{json.dumps(mock_response_data)}\n```"
|
||||||
result = orchestrator_pm.generate_tracks("req", {}, [])
|
result = orchestrator_pm.generate_tracks("req", {}, [])
|
||||||
self.assertEqual(result, mock_response_data)
|
self.assertEqual(result, expected_result)
|
||||||
|
|
||||||
@patch('summarize.build_summary_markdown')
|
@patch('summarize.build_summary_markdown')
|
||||||
@patch('ai_client.send')
|
@patch('ai_client.send')
|
||||||
|
|||||||
@@ -40,13 +40,11 @@ def test_redundant_calls_in_process_pending_gui_tasks(app_instance):
|
|||||||
# ai_client.reset_session()
|
# ai_client.reset_session()
|
||||||
# ai_client.set_provider(value, self.current_model)
|
# ai_client.set_provider(value, self.current_model)
|
||||||
|
|
||||||
# _process_pending_gui_tasks ALSO calls:
|
# _process_pending_gui_tasks NO LONGER calls it redundantly:
|
||||||
# ai_client.set_provider(self.current_provider, self.current_model)
|
|
||||||
# ai_client.reset_session()
|
|
||||||
|
|
||||||
# Total should be 2 calls for each if redundant.
|
# Total should be 1 call for each.
|
||||||
assert mock_set_provider.call_count == 2
|
assert mock_set_provider.call_count == 1
|
||||||
assert mock_reset_session.call_count == 2
|
assert mock_reset_session.call_count == 1
|
||||||
|
|
||||||
def test_gcli_path_updates_adapter(app_instance):
|
def test_gcli_path_updates_adapter(app_instance):
|
||||||
# Setup
|
# Setup
|
||||||
|
|||||||
@@ -8,15 +8,21 @@ import session_logger
|
|||||||
import tomllib
|
import tomllib
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def temp_logs(tmp_path):
|
def temp_logs(tmp_path, monkeypatch):
|
||||||
|
# Ensure closed before starting
|
||||||
|
session_logger.close_session()
|
||||||
|
monkeypatch.setattr(session_logger, "_comms_fh", None)
|
||||||
|
|
||||||
# Mock _LOG_DIR in session_logger
|
# Mock _LOG_DIR in session_logger
|
||||||
original_log_dir = session_logger._LOG_DIR
|
original_log_dir = session_logger._LOG_DIR
|
||||||
session_logger._LOG_DIR = tmp_path / "logs"
|
session_logger._LOG_DIR = tmp_path / "logs"
|
||||||
|
monkeypatch.setattr(session_logger, "_LOG_DIR", tmp_path / "logs")
|
||||||
session_logger._LOG_DIR.mkdir(parents=True, exist_ok=True)
|
session_logger._LOG_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
# Mock _SCRIPTS_DIR
|
# Mock _SCRIPTS_DIR
|
||||||
original_scripts_dir = session_logger._SCRIPTS_DIR
|
original_scripts_dir = session_logger._SCRIPTS_DIR
|
||||||
session_logger._SCRIPTS_DIR = tmp_path / "scripts" / "generated"
|
session_logger._SCRIPTS_DIR = tmp_path / "scripts" / "generated"
|
||||||
|
monkeypatch.setattr(session_logger, "_SCRIPTS_DIR", tmp_path / "scripts" / "generated")
|
||||||
session_logger._SCRIPTS_DIR.mkdir(parents=True, exist_ok=True)
|
session_logger._SCRIPTS_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
yield tmp_path / "logs"
|
yield tmp_path / "logs"
|
||||||
|
|||||||
Reference in New Issue
Block a user