checkpoint: massive refactor

This commit is contained in:
2026-02-28 09:06:45 -05:00
parent f2512c30e9
commit d36632c21a
149 changed files with 16255 additions and 17722 deletions

View File

@@ -3,23 +3,22 @@ import sys
import os
def run_diag(role, prompt):
    """Run scripts/mma_exec.py in a subprocess for the given role and prompt.

    Prints the child's stdout/stderr for diagnostics and returns the child's
    stdout on success, or the exception text if the subprocess could not run.
    """
    print(f"--- Running Diag for {role} ---")
    cmd = [sys.executable, "scripts/mma_exec.py", "--role", role, prompt]
    try:
        # Explicit utf-8 avoids platform-default codec errors (notably on Windows).
        result = subprocess.run(cmd, capture_output=True, text=True, encoding='utf-8')
        print("STDOUT:")
        print(result.stdout)
        print("STDERR:")
        print(result.stderr)
        return result.stdout
    except Exception as e:
        # Best-effort diagnostic tool: report the failure instead of crashing.
        print(f"FAILED: {e}")
        return str(e)
if __name__ == "__main__":
    # Test 1: Simple read
    print("TEST 1: read_file")
    run_diag("tier3-worker", "Read the file 'pyproject.toml' and tell me the version of the project. ONLY the version string.")
    # Test 2: shell command execution via the sub-agent's tool
    print("\nTEST 2: run_shell_command")
    run_diag("tier3-worker", "Use run_shell_command to execute 'echo HELLO_SUBAGENT' and return the output. ONLY the output.")

View File

@@ -3,55 +3,51 @@ import pytest
import os
def run_ps_script(role, prompt):
    """Helper to run the run_subagent.ps1 script.

    Returns the CompletedProcess so callers can assert on returncode/stdout.
    """
    # Using -File is safer and handles arguments better than -Command.
    cmd = [
        "powershell", "-NoProfile", "-ExecutionPolicy", "Bypass",
        "-File", "./scripts/run_subagent.ps1",
        "-Role", role,
        "-Prompt", prompt,
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    # Echo child output so pytest -s shows the live agent conversation.
    if result.stdout:
        print(f"\n[Sub-Agent {role} Output]:\n{result.stdout}")
    if result.stderr:
        print(f"\n[Sub-Agent {role} Error]:\n{result.stderr}")
    return result
def test_subagent_script_qa_live():
    """Verify that the QA role works and returns a compressed fix."""
    prompt = "Traceback (most recent call last): File 'test.py', line 1, in <module> 1/0 ZeroDivisionError: division by zero"
    result = run_ps_script("QA", prompt)
    assert result.returncode == 0
    # Expected output should mention the fix for division by zero
    assert "zero" in result.stdout.lower()
    # It should be short (QA agents compress)
    assert len(result.stdout.split()) < 40
def test_subagent_script_worker_live():
    """Verify that the Worker role works and returns code."""
    prompt = "Write a python function that returns 'hello world'"
    result = run_ps_script("Worker", prompt)
    assert result.returncode == 0
    # Worker output should contain an actual function definition.
    assert "def" in result.stdout.lower()
    assert "hello" in result.stdout.lower()
def test_subagent_script_utility_live():
    """Verify that the Utility role works."""
    prompt = "Tell me 'True' if 1+1=2, otherwise 'False'"
    result = run_ps_script("Utility", prompt)
    assert result.returncode == 0
    assert "true" in result.stdout.lower()
def test_subagent_isolation_live():
    """Verify that the sub-agent is stateless and does not see the parent's conversation context."""
    # This prompt asks the sub-agent about a 'secret' mentioned only here, not in its prompt.
    prompt = "What is the secret code I just told you? If I didn't tell you, say 'UNKNOWN'."
    result = run_ps_script("Utility", prompt)
    assert result.returncode == 0
    # A stateless agent should not know any previous context.
    assert "unknown" in result.stdout.lower()

View File

@@ -4,148 +4,137 @@ from unittest.mock import patch, MagicMock
from scripts.mma_exec import create_parser, get_role_documents, execute_agent, get_model_for_role, get_dependencies
def test_parser_role_choices():
    """Test that the parser accepts valid roles and the prompt argument."""
    parser = create_parser()
    valid_roles = ['tier1', 'tier2', 'tier3', 'tier4']
    test_prompt = "Analyze the codebase for bottlenecks."
    for role in valid_roles:
        args = parser.parse_args(['--role', role, test_prompt])
        assert args.role == role
        assert args.prompt == test_prompt
def test_parser_invalid_role():
    """Test that the parser rejects roles outside the specified choices."""
    parser = create_parser()
    # argparse exits the process on an invalid choice.
    with pytest.raises(SystemExit):
        parser.parse_args(['--role', 'tier5', 'Some prompt'])
def test_parser_prompt_optional():
    """Test that the prompt argument is optional if role is provided (or handled in main)."""
    parser = create_parser()
    # Prompt is now optional (nargs='?')
    args = parser.parse_args(['--role', 'tier3'])
    assert args.role == 'tier3'
    assert args.prompt is None
def test_parser_help():
    """Test that the help flag works without raising errors (exits with 0)."""
    parser = create_parser()
    with pytest.raises(SystemExit) as excinfo:
        parser.parse_args(['--help'])
    # --help exits with status 0, unlike a parse error (status 2).
    assert excinfo.value.code == 0
def test_get_role_documents():
    """Test that get_role_documents returns the correct documentation paths for each tier."""
    assert get_role_documents('tier1') == ['conductor/product.md', 'conductor/product-guidelines.md']
    assert get_role_documents('tier2') == ['conductor/tech-stack.md', 'conductor/workflow.md']
    assert get_role_documents('tier3') == ['conductor/workflow.md']
    # tier4 (QA) gets no context documents.
    assert get_role_documents('tier4') == []
def test_get_model_for_role():
    """Test that get_model_for_role returns the correct model for each role."""
    # Only the orchestrator uses the pro model; all lower tiers use flash-lite.
    assert get_model_for_role('tier1-orchestrator') == 'gemini-3.1-pro-preview'
    assert get_model_for_role('tier2-tech-lead') == 'gemini-2.5-flash-lite'
    assert get_model_for_role('tier3-worker') == 'gemini-2.5-flash-lite'
    assert get_model_for_role('tier4-qa') == 'gemini-2.5-flash-lite'
def test_execute_agent():
    """
    Test that execute_agent calls subprocess.run with powershell and the correct gemini CLI arguments
    including the model specified for the role.
    """
    role = "tier3-worker"
    prompt = "Write a unit test."
    docs = ["file1.py", "docs/spec.md"]
    expected_model = "gemini-2.5-flash-lite"
    mock_stdout = "Mocked AI Response"
    with patch("subprocess.run") as mock_run:
        mock_process = MagicMock()
        mock_process.stdout = mock_stdout
        mock_process.returncode = 0
        mock_run.return_value = mock_process
        result = execute_agent(role, prompt, docs)
        mock_run.assert_called_once()
        args, kwargs = mock_run.call_args
        cmd_list = args[0]
        assert cmd_list[0] == "powershell.exe"
        assert "-Command" in cmd_list
        # The actual gemini invocation is the argument following -Command.
        ps_cmd = cmd_list[cmd_list.index("-Command") + 1]
        assert "gemini" in ps_cmd
        assert f"--model {expected_model}" in ps_cmd
        # Verify input contains the prompt and system directive
        input_text = kwargs.get("input")
        assert "STRICT SYSTEM DIRECTIVE" in input_text
        assert "TASK: Write a unit test." in input_text
        assert kwargs.get("capture_output") is True
        assert kwargs.get("text") is True
        assert result == mock_stdout
def test_get_dependencies(tmp_path):
    """get_dependencies extracts module names from both import styles, in order."""
    content = (
        "import os\n"
        "import sys\n"
        "import file_cache\n"
        "from mcp_client import something\n"
    )
    filepath = tmp_path / "mock_script.py"
    filepath.write_text(content)
    dependencies = get_dependencies(str(filepath))
    assert dependencies == ['os', 'sys', 'file_cache', 'mcp_client']
import re
def test_execute_agent_logging(tmp_path):
    """execute_agent appends role, prompt and a date stamp to the master log."""
    log_file = tmp_path / "mma_delegation.log"
    # mma_exec now uses logs/agents/ for individual logs and logs/mma_delegation.log for master
    # We will patch LOG_FILE to point to our temp location
    with patch("scripts.mma_exec.LOG_FILE", str(log_file)), \
         patch("subprocess.run") as mock_run:
        mock_process = MagicMock()
        mock_process.stdout = ""
        mock_process.returncode = 0
        mock_run.return_value = mock_process
        test_role = "tier1"
        test_prompt = "Plan the next phase"
        execute_agent(test_role, test_prompt, [])
        assert log_file.exists()
        log_content = log_file.read_text()
        assert test_role in log_content
        assert test_prompt in log_content  # Master log should now have the summary prompt
        # Entries are timestamped with an ISO-style date.
        assert re.search(r"\d{4}-\d{2}-\d{2}", log_content)

def test_execute_agent_tier3_injection(tmp_path):
    """Tier-3 workers get dependency skeletons injected into their prompt input."""
    main_content = "import dependency\n\ndef run():\n    dependency.do_work()\n"
    main_file = tmp_path / "main.py"
    main_file.write_text(main_content)
    dep_content = "def do_work():\n    pass\n\ndef other_func():\n    print('hello')\n"
    dep_file = tmp_path / "dependency.py"
    dep_file.write_text(dep_content)
    # We need to ensure generate_skeleton is mockable or working
    old_cwd = os.getcwd()
    os.chdir(tmp_path)
    try:
        with patch("subprocess.run") as mock_run:
            mock_process = MagicMock()
            mock_process.stdout = "OK"
            mock_process.returncode = 0
            mock_run.return_value = mock_process
            execute_agent('tier3-worker', 'Modify main.py', ['main.py'])
            assert mock_run.called
            input_text = mock_run.call_args[1].get("input")
            assert "DEPENDENCY SKELETON: dependency.py" in input_text
            assert "def do_work():" in input_text
            assert "Modify main.py" in input_text
    finally:
        # Always restore the working directory so later tests are unaffected.
        os.chdir(old_cwd)

View File

@@ -2,7 +2,7 @@ import pytest
from scripts.mma_exec import generate_skeleton
def test_generate_skeleton():
    """generate_skeleton keeps signatures and docstrings, strips bodies to '...'."""
    # NOTE(review): this fixture was reconstructed from the assertions below
    # (the original source was mangled); confirm it matches the intended sample.
    sample_code = '''
class Calculator:
    """Performs basic math operations."""

    def add(self, a: int, b: int) -> int:
        """Adds two numbers."""
        result = a + b
        return result

def log_message(msg):
    timestamp = "2026-02-25"
    print(f"[{timestamp}] {msg}")
'''
    skeleton = generate_skeleton(sample_code)
    # Check that signatures are preserved
    assert "class Calculator:" in skeleton
    assert "def add(self, a: int, b: int) -> int:" in skeleton
    assert "def log_message(msg):" in skeleton
    # Check that docstrings are preserved
    assert '"""Performs basic math operations."""' in skeleton
    assert '"""Adds two numbers."""' in skeleton
    # Check that implementation details are removed
    assert "result = a + b" not in skeleton
    assert "return result" not in skeleton
    assert "timestamp =" not in skeleton
    assert "print(" not in skeleton
    # Check that bodies are replaced with ellipsis
    assert "..." in skeleton
if __name__ == "__main__":
    # Allow running this test module directly without the pytest CLI.
    pytest.main([__file__])