checkpoint: massive refactor

This commit is contained in:
2026-02-28 09:06:45 -05:00
parent f2512c30e9
commit d36632c21a
149 changed files with 16255 additions and 17722 deletions

View File

@@ -3,23 +3,22 @@ import sys
import os
def run_diag(role, prompt):
    """Run scripts/mma_exec.py in a subprocess for the given role and prompt.

    Prints the child's stdout/stderr for diagnostics and returns the child's
    stdout on success, or the exception text if the subprocess could not run.
    """
    print(f"--- Running Diag for {role} ---")
    cmd = [sys.executable, "scripts/mma_exec.py", "--role", role, prompt]
    try:
        # Explicit utf-8 avoids platform-default codec errors (notably on Windows).
        result = subprocess.run(cmd, capture_output=True, text=True, encoding='utf-8')
        print("STDOUT:")
        print(result.stdout)
        print("STDERR:")
        print(result.stderr)
        return result.stdout
    except Exception as e:
        # Best-effort diagnostic tool: report the failure instead of crashing.
        print(f"FAILED: {e}")
        return str(e)
if __name__ == "__main__":
    # Test 1: Simple read
    print("TEST 1: read_file")
    run_diag("tier3-worker", "Read the file 'pyproject.toml' and tell me the version of the project. ONLY the version string.")
    # Test 2: shell command execution via the sub-agent's tool
    print("\nTEST 2: run_shell_command")
    run_diag("tier3-worker", "Use run_shell_command to execute 'echo HELLO_SUBAGENT' and return the output. ONLY the output.")

View File

@@ -3,55 +3,51 @@ import pytest
import os
def run_ps_script(role, prompt):
    """Helper to run the run_subagent.ps1 script.

    Returns the CompletedProcess so callers can assert on returncode/stdout.
    """
    # Using -File is safer and handles arguments better than -Command.
    cmd = [
        "powershell", "-NoProfile", "-ExecutionPolicy", "Bypass",
        "-File", "./scripts/run_subagent.ps1",
        "-Role", role,
        "-Prompt", prompt,
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    # Echo child output so pytest -s shows the live agent conversation.
    if result.stdout:
        print(f"\n[Sub-Agent {role} Output]:\n{result.stdout}")
    if result.stderr:
        print(f"\n[Sub-Agent {role} Error]:\n{result.stderr}")
    return result
def test_subagent_script_qa_live():
    """Verify that the QA role works and returns a compressed fix."""
    prompt = "Traceback (most recent call last): File 'test.py', line 1, in <module> 1/0 ZeroDivisionError: division by zero"
    result = run_ps_script("QA", prompt)
    assert result.returncode == 0
    # Expected output should mention the fix for division by zero
    assert "zero" in result.stdout.lower()
    # It should be short (QA agents compress)
    assert len(result.stdout.split()) < 40
def test_subagent_script_worker_live():
    """Verify that the Worker role works and returns code."""
    prompt = "Write a python function that returns 'hello world'"
    result = run_ps_script("Worker", prompt)
    assert result.returncode == 0
    # Worker output should contain an actual function definition.
    assert "def" in result.stdout.lower()
    assert "hello" in result.stdout.lower()
def test_subagent_script_utility_live():
    """Verify that the Utility role works."""
    prompt = "Tell me 'True' if 1+1=2, otherwise 'False'"
    result = run_ps_script("Utility", prompt)
    assert result.returncode == 0
    assert "true" in result.stdout.lower()
def test_subagent_isolation_live():
    """Verify that the sub-agent is stateless and does not see the parent's conversation context."""
    # This prompt asks the sub-agent about a 'secret' mentioned only here, not in its prompt.
    prompt = "What is the secret code I just told you? If I didn't tell you, say 'UNKNOWN'."
    result = run_ps_script("Utility", prompt)
    assert result.returncode == 0
    # A stateless agent should not know any previous context.
    assert "unknown" in result.stdout.lower()

View File

@@ -4,148 +4,137 @@ from unittest.mock import patch, MagicMock
from scripts.mma_exec import create_parser, get_role_documents, execute_agent, get_model_for_role, get_dependencies
def test_parser_role_choices():
    """Test that the parser accepts valid roles and the prompt argument."""
    parser = create_parser()
    valid_roles = ['tier1', 'tier2', 'tier3', 'tier4']
    test_prompt = "Analyze the codebase for bottlenecks."
    for role in valid_roles:
        args = parser.parse_args(['--role', role, test_prompt])
        assert args.role == role
        assert args.prompt == test_prompt
def test_parser_invalid_role():
    """Test that the parser rejects roles outside the specified choices."""
    parser = create_parser()
    # argparse exits the process on an invalid choice.
    with pytest.raises(SystemExit):
        parser.parse_args(['--role', 'tier5', 'Some prompt'])
def test_parser_prompt_optional():
    """Test that the prompt argument is optional if role is provided (or handled in main)."""
    parser = create_parser()
    # Prompt is now optional (nargs='?')
    args = parser.parse_args(['--role', 'tier3'])
    assert args.role == 'tier3'
    assert args.prompt is None
def test_parser_help():
    """Test that the help flag works without raising errors (exits with 0)."""
    parser = create_parser()
    with pytest.raises(SystemExit) as excinfo:
        parser.parse_args(['--help'])
    # --help exits with status 0, unlike a parse error (status 2).
    assert excinfo.value.code == 0
def test_get_role_documents():
    """Test that get_role_documents returns the correct documentation paths for each tier."""
    assert get_role_documents('tier1') == ['conductor/product.md', 'conductor/product-guidelines.md']
    assert get_role_documents('tier2') == ['conductor/tech-stack.md', 'conductor/workflow.md']
    assert get_role_documents('tier3') == ['conductor/workflow.md']
    # tier4 (QA) gets no context documents.
    assert get_role_documents('tier4') == []
def test_get_model_for_role():
    """Test that get_model_for_role returns the correct model for each role."""
    # Only the orchestrator uses the pro model; all lower tiers use flash-lite.
    assert get_model_for_role('tier1-orchestrator') == 'gemini-3.1-pro-preview'
    assert get_model_for_role('tier2-tech-lead') == 'gemini-2.5-flash-lite'
    assert get_model_for_role('tier3-worker') == 'gemini-2.5-flash-lite'
    assert get_model_for_role('tier4-qa') == 'gemini-2.5-flash-lite'
def test_execute_agent():
    """
    Test that execute_agent calls subprocess.run with powershell and the correct gemini CLI arguments
    including the model specified for the role.
    """
    role = "tier3-worker"
    prompt = "Write a unit test."
    docs = ["file1.py", "docs/spec.md"]
    expected_model = "gemini-2.5-flash-lite"
    mock_stdout = "Mocked AI Response"
    with patch("subprocess.run") as mock_run:
        mock_process = MagicMock()
        mock_process.stdout = mock_stdout
        mock_process.returncode = 0
        mock_run.return_value = mock_process
        result = execute_agent(role, prompt, docs)
        mock_run.assert_called_once()
        args, kwargs = mock_run.call_args
        cmd_list = args[0]
        assert cmd_list[0] == "powershell.exe"
        assert "-Command" in cmd_list
        # The actual gemini invocation is the argument following -Command.
        ps_cmd = cmd_list[cmd_list.index("-Command") + 1]
        assert "gemini" in ps_cmd
        assert f"--model {expected_model}" in ps_cmd
        # Verify input contains the prompt and system directive
        input_text = kwargs.get("input")
        assert "STRICT SYSTEM DIRECTIVE" in input_text
        assert "TASK: Write a unit test." in input_text
        assert kwargs.get("capture_output") is True
        assert kwargs.get("text") is True
        assert result == mock_stdout
def test_get_dependencies(tmp_path):
    """get_dependencies extracts module names from both import styles, in order."""
    content = (
        "import os\n"
        "import sys\n"
        "import file_cache\n"
        "from mcp_client import something\n"
    )
    filepath = tmp_path / "mock_script.py"
    filepath.write_text(content)
    dependencies = get_dependencies(str(filepath))
    assert dependencies == ['os', 'sys', 'file_cache', 'mcp_client']
import re
def test_execute_agent_logging(tmp_path):
    """execute_agent appends role, prompt and a date stamp to the master log."""
    log_file = tmp_path / "mma_delegation.log"
    # mma_exec now uses logs/agents/ for individual logs and logs/mma_delegation.log for master
    # We will patch LOG_FILE to point to our temp location
    with patch("scripts.mma_exec.LOG_FILE", str(log_file)), \
         patch("subprocess.run") as mock_run:
        mock_process = MagicMock()
        mock_process.stdout = ""
        mock_process.returncode = 0
        mock_run.return_value = mock_process
        test_role = "tier1"
        test_prompt = "Plan the next phase"
        execute_agent(test_role, test_prompt, [])
        assert log_file.exists()
        log_content = log_file.read_text()
        assert test_role in log_content
        assert test_prompt in log_content  # Master log should now have the summary prompt
        # Entries are timestamped with an ISO-style date.
        assert re.search(r"\d{4}-\d{2}-\d{2}", log_content)

def test_execute_agent_tier3_injection(tmp_path):
    """Tier-3 workers get dependency skeletons injected into their prompt input."""
    main_content = "import dependency\n\ndef run():\n    dependency.do_work()\n"
    main_file = tmp_path / "main.py"
    main_file.write_text(main_content)
    dep_content = "def do_work():\n    pass\n\ndef other_func():\n    print('hello')\n"
    dep_file = tmp_path / "dependency.py"
    dep_file.write_text(dep_content)
    # We need to ensure generate_skeleton is mockable or working
    old_cwd = os.getcwd()
    os.chdir(tmp_path)
    try:
        with patch("subprocess.run") as mock_run:
            mock_process = MagicMock()
            mock_process.stdout = "OK"
            mock_process.returncode = 0
            mock_run.return_value = mock_process
            execute_agent('tier3-worker', 'Modify main.py', ['main.py'])
            assert mock_run.called
            input_text = mock_run.call_args[1].get("input")
            assert "DEPENDENCY SKELETON: dependency.py" in input_text
            assert "def do_work():" in input_text
            assert "Modify main.py" in input_text
    finally:
        # Always restore the working directory so later tests are unaffected.
        os.chdir(old_cwd)

View File

@@ -2,7 +2,7 @@ import pytest
from scripts.mma_exec import generate_skeleton
def test_generate_skeleton():
    """generate_skeleton keeps signatures and docstrings, strips bodies to '...'."""
    # NOTE(review): this fixture was reconstructed from the assertions below
    # (the original source was mangled); confirm it matches the intended sample.
    sample_code = '''
class Calculator:
    """Performs basic math operations."""

    def add(self, a: int, b: int) -> int:
        """Adds two numbers."""
        result = a + b
        return result

def log_message(msg):
    timestamp = "2026-02-25"
    print(f"[{timestamp}] {msg}")
'''
    skeleton = generate_skeleton(sample_code)
    # Check that signatures are preserved
    assert "class Calculator:" in skeleton
    assert "def add(self, a: int, b: int) -> int:" in skeleton
    assert "def log_message(msg):" in skeleton
    # Check that docstrings are preserved
    assert '"""Performs basic math operations."""' in skeleton
    assert '"""Adds two numbers."""' in skeleton
    # Check that implementation details are removed
    assert "result = a + b" not in skeleton
    assert "return result" not in skeleton
    assert "timestamp =" not in skeleton
    assert "print(" not in skeleton
    # Check that bodies are replaced with ellipsis
    assert "..." in skeleton
if __name__ == "__main__":
    # Allow running this test module directly without the pytest CLI.
    pytest.main([__file__])