feat(mma): Implement track-scoped state persistence and configure sub-agents

2026-02-27 19:45:21 -05:00
parent a5684bf773
commit e1a3712d9a
9 changed files with 220 additions and 8 deletions
@@ -0,0 +1,18 @@
 ---
 name: tier1-orchestrator
 description: Tier 1 Orchestrator for product alignment and high-level planning.
 model: gemini-3.1-pro-preview
 tools:
  - read_file
  - list_directory
  - glob
  - grep_search
  - google_web_search
  - web_fetch
  - codebase_investigator
  - activate_skill
  - discovered_tool_run_powershell
 ---
 STRICT SYSTEM DIRECTIVE: You are a Tier 1 Orchestrator.
 Focused on product alignment, high-level planning, and track initialization.
 ONLY output the requested text. No pleasantries.
@@ -0,0 +1,20 @@
 ---
 name: tier2-tech-lead
 description: Tier 2 Tech Lead for architectural design and execution.
 model: gemini-3-flash-preview
 tools:
  - read_file
  - write_file
  - replace
  - list_directory
  - glob
  - grep_search
  - google_web_search
  - web_fetch
  - codebase_investigator
  - activate_skill
  - discovered_tool_run_powershell
 ---
 STRICT SYSTEM DIRECTIVE: You are a Tier 2 Tech Lead.
 Focused on architectural design and track execution.
 ONLY output the requested text. No pleasantries.
@@ -0,0 +1,22 @@
 ---
 name: tier3-worker
 description: Stateless Tier 3 Worker for code implementation and TDD.
 model: gemini-3-flash-preview
 tools:
  - read_file
  - write_file
  - replace
  - list_directory
  - glob
  - grep_search
  - google_web_search
  - web_fetch
  - codebase_investigator
  - activate_skill
  - discovered_tool_run_powershell
 ---
 STRICT SYSTEM DIRECTIVE: You are a stateless Tier 3 Worker (Contributor).
 Your goal is to implement specific code changes or tests based on the provided task.
 You have access to tools for reading and writing files, codebase investigation, and web tools.
 You CAN execute PowerShell scripts or run shell commands via discovered_tool_run_powershell for verification and testing.
 Follow TDD and return success status or code changes. No pleasantries, no conversational filler.
@@ -0,0 +1,20 @@
 ---
 name: tier4-qa
 description: Stateless Tier 4 QA Agent for log analysis and diagnostics.
 model: gemini-2.5-flash-lite
 tools:
  - read_file
  - list_directory
  - glob
  - grep_search
  - google_web_search
  - web_fetch
  - codebase_investigator
  - activate_skill
  - discovered_tool_run_powershell
 ---
 STRICT SYSTEM DIRECTIVE: You are a stateless Tier 4 QA Agent.
 Your goal is to analyze errors, summarize logs, or verify tests.
 You have access to tools for reading files, exploring the codebase, and web tools.
 You CAN execute PowerShell scripts or run shell commands via discovered_tool_run_powershell for diagnostics.
 ONLY output the requested analysis. No pleasantries.
@@ -0,0 +1,22 @@
 [[rule]]
 toolName = [
  "read_file",
  "write_file",
  "replace",
  "list_directory",
  "glob",
  "grep_search",
  "search_files",
  "get_file_summary",
  "google_web_search",
  "web_fetch",
  "codebase_investigator",
  "cli_help",
  "activate_skill",
  "run_shell_command",
  "run_powershell",
  "discovered_tool_run_powershell"
 ]
 decision = "allow"
 priority = 900
 description = "Allow all MMA tools for sub-agents in headless mode."
@@ -1,4 +1,7 @@
 {
  "experimental": {
    "enableAgents": true
  },
  "tools": {
    "discoveryCommand": "python C:/projects/manual_slop/scripts/tool_discovery.py",
    "whitelist": [
@@ -240,3 +240,30 @@ def flat_config(proj: dict, disc_name: str | None = None) -> dict:
            "history": disc_data.get("history", []),
        },
    }
 # ── track state persistence ─────────────────────────────────────────────────
 def save_track_state(track_id: str, state: 'TrackState', base_dir: str | Path = "."):
    """
    Persist a track's state as TOML.

    The file is written to <base_dir>/conductor/tracks/<track_id>/state.toml;
    any missing parent directories are created first. The serialized payload
    is state.to_dict() passed through clean_nones() (presumably to drop None
    values, which TOML cannot represent -- confirm against clean_nones).
    """
    target = Path(base_dir).joinpath("conductor", "tracks", track_id, "state.toml")
    target.parent.mkdir(parents=True, exist_ok=True)
    payload = clean_nones(state.to_dict())
    # tomli_w.dump emits bytes, so the file has to be opened in binary mode.
    with target.open("wb") as out:
        tomli_w.dump(payload, out)
 def load_track_state(track_id: str, base_dir: str | Path = ".") -> 'TrackState':
    """
    Loads a TrackState object from conductor/tracks/<track_id>/state.toml.
    """
    from models import TrackState
    state_file = Path(base_dir) / "conductor" / "tracks" / track_id / "state.toml"
    if not state_file.exists():
        return None
    with open(state_file, "rb") as f:
        data = tomllib.load(f)
    return TrackState.from_dict(data)
@@ -176,17 +176,16 @@ def execute_agent(role: str, prompt: str, docs: list[str]) -> str:
    if role in ['tier3', 'tier3-worker']:
        system_directive = "STRICT SYSTEM DIRECTIVE: You are a stateless Tier 3 Worker (Contributor). " \
                           "Your goal is to implement specific code changes or tests based on the provided task. " \
-                           "You have access to tools for reading and writing files (e.g., read_file, write_file, replace). " \
+                           "You have access to tools for reading and writing files (e.g., read_file, write_file, replace), " \
-                           "CRITICAL: You CANNOT execute PowerShell scripts or run shell commands directly. " \
+                           "codebase investigation (codebase_investigator), and web tools (google_web_search, web_fetch). " \
-                           "If you need to verify code or run tests, output the full PowerShell script inside a " \
+                           "You CAN execute PowerShell scripts or run shell commands via run_shell_command for verification and testing. " \
                           "markdown code block (e.g., ```powershell) and state that it needs to be executed. " \
                           "Follow TDD and return success status or code changes. No pleasantries, no conversational filler."
    elif role in ['tier4', 'tier4-qa']:
        system_directive = "STRICT SYSTEM DIRECTIVE: You are a stateless Tier 4 QA Agent. " \
                           "Your goal is to analyze errors, summarize logs, or verify tests. " \
-                           "You have access to tools for reading files and exploring the codebase. " \
+                           "You have access to tools for reading files, exploring the codebase (codebase_investigator), " \
-                           "CRITICAL: You CANNOT execute PowerShell scripts or run shell commands directly. " \
+                           "and web tools (google_web_search, web_fetch). " \
-                           "If you need to run diagnostics, output the PowerShell script and request execution. " \
+                           "You CAN execute PowerShell scripts or run shell commands via run_shell_command for diagnostics. " \
                           "ONLY output the requested analysis. No pleasantries."
    else:
        system_directive = f"STRICT SYSTEM DIRECTIVE: You are a stateless {role}. " \
@@ -209,7 +208,7 @@ def execute_agent(role: str, prompt: str, docs: list[str]) -> str:
    # Use subprocess with input to pipe the prompt via stdin, avoiding WinError 206.
    # We use -p 'mma_task' to ensure non-interactive (headless) mode and valid parsing.
    # Whitelist tools to ensure they are available to the model in headless mode.
-    allowed_tools = "read_file,write_file,replace,list_directory,glob,grep_search,search_files,get_file_summary"
+    allowed_tools = "read_file,write_file,replace,list_directory,glob,grep_search,search_files,get_file_summary,run_shell_command,activate_skill,codebase_investigator,google_web_search,web_fetch"
    ps_command = (
        f"if (Test-Path 'C:\\projects\\misc\\setup_gemini.ps1') {{ . 'C:\\projects\\misc\\setup_gemini.ps1' }}; "
        f"gemini -p 'mma_task' --allowed-tools {allowed_tools} --output-format json --model {model}"
@@ -0,0 +1,81 @@
 import pytest
 from pathlib import Path
 from datetime import datetime
 import os
 # Import the real models
 from models import TrackState, Metadata, Ticket
 # Import the persistence functions from project_manager
 from project_manager import save_track_state, load_track_state
 def test_track_state_persistence(tmp_path):
    """
    Round-trips a TrackState through save_track_state / load_track_state.

    Builds a state with sample metadata, discussion entries and tickets,
    saves it under tmp_path, checks that
    tmp_path/conductor/tracks/test_track/state.toml was created, then loads
    it back and compares it field by field and as a whole with the original.
    """
    root = tmp_path
    folder = "test_track"  # directory name used by the persistence layer
    meta_id = "test-track-999"  # id stored inside the metadata itself

    # Sample state to persist.
    expected = TrackState(
        metadata=Metadata(
            id=meta_id,
            name="Test Track",
            status="in_progress",
            created_at=datetime(2023, 1, 1, 12, 0, 0),
            updated_at=datetime(2023, 1, 2, 13, 0, 0),
        ),
        discussion=[
            {"role": "User", "content": "Hello", "ts": datetime(2023, 1, 1, 12, 0, 0)},
            {"role": "AI", "content": "Hi there!", "ts": datetime(2023, 1, 1, 12, 0, 5)},
        ],
        tasks=[
            Ticket(id="task-1", description="First task", status="completed", assigned_to="worker-1"),
            Ticket(id="task-2", description="Second task", status="todo", assigned_to="worker-2"),
        ],
    )

    # Save, then confirm the file landed where the layout says it should.
    save_track_state(folder, expected, root)
    toml_path = root / "conductor" / "tracks" / folder / "state.toml"
    assert toml_path.exists(), f"State file should exist at {toml_path}"

    # Load it back.
    actual = load_track_state(folder, root)
    assert actual is not None, "load_track_state returned None"

    # Field-level checks give a more precise failure than whole-object equality.
    for attr in ("id", "name", "status", "created_at", "updated_at"):
        assert getattr(actual.metadata, attr) == getattr(expected.metadata, attr)

    assert len(actual.tasks) == len(expected.tasks)
    for got, want in zip(actual.tasks, expected.tasks):
        for attr in ("id", "description", "status", "assigned_to"):
            assert getattr(got, attr) == getattr(want, attr)

    assert len(actual.discussion) == len(expected.discussion)
    for got, want in zip(actual.discussion, expected.discussion):
        for key in ("role", "content", "ts"):
            assert got[key] == want[key]

    # Final check: deep equality of the dataclasses.
    assert actual == expected