feat(mma): Implement track-scoped state persistence and configure sub-agents

2026-02-27 19:45:21 -05:00
parent a5684bf773
commit e1a3712d9a
9 changed files with 220 additions and 8 deletions
@@ -0,0 +1,18 @@
+---
+name: tier1-orchestrator
+description: Tier 1 Orchestrator for product alignment and high-level planning.
+model: gemini-3.1-pro-preview
+tools:
+  - read_file
+  - list_directory
+  - glob
+  - grep_search
+  - google_web_search
+  - web_fetch
+  - codebase_investigator
+  - activate_skill
+  - discovered_tool_run_powershell
+---
+STRICT SYSTEM DIRECTIVE: You are a Tier 1 Orchestrator.
+Your focus is product alignment, high-level planning, and track initialization.
+ONLY output the requested text. No pleasantries.
@@ -0,0 +1,20 @@
+---
+name: tier2-tech-lead
+description: Tier 2 Tech Lead for architectural design and execution.
+model: gemini-3-flash-preview
+tools:
+  - read_file
+  - write_file
+  - replace
+  - list_directory
+  - glob
+  - grep_search
+  - google_web_search
+  - web_fetch
+  - codebase_investigator
+  - activate_skill
+  - discovered_tool_run_powershell
+---
+STRICT SYSTEM DIRECTIVE: You are a Tier 2 Tech Lead.
+Your focus is architectural design and track execution.
+ONLY output the requested text. No pleasantries.
@@ -0,0 +1,22 @@
+---
+name: tier3-worker
+description: Stateless Tier 3 Worker for code implementation and TDD.
+model: gemini-3-flash-preview
+tools:
+  - read_file
+  - write_file
+  - replace
+  - list_directory
+  - glob
+  - grep_search
+  - google_web_search
+  - web_fetch
+  - codebase_investigator
+  - activate_skill
+  - discovered_tool_run_powershell
+---
+STRICT SYSTEM DIRECTIVE: You are a stateless Tier 3 Worker (Contributor).
+Your goal is to implement specific code changes or tests based on the provided task.
+You have access to tools for reading and writing files, codebase investigation, and web tools.
+You CAN execute PowerShell scripts or run shell commands via discovered_tool_run_powershell for verification and testing.
+Follow TDD and return success status or code changes. No pleasantries, no conversational filler.
@@ -0,0 +1,20 @@
+---
+name: tier4-qa
+description: Stateless Tier 4 QA Agent for log analysis and diagnostics.
+model: gemini-2.5-flash-lite
+tools:
+  - read_file
+  - list_directory
+  - glob
+  - grep_search
+  - google_web_search
+  - web_fetch
+  - codebase_investigator
+  - activate_skill
+  - discovered_tool_run_powershell
+---
+STRICT SYSTEM DIRECTIVE: You are a stateless Tier 4 QA Agent.
+Your goal is to analyze errors, summarize logs, or verify tests.
+You have access to tools for reading files, exploring the codebase, and web tools.
+You CAN execute PowerShell scripts or run shell commands via discovered_tool_run_powershell for diagnostics.
+ONLY output the requested analysis. No pleasantries.
@@ -0,0 +1,22 @@
+[[rule]]
+toolName = [
+  "read_file",
+  "write_file",
+  "replace",
+  "list_directory",
+  "glob",
+  "grep_search",
+  "search_files",
+  "get_file_summary",
+  "google_web_search",
+  "web_fetch",
+  "codebase_investigator",
+  "cli_help",
+  "activate_skill",
+  "run_shell_command",
+  "run_powershell",
+  "discovered_tool_run_powershell"
+]
+decision = "allow"
+priority = 900
+description = "Allow all MMA tools for sub-agents in headless mode."
@@ -1,4 +1,7 @@
 {
+  "experimental": {
+    "enableAgents": true
+  },
  "tools": {
    "discoveryCommand": "python C:/projects/manual_slop/scripts/tool_discovery.py",
    "whitelist": [
@@ -240,3 +240,30 @@ def flat_config(proj: dict, disc_name: str | None = None) -> dict:
            "history": disc_data.get("history", []),
        },
    }
+
+
+# ── track state persistence ─────────────────────────────────────────────────
+
+def save_track_state(track_id: str, state: 'TrackState', base_dir: str | Path = "."):
+    """
+    Persist a track's state to <base_dir>/conductor/tracks/<track_id>/state.toml.
+
+    The track directory (and any missing parents) is created before writing.
+    The state is converted with state.to_dict() and passed through
+    clean_nones() before being serialised as TOML (presumably to drop None
+    values, which TOML cannot represent -- confirm against clean_nones).
+    """
+    target = Path(base_dir).joinpath("conductor", "tracks", track_id, "state.toml")
+    target.parent.mkdir(parents=True, exist_ok=True)
+    payload = clean_nones(state.to_dict())
+    # tomli_w emits bytes, hence the binary file mode.
+    with target.open("wb") as fh:
+        tomli_w.dump(payload, fh)
+
+
+def load_track_state(track_id: str, base_dir: str | Path = ".") -> 'TrackState | None':
+    """
+    Loads a TrackState object from conductor/tracks/<track_id>/state.toml.
+
+    Args:
+        track_id: Name of the track folder under <base_dir>/conductor/tracks.
+        base_dir: Directory that contains the conductor/ tree.
+
+    Returns:
+        The TrackState built by TrackState.from_dict() from the parsed TOML,
+        or None when no state.toml exists for the track.
+    """
+    # Function-scope import kept from the original (presumably to avoid an
+    # import cycle with models -- confirm).
+    from models import TrackState
+    state_file = Path(base_dir) / "conductor" / "tracks" / track_id / "state.toml"
+    try:
+        # Open directly instead of exists()-then-open so a file removed
+        # between the check and the read still yields None, not a crash.
+        with open(state_file, "rb") as f:
+            data = tomllib.load(f)
+    except (FileNotFoundError, NotADirectoryError):
+        # NotADirectoryError: a path component is a regular file; the former
+        # exists() check reported that as "missing" too.
+        return None
+    return TrackState.from_dict(data)
@@ -176,17 +176,16 @@ def execute_agent(role: str, prompt: str, docs: list[str]) -> str:
    if role in ['tier3', 'tier3-worker']:
        system_directive = "STRICT SYSTEM DIRECTIVE: You are a stateless Tier 3 Worker (Contributor). " \
                           "Your goal is to implement specific code changes or tests based on the provided task. " \
-                           "You have access to tools for reading and writing files (e.g., read_file, write_file, replace). " \
-                           "CRITICAL: You CANNOT execute PowerShell scripts or run shell commands directly. " \
-                           "If you need to verify code or run tests, output the full PowerShell script inside a " \
-                           "markdown code block (e.g., ```powershell) and state that it needs to be executed. " \
+                           "You have access to tools for reading and writing files (e.g., read_file, write_file, replace), " \
+                           "codebase investigation (codebase_investigator), and web tools (google_web_search, web_fetch). " \
+                           "You CAN execute PowerShell scripts or run shell commands via run_shell_command for verification and testing. " \
                           "Follow TDD and return success status or code changes. No pleasantries, no conversational filler."
    elif role in ['tier4', 'tier4-qa']:
        system_directive = "STRICT SYSTEM DIRECTIVE: You are a stateless Tier 4 QA Agent. " \
                           "Your goal is to analyze errors, summarize logs, or verify tests. " \
-                           "You have access to tools for reading files and exploring the codebase. " \
-                           "CRITICAL: You CANNOT execute PowerShell scripts or run shell commands directly. " \
-                           "If you need to run diagnostics, output the PowerShell script and request execution. " \
+                           "You have access to tools for reading files, exploring the codebase (codebase_investigator), " \
+                           "and web tools (google_web_search, web_fetch). " \
+                           "You CAN execute PowerShell scripts or run shell commands via run_shell_command for diagnostics. " \
                           "ONLY output the requested analysis. No pleasantries."
    else:
        system_directive = f"STRICT SYSTEM DIRECTIVE: You are a stateless {role}. " \
@@ -209,7 +208,7 @@ def execute_agent(role: str, prompt: str, docs: list[str]) -> str:
    # Use subprocess with input to pipe the prompt via stdin, avoiding WinError 206.
    # We use -p 'mma_task' to ensure non-interactive (headless) mode and valid parsing.
    # Whitelist tools to ensure they are available to the model in headless mode.
-    allowed_tools = "read_file,write_file,replace,list_directory,glob,grep_search,search_files,get_file_summary"
+    allowed_tools = "read_file,write_file,replace,list_directory,glob,grep_search,search_files,get_file_summary,run_shell_command,activate_skill,codebase_investigator,google_web_search,web_fetch"
    ps_command = (
        f"if (Test-Path 'C:\\projects\\misc\\setup_gemini.ps1') {{ . 'C:\\projects\\misc\\setup_gemini.ps1' }}; "
        f"gemini -p 'mma_task' --allowed-tools {allowed_tools} --output-format json --model {model}"
@@ -0,0 +1,81 @@
+import pytest
+from pathlib import Path
+from datetime import datetime
+import os
+
+# Import the real models
+from models import TrackState, Metadata, Ticket
+# Import the persistence functions from project_manager
+from project_manager import save_track_state, load_track_state
+
+def test_track_state_persistence(tmp_path):
+    """
+    Round-trips a TrackState through save_track_state / load_track_state.
+
+    1. Create a TrackState object with sample metadata, discussion, and tasks.
+    2. Call save_track_state('test_track', state, base_dir).
+    3. Verify that base_dir/conductor/tracks/test_track/state.toml exists.
+    4. Call load_track_state('test_track', base_dir) and verify it returns an identical TrackState object.
+    """
+    base_dir = tmp_path
+    track_id = "test-track-999" # Metadata internal ID
+    track_folder_name = "test_track" # Folder name used in persistence
+
+    # 1. Create a TrackState object with sample data
+    metadata = Metadata(
+        id=track_id,
+        name="Test Track",
+        status="in_progress",
+        created_at=datetime(2023, 1, 1, 12, 0, 0),
+        updated_at=datetime(2023, 1, 2, 13, 0, 0)
+    )
+
+    discussion = [
+        {"role": "User", "content": "Hello", "ts": datetime(2023, 1, 1, 12, 0, 0)},
+        {"role": "AI", "content": "Hi there!", "ts": datetime(2023, 1, 1, 12, 0, 5)}
+    ]
+
+    tasks = [
+        Ticket(id="task-1", description="First task", status="completed", assigned_to="worker-1"),
+        Ticket(id="task-2", description="Second task", status="todo", assigned_to="worker-2")
+    ]
+
+    original_state = TrackState(
+        metadata=metadata,
+        discussion=discussion,
+        tasks=tasks
+    )
+
+    # 2. Call save_track_state('test_track', state, base_dir)
+    save_track_state(track_folder_name, original_state, base_dir)
+
+    # 3. Verify that base_dir/conductor/tracks/test_track/state.toml exists
+    state_file_path = base_dir / "conductor" / "tracks" / track_folder_name / "state.toml"
+    assert state_file_path.exists(), f"State file should exist at {state_file_path}"
+
+    # 4. Call load_track_state('test_track', base_dir) and verify it returns an identical TrackState object
+    loaded_state = load_track_state(track_folder_name, base_dir)
+
+    assert loaded_state is not None, "load_track_state returned None"
+
+    # Field-by-field checks come first so a failure points at the exact
+    # attribute that did not survive the round trip.
+    assert loaded_state.metadata.id == original_state.metadata.id
+    assert loaded_state.metadata.name == original_state.metadata.name
+    assert loaded_state.metadata.status == original_state.metadata.status
+    assert loaded_state.metadata.created_at == original_state.metadata.created_at
+    assert loaded_state.metadata.updated_at == original_state.metadata.updated_at
+
+    # Lengths are asserted before zipping so a truncated list cannot pass silently.
+    assert len(loaded_state.tasks) == len(original_state.tasks)
+    for loaded_task, original_task in zip(loaded_state.tasks, original_state.tasks):
+        assert loaded_task.id == original_task.id
+        assert loaded_task.description == original_task.description
+        assert loaded_task.status == original_task.status
+        assert loaded_task.assigned_to == original_task.assigned_to
+
+    assert len(loaded_state.discussion) == len(original_state.discussion)
+    for loaded_entry, original_entry in zip(loaded_state.discussion, original_state.discussion):
+        assert loaded_entry["role"] == original_entry["role"]
+        assert loaded_entry["content"] == original_entry["content"]
+        assert loaded_entry["ts"] == original_entry["ts"]
+
+    # Final check: deep equality of dataclasses
+    assert loaded_state == original_state