diff --git a/conductor/tests/diag_subagent.py b/conductor/tests/diag_subagent.py deleted file mode 100644 index e4b290aa..00000000 --- a/conductor/tests/diag_subagent.py +++ /dev/null @@ -1,23 +0,0 @@ -import subprocess -import sys - -def run_diag(role: str, prompt: str) -> str: - print(f"--- Running Diag for {role} ---") - cmd = [sys.executable, "scripts/mma_exec.py", "--role", role, prompt] - try: - result = subprocess.run(cmd, capture_output=True, text=True, encoding='utf-8') - print("STDOUT:") - print(result.stdout) - print("STDERR:") - print(result.stderr) - return result.stdout - except Exception as e: - print(f"FAILED: {e}") - return str(e) - -if __name__ == "__main__": -# Test 1: Simple read - print("TEST 1: read_file") - run_diag("tier3-worker", "Read the file 'pyproject.toml' and tell me the version of the project. ONLY the version string.") - print("\nTEST 2: run_shell_command") - run_diag("tier3-worker", "Use run_shell_command to execute 'echo HELLO_SUBAGENT' and return the output. ONLY the output.") diff --git a/conductor/tests/test_gui_markdown_table_width.py b/conductor/tests/test_gui_markdown_table_width.py deleted file mode 100644 index 8a458d71..00000000 --- a/conductor/tests/test_gui_markdown_table_width.py +++ /dev/null @@ -1,64 +0,0 @@ -import unittest -from unittest.mock import MagicMock, patch -import sys -import os - -# Ensure project root is in path so we can import src.gui_2 -project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) -if project_root not in sys.path: - sys.path.insert(0, project_root) - -class TestMarkdownTableWidth(unittest.TestCase): - def test_render_discussion_entry_full_width(self): - """ - Verify that render_discussion_entry calls imgui.dummy with the full available width. 
- """ - # Mock all dependencies to avoid side effects and complex setup during import/execution - with patch('src.gui_2.imgui') as mock_imgui, \ - patch('src.gui_2.imscope') as mock_imscope, \ - patch('src.gui_2.theme') as mock_theme, \ - patch('src.gui_2.project_manager') as mock_pm, \ - patch('src.gui_2.render_thinking_trace') as mock_rtt, \ - patch('src.gui_2.render_discussion_entry_read_mode') as mock_rderm: - - # 1. Setup available width and coordinates - expected_width = 850.0 - mock_avail = MagicMock() - mock_avail.x = expected_width - mock_imgui.get_content_region_avail.return_value = mock_avail - - # Mock ImVec2 to return a simple tuple for easier assertion - mock_imgui.ImVec2.side_effect = lambda x, y: (x, y) - - # 3. Mock app and entry state - mock_app = MagicMock() - mock_app.disc_roles = ["User", "Assistant"] - - entry = { - "role": "User", - "content": "Hello world", - "collapsed": False, - "read_mode": False - } - - # Mock interactive elements - mock_imgui.begin_combo.return_value = False - mock_imgui.button.return_value = False - mock_imgui.input_text_multiline.return_value = (False, entry["content"]) - - # 4. Import the function within the patch context - from src.gui_2 import render_discussion_entry - - # 5. Execute the function - render_discussion_entry(mock_app, entry, 0) - - # 6. 
Verification - # The function should call imgui.dummy(imgui.ImVec2(full_width, 0)) - mock_imgui.dummy.assert_any_call((expected_width, 0.0)) - - # CRITICAL: Verify newline or spacing is called to prevent squashing - # We expect this to fail currently - assert mock_imgui.new_line.called or mock_imgui.spacing.called - -if __name__ == '__main__': - unittest.main() diff --git a/conductor/tests/test_gui_monolithic_restoration.py b/conductor/tests/test_gui_monolithic_restoration.py deleted file mode 100644 index ef680f76..00000000 --- a/conductor/tests/test_gui_monolithic_restoration.py +++ /dev/null @@ -1,33 +0,0 @@ -import inspect -import sys -import os -import pytest - -# Ensure project root is in path -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) - -def test_gui_monolithic_symbols(): - try: - from src.gui_2 import App, render_discussion_entry, render_thinking_trace - import src.gui_2 - except ImportError as e: - pytest.fail(f"FAILURE: Could not import from src.gui_2: {e}") - - # Verify App is importable - assert App is not None - - # Verify render_discussion_entry is in src.gui_2 - assert hasattr(src.gui_2, 'render_discussion_entry'), "render_discussion_entry missing from src.gui_2" - - # Verify it's defined in src.gui_2, not imported - mod = inspect.getmodule(render_discussion_entry) - assert mod is not None, "Could not determine module for render_discussion_entry" - assert mod.__name__ == 'src.gui_2', f"render_discussion_entry expected in src.gui_2, but found in {mod.__name__}" - - # Verify render_thinking_trace is in src.gui_2 - assert hasattr(src.gui_2, 'render_thinking_trace'), "render_thinking_trace missing from src.gui_2" - - # Verify it's defined in src.gui_2, not imported - mod = inspect.getmodule(render_thinking_trace) - assert mod is not None, "Could not determine module for render_thinking_trace" - assert mod.__name__ == 'src.gui_2', f"render_thinking_trace expected in src.gui_2, but found in {mod.__name__}" diff 
--git a/conductor/tests/test_imgui_scopes_id_stability.py b/conductor/tests/test_imgui_scopes_id_stability.py deleted file mode 100644 index 5bae1bd8..00000000 --- a/conductor/tests/test_imgui_scopes_id_stability.py +++ /dev/null @@ -1,29 +0,0 @@ -import pytest -from unittest.mock import patch, MagicMock -from src.imgui_scopes import _ScopeId -import src.imgui_scopes as imgui_scopes - -def test_scope_id_string(): - with patch('src.imgui_scopes.imgui') as mock_imgui: - sid = _ScopeId("test_id") - with sid: - pass - mock_imgui.push_id.assert_called_once_with("test_id") - mock_imgui.pop_id.assert_called_once() - -def test_scope_id_int(): - with patch('src.imgui_scopes.imgui') as mock_imgui: - # Python type hint is str, but we test runtime resilience - sid = _ScopeId(1234) - with sid: - pass - # Verify it was converted to string to prevent low-level crashes - mock_imgui.push_id.assert_called_once_with("1234") - mock_imgui.pop_id.assert_called_once() - -def test_id_helper_function(): - with patch('src.imgui_scopes.imgui') as mock_imgui: - with imgui_scopes.id(42): - pass - mock_imgui.push_id.assert_called_once_with("42") - mock_imgui.pop_id.assert_called_once() diff --git a/conductor/tests/test_infrastructure.py b/conductor/tests/test_infrastructure.py deleted file mode 100644 index 9423d25d..00000000 --- a/conductor/tests/test_infrastructure.py +++ /dev/null @@ -1,60 +0,0 @@ -import subprocess -from unittest.mock import patch, MagicMock - -def run_ps_script(role: str, prompt: str) -> subprocess.CompletedProcess: - """Helper to run the run_subagent.ps1 script.""" - # Using -File is safer and handles arguments better - cmd = [ - "powershell", "-NoProfile", "-ExecutionPolicy", "Bypass", - "-File", "./scripts/run_subagent.ps1", - "-Role", role, - "-Prompt", prompt - ] - result = subprocess.run(cmd, capture_output=True, text=True) - if result.stdout: - print(f"\n[Sub-Agent {role} Output]:\n{result.stdout}") - if result.stderr: - print(f"\n[Sub-Agent {role} 
Error]:\n{result.stderr}") - return result - -@patch('subprocess.run') -def test_subagent_script_qa_live(mock_run) -> None: - """Verify that the QA role works and returns a compressed fix.""" - mock_run.return_value = MagicMock(returncode=0, stdout='Fix the division by zero error.', stderr='') - prompt = "Traceback (most recent call last): File 'test.py', line 1, in 1/0 ZeroDivisionError: division by zero" - result = run_ps_script("QA", prompt) - assert result.returncode == 0 - # Expected output should mention the fix for division by zero - assert "zero" in result.stdout.lower() - # It should be short (QA agents compress) - assert len(result.stdout.split()) < 40 - -@patch('subprocess.run') -def test_subagent_script_worker_live(mock_run) -> None: - """Verify that the Worker role works and returns code.""" - mock_run.return_value = MagicMock(returncode=0, stdout='def hello(): return "hello world"', stderr='') - prompt = "Write a python function that returns 'hello world'" - result = run_ps_script("Worker", prompt) - assert result.returncode == 0 - assert "def" in result.stdout.lower() - assert "hello" in result.stdout.lower() - -@patch('subprocess.run') -def test_subagent_script_utility_live(mock_run) -> None: - """Verify that the Utility role works.""" - mock_run.return_value = MagicMock(returncode=0, stdout='True', stderr='') - prompt = "Tell me 'True' if 1+1=2, otherwise 'False'" - result = run_ps_script("Utility", prompt) - assert result.returncode == 0 - assert "true" in result.stdout.lower() - -@patch('subprocess.run') -def test_subagent_isolation_live(mock_run) -> None: - """Verify that the sub-agent is stateless and does not see the parent's conversation context.""" - mock_run.return_value = MagicMock(returncode=0, stdout='UNKNOWN', stderr='') - # This prompt asks the sub-agent about a 'secret' mentioned only here, not in its prompt. - prompt = "What is the secret code I just told you? If I didn't tell you, say 'UNKNOWN'." 
- result = run_ps_script("Utility", prompt) - assert result.returncode == 0 - # A stateless agent should not know any previous context. - assert "unknown" in result.stdout.lower() diff --git a/conductor/tests/test_mma_exec.py b/conductor/tests/test_mma_exec.py deleted file mode 100644 index acb1b785..00000000 --- a/conductor/tests/test_mma_exec.py +++ /dev/null @@ -1,140 +0,0 @@ -import pytest -import os -from pathlib import Path -from unittest.mock import patch, MagicMock -from scripts.mma_exec import create_parser, get_role_documents, execute_agent, get_model_for_role, get_dependencies - -def test_parser_role_choices() -> None: - """Test that the parser accepts valid roles and the prompt argument.""" - parser = create_parser() - valid_roles = ['tier1', 'tier2', 'tier3', 'tier4'] - test_prompt = "Analyze the codebase for bottlenecks." - for role in valid_roles: - args = parser.parse_args(['--role', role, test_prompt]) - assert args.role == role - assert args.prompt == test_prompt - -def test_parser_invalid_role() -> None: - """Test that the parser rejects roles outside the specified choices.""" - parser = create_parser() - with pytest.raises(SystemExit): - parser.parse_args(['--role', 'tier5', 'Some prompt']) - -def test_parser_prompt_optional() -> None: - """Test that the prompt argument is optional if role is provided (or handled in main).""" - parser = create_parser() - # Prompt is now optional (nargs='?') - args = parser.parse_args(['--role', 'tier3']) - assert args.role == 'tier3' - assert args.prompt is None - -def test_parser_help() -> None: - """Test that the help flag works without raising errors (exits with 0).""" - parser = create_parser() - with pytest.raises(SystemExit) as excinfo: - parser.parse_args(['--help']) - assert excinfo.value.code == 0 - -def test_get_role_documents() -> None: - """Test that get_role_documents returns the correct documentation paths for each tier.""" - assert get_role_documents('tier1') == ['conductor/product.md', 
'conductor/product-guidelines.md', 'docs/guide_architecture.md', 'docs/guide_mma.md'] - assert get_role_documents('tier2') == ['conductor/tech-stack.md', 'conductor/workflow.md', 'docs/guide_architecture.md', 'docs/guide_mma.md'] - assert get_role_documents('tier3') == ['docs/guide_architecture.md'] - assert get_role_documents('tier4') == ['docs/guide_architecture.md'] - -def test_get_model_for_role() -> None: - """Test that get_model_for_role returns the correct model for each role.""" - assert get_model_for_role('tier1-orchestrator') == 'gemini-3.1-pro-preview' - assert get_model_for_role('tier2-tech-lead') == 'gemini-3-flash-preview' - assert get_model_for_role('tier3-worker') == 'gemini-3-flash-preview' - assert get_model_for_role('tier4-qa') == 'gemini-2.5-flash-lite' - -def test_execute_agent() -> None: - """ - Test that execute_agent calls subprocess.run with powershell and the correct gemini CLI arguments - including the model specified for the role. - """ - role = "tier3-worker" - prompt = "Write a unit test." - docs = ["file1.py", "docs/spec.md"] - expected_model = "gemini-3-flash-preview" - mock_stdout = "Mocked AI Response" - with patch("subprocess.run") as mock_run: - mock_process = MagicMock() - mock_process.stdout = mock_stdout - mock_process.returncode = 0 - mock_run.return_value = mock_process - result = execute_agent(role, prompt, docs) - mock_run.assert_called_once() - args, kwargs = mock_run.call_args - cmd_list = args[0] - assert cmd_list[0] == "powershell.exe" - assert "-Command" in cmd_list - ps_cmd = cmd_list[cmd_list.index("-Command") + 1] - assert "gemini" in ps_cmd - assert f"--model {expected_model}" in ps_cmd - # Verify input contains the prompt and system directive - input_text = kwargs.get("input") - assert "STRICT SYSTEM DIRECTIVE" in input_text - assert "TASK: Write a unit test." 
in input_text - assert kwargs.get("capture_output") is True - assert kwargs.get("text") is True - assert result == mock_stdout - -def test_get_dependencies(tmp_path: Path) -> None: - content = ( - "import os\n" - "import sys\n" - "import file_cache\n" - "from mcp_client import something\n" - ) - filepath = tmp_path / "mock_script.py" - filepath.write_text(content) - dependencies = get_dependencies(str(filepath)) - assert dependencies == ['os', 'sys', 'file_cache', 'mcp_client'] - -import re - -def test_execute_agent_logging(tmp_path: Path) -> None: - log_file = tmp_path / "mma_delegation.log" - # mma_exec now uses logs/agents/ for individual logs and logs/mma_delegation.log for master - # We will patch LOG_FILE to point to our temp location - with patch("scripts.mma_exec.LOG_FILE", str(log_file)), \ - patch("subprocess.run") as mock_run: - mock_process = MagicMock() - mock_process.stdout = "" - mock_process.returncode = 0 - mock_run.return_value = mock_process - test_role = "tier1" - test_prompt = "Plan the next phase" - execute_agent(test_role, test_prompt, []) - assert log_file.exists() - log_content = log_file.read_text() - assert test_role in log_content - assert test_prompt in log_content # Master log should now have the summary prompt - assert re.search(r"\d{4}-\d{2}-\d{2}", log_content) - -def test_execute_agent_tier3_injection(tmp_path: Path) -> None: - main_content = "import dependency\n\ndef run():\n dependency.do_work()\n" - main_file = tmp_path / "main.py" - main_file.write_text(main_content) - dep_content = "def do_work():\n pass\n\ndef other_func():\n print('hello')\n" - dep_file = tmp_path / "dependency.py" - dep_file.write_text(dep_content) - # We need to ensure generate_skeleton is mockable or working - old_cwd = os.getcwd() - os.chdir(tmp_path) - try: - with patch("subprocess.run") as mock_run: - mock_process = MagicMock() - mock_process.stdout = "OK" - mock_process.returncode = 0 - mock_run.return_value = mock_process - 
execute_agent('tier3-worker', 'Modify main.py', ['main.py']) - assert mock_run.called - input_text = mock_run.call_args[1].get("input") - assert "DEPENDENCY SKELETON: dependency.py" in input_text - assert "def do_work():" in input_text - assert "Modify main.py" in input_text - finally: - os.chdir(old_cwd) diff --git a/conductor/tests/verify_phase_1.py b/conductor/tests/verify_phase_1.py deleted file mode 100644 index 74e1b623..00000000 --- a/conductor/tests/verify_phase_1.py +++ /dev/null @@ -1,40 +0,0 @@ -import sys -import os - -# Add src to path -sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))) - -from src.history import HistoryManager - -def verify_phase_1(): - print("Verifying Phase 1: History Core Logic...") - hm = HistoryManager(max_capacity=10) - - # Test push - hm.push({"test": 1}, "initial") - if not hm.can_undo: - print("Error: can_undo should be true after push") - sys.exit(1) - - # Test undo - entry = hm.undo({"test": 2}, "current") - if entry.state != {"test": 1}: - print(f"Error: expected state {{'test': 1}}, got {entry.state}") - sys.exit(1) - if entry.description != "initial": - print(f"Error: expected description 'initial', got {entry.description}") - sys.exit(1) - - # Test redo - entry = hm.redo({"test": 1}, "back") - if entry.state != {"test": 2}: - print(f"Error: expected state {{'test': 2}}, got {entry.state}") - sys.exit(1) - if entry.description != "current": - print(f"Error: expected description 'current', got {entry.description}") - sys.exit(1) - - print("Phase 1 verification PASSED.") - -if __name__ == "__main__": - verify_phase_1() diff --git a/conductor/tests/verify_phase_2.py b/conductor/tests/verify_phase_2.py deleted file mode 100644 index 03189fbe..00000000 --- a/conductor/tests/verify_phase_2.py +++ /dev/null @@ -1,24 +0,0 @@ -import subprocess -import sys -import os - -def verify_phase_2(): - print("Verifying Phase 2: Text Input & Control Undo/Redo...") - - # Run the simulation test - result = 
subprocess.run( - ["uv", "run", "pytest", "tests/test_undo_redo_sim.py"], - capture_output=True, - text=True - ) - - if result.returncode == 0: - print("Phase 2 verification PASSED.") - else: - print("Phase 2 verification FAILED.") - print(result.stdout) - print(result.stderr) - sys.exit(1) - -if __name__ == "__main__": - verify_phase_2() diff --git a/conductor/tests/verify_phase_3.py b/conductor/tests/verify_phase_3.py deleted file mode 100644 index 01dbfb45..00000000 --- a/conductor/tests/verify_phase_3.py +++ /dev/null @@ -1,24 +0,0 @@ -import subprocess -import sys - -def verify_phase_3(): - print("Verifying Phase 3: GUI Menu Integration...") - - # We rely on the existing simulation test to verify the callback logic, - # which underpins the GUI menu integration. - result = subprocess.run( - ["uv", "run", "pytest", "tests/test_workspace_profiles_sim.py"], - capture_output=True, - text=True - ) - - if result.returncode == 0: - print("Phase 3 verification PASSED.") - else: - print("Phase 3 verification FAILED.") - print(result.stdout) - print(result.stderr) - sys.exit(1) - -if __name__ == "__main__": - verify_phase_3() diff --git a/conductor/tests/verify_phase_4.py b/conductor/tests/verify_phase_4.py deleted file mode 100644 index 8dcc7a34..00000000 --- a/conductor/tests/verify_phase_4.py +++ /dev/null @@ -1,23 +0,0 @@ -import subprocess -import sys -import os - -def verify_phase_4(): - print("Verifying Phase 4: Contextual Auto-Switch...") - - result = subprocess.run( - ["uv", "run", "pytest", "tests/test_auto_switch_sim.py"], - capture_output=True, - text=True - ) - - if result.returncode == 0: - print("Phase 4 verification PASSED.") - else: - print("Phase 4 verification FAILED.") - print(result.stdout) - print(result.stderr) - sys.exit(1) - -if __name__ == "__main__": - verify_phase_4() diff --git a/conductor/tier2/agents/tier2-autonomous.md b/conductor/tier2/agents/tier2-autonomous.md index 741b701a..073fb087 100644 --- 
a/conductor/tier2/agents/tier2-autonomous.md +++ b/conductor/tier2/agents/tier2-autonomous.md @@ -21,6 +21,8 @@ permission: "git reset*": deny --- +Note: You may use superpowers skills to assist you (brainstorming, receiving code reviews, writing plans, writing skills, dispatching parallel agents) + STRICT SYSTEM DIRECTIVE: You are a Tier 2 Tech Lead in AUTONOMOUS mode, running in the **META-TOOLING** domain (per `docs/guide_meta_boundary.md`). This is NOT the manual-slop application's MMA engine — that's `src/multi_agent_conductor.py` in the APPLICATION domain. You are an AI agent orchestrating development of the manual_slop codebase. ## MANDATORY: Domain Distinction (added 2026-06-27) @@ -115,6 +117,8 @@ These are all attempts to rewrite history. They are BANNED. The right answer is ## Conventions (MUST follow - added 2026-06-17; updated 2026-06-27) - **Test runner:** ALWAYS use `uv run python scripts/run_tests_batched.py` for test runs. NEVER call `uv run pytest` directly. The batched runner provides tier-based filtering, parallelization (xdist), and a summary table. Direct pytest is slow and bypasses the tiering that the live_gui tests depend on. +- **NEVER filter test output** (added 2026-06-27 per user directive). Do NOT pipe test output through `Select-Object`, `| Select -First N`, `| Select -Last N`, `head`, `tail`, or any truncation filter. If you need to see more output later, you'll have to re-run the entire test — which wastes time and context. Instead, ALWAYS redirect to a log file: `uv run python scripts/run_tests_batched.py > tests/artifacts/tier2_state//test_run__.log 2>&1`. Then read the log file with `manual-slop_read_file` or `grep` to find the relevant sections. The log file is your full record; you can search it without re-running. +- **Prefer targeted tier runs** (added 2026-06-27 per user directive). Do NOT run the full 11-tier batch for every verification. 
Run only the tiers relevant to the current task (e.g., `uv run python scripts/run_tests_batched.py --tier tier3` or `--filter test_`). The full batch is for the USER to run after merge review, not for Tier 2's per-task verification. Running the full batch every time wastes 20+ minutes and the output is too large to be useful in context. - **Default branch:** this repo uses `master` (not `main`). Always use `origin/master` in `git fetch` and as the base for new branches. Do not assume `main` exists. - **Line endings:** preserve existing line endings on edit. This repo has a mix of CRLF and LF (a repo-wide LF standardization is a future track). If the file is CRLF, keep it CRLF. If the file is LF, keep it LF. Do not add CRLF to LF files or strip CRLF from CRLF files. - **Throw-away scripts:** write them to `scripts/tier2/artifacts//`, NOT the base `scripts/tier2/` directory. The base directory is reserved for production code that ships with the sandbox (failcount.py, run_track.py, write_report.py, the .ps1 launchers). Throw-away scripts are kept for archival but live in a track-specific subdir so they don't pollute the base. diff --git a/conductor/tier2/commands/tier-2-auto-execute.md b/conductor/tier2/commands/tier-2-auto-execute.md index 58bbed59..61e1890e 100644 --- a/conductor/tier2/commands/tier-2-auto-execute.md +++ b/conductor/tier2/commands/tier-2-auto-execute.md @@ -51,6 +51,8 @@ Optional flags: `--resume` (continue from last completed task), `--toast` (Windo ## Conventions (MUST follow - added 2026-06-17) - **Test runner:** use `uv run python scripts/run_tests_batched.py` (NOT `uv run pytest`) +- **NEVER filter test output** (added 2026-06-27 per user directive). Do NOT pipe test output through `Select-Object`, `| Select -First N`, `| Select -Last N`, `head`, `tail`, or any truncation filter. Instead, ALWAYS redirect to a log file: `uv run python scripts/run_tests_batched.py > tests/artifacts/tier2_state//test_run__.log 2>&1`. 
Then read the log file to find relevant sections. The log file is your full record; you can search it without re-running. +- **Prefer targeted tier runs** (added 2026-06-27 per user directive). Do NOT run the full 11-tier batch for every verification. Run only the tiers relevant to the current task (e.g., `--tier tier3` or `--filter test_`). The full batch is for the USER to run after merge review, not for Tier 2's per-task verification. - **Default branch:** `master` (this repo never had `main`) - **Line endings:** preserve existing (CRLF stays CRLF, LF stays LF) - **Throw-away scripts:** write to `scripts/tier2/artifacts//`, NOT the base directory diff --git a/conductor/tracks/directive_hotswap_harness_20260627/metadata.json b/conductor/tracks/directive_hotswap_harness_20260627/metadata.json new file mode 100644 index 00000000..5418645e --- /dev/null +++ b/conductor/tracks/directive_hotswap_harness_20260627/metadata.json @@ -0,0 +1,108 @@ +{ + "track_id": "directive_hotswap_harness_20260627", + "name": "Directive Hot-Swap Harness (OpenCode Directive Presets)", + "status": "active", + "branch": "master", + "created": "2026-06-27", + "owner": "Tier 1 (initialized); implementation delegated to Tier 2/3.", + "blocked_by": [], + "blocks": ["directive_encoding_experiments (future; alternative v2+ variant authoring)", "manual_slop_directive_lab (future; GUI integration)"], + "scope": { + "new_files": [ + "conductor/directives/<48 directive directories>/v1.md (48 files)", + "conductor/directives/presets/current_baseline.md", + "docs/reports/TRACK_COMPLETION_directive_hotswap_harness_20260627.md" + ], + "modified_files": [ + ".opencode/agents/tier1-orchestrator.md (replace hardcoded reading list with warm with:)", + ".opencode/agents/tier2-tech-lead.md (same)", + ".opencode/agents/tier3-worker.md (same)", + ".opencode/agents/tier4-qa.md (same)", + "conductor/tier2/agents/tier2-autonomous.md (same)" + ], + "deleted_files": [] + }, + "estimated_effort": { + "method": "scope 
(per workflow.md Tier 1 Track Initialization Rules. NO day estimates.)", + "phase_1": "10 steps: harvest 48 directives from doc tree into conductor/directives/ with exact source file:line refs", + "phase_2": "8 steps: baseline preset + 5 role-prompt warm with: updates", + "phase_3": "4 steps: verification + end-of-track report" + }, + "verification_criteria": [ + "48 directive directories exist under conductor/directives/, each with a v1.md file", + "Each v1.md has a header annotating the source location (file:line) and why this iteration exists", + "conductor/directives/presets/current_baseline.md exists and lists all 48 directives", + "All 5 tier role prompts have a 'warm with: conductor/directives/presets/current_baseline.md' line", + "Non-directive reads (AGENTS.md, workflow.md, edit_workflow.md, forbidden-files.txt, guide_*.md) remain hardcoded in the role prompts", + "Original docs are NOT modified (conductor/directives/ is a parallel structure)", + "No scripts, no TOML, no build steps — markdown-only", + "docs/reports/TRACK_COMPLETION_directive_hotswap_harness_20260627.md exists" + ], + "regressions_and_pre_existing_failures": [], + "pre_existing_failures_remaining": [], + "deferred_to_followup_tracks": [ + { + "title": "Alternative encoding authoring (v2+ variants)", + "description": "Author v2_rationale_first.md, v3_before_after.md, v4_tabular.md etc. per directive. The actual experimentation.", + "track_status": "not yet initialized" + }, + { + "title": "Manual Slop Directive Lab (GUI integration)", + "description": "A Directive Lab panel in Manual Slop for virtualized directive selection + context aggregation.", + "track_status": "not yet initialized" + }, + { + "title": "Token-cost analysis tooling", + "description": "Measure token cost per directive variant. 
Compare compliance vs token cost.", + "track_status": "not yet initialized" + }, + { + "title": "Automated compliance testing", + "description": "Test harness to measure LLM compliance per encoding (does the LLM follow the directive?).", + "track_status": "not yet initialized" + }, + { + "title": "Video Analysis Campaign 2 (4 new videos)", + "description": "Separate campaign; follows the 3-pass pattern. May inform alternative encoding strategies.", + "track_status": "not yet initialized; separate track" + } + ], + "risk_register": [ + { + "id": "R1", + "description": "Harvest completeness: directives embedded in prose may be missed", + "likelihood": "medium", + "impact": "the baseline preset is incomplete; some directives are not swappable", + "mitigation": "systematic combing of the entire doc tree with grep; the plan's Step 1.1-1.10 cover every doc file identified in the spec's source list" + }, + { + "id": "R2", + "description": "Granularity ambiguity: some directives overlap (e.g., ban_dict_any + typed_dataclass_fields are two sides of the same coin)", + "likelihood": "medium", + "impact": "the directive count is inflated by overlapping directives; preset becomes verbose", + "mitigation": "the 48-directive list is the initial best-guess; granularity is resolved iteratively as the user experiments. Merging directives is a future preset edit, not a blocker." + }, + { + "id": "R3", + "description": "LLM doesn't follow the warm with: instruction reliably", + "likelihood": "low", + "impact": "the LLM doesn't read the preset or the variant files; directives are missing from context", + "mitigation": "the instruction is simple (read a file, read the files it lists) and uses the existing file-reading behavior. The Step 3.2 manual verification catches this." 
+ }, + { + "id": "R4", + "description": "Role-prompt update breaks existing Tier 2 autonomous runs", + "likelihood": "low", + "impact": "Tier 2 starts reading a different set of files; behavior changes", + "mitigation": "the current_baseline preset lists the exact same directives that were hardcoded. The change is structural (where the list lives), not semantic (what the directives say)." + } + ], + "campaign_context": { + "campaign_name": "Directive Encoding Campaign (Campaign A)", + "track_1": "directive_hotswap_harness_20260627 (THIS; harvest + scaffold + baseline preset + role-prompt bootstrap)", + "track_2": "directive_encoding_experiments (future; v2+ variant authoring + preset experimentation)", + "track_3": "manual_slop_directive_lab (future; GUI integration)", + "sibling_campaign": "Video Analysis Campaign 2 (Campaign B; 4 new videos; separate track)", + "cross_campaign_relationship": "Intellectual cross-pollination; no hard dependency. Video insights may surface alternative encoding strategies. The harness design mirrors the video campaign's deobfuscation pattern (same content, different encoding)." + } +} \ No newline at end of file diff --git a/conductor/tracks/directive_hotswap_harness_20260627/plan.md b/conductor/tracks/directive_hotswap_harness_20260627/plan.md new file mode 100644 index 00000000..d2ee6574 --- /dev/null +++ b/conductor/tracks/directive_hotswap_harness_20260627/plan.md @@ -0,0 +1,490 @@ +# Directive Hot-Swap Harness Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. 
+ +**Goal:** Build a directive hot-swap harness that lets the user maintain alternative encodings of the same directive as separate files, compose them into named presets (markdown bills of materials), and hot-swap which preset is active via a single `warm with: ` instruction in the role prompt or session message. + +**Architecture:** A `conductor/directives/` directory tree where each directive is a subdirectory and each encoding variant is a file (`v1.md`, `v2_