From 0b03b612b952c3ce2ab7270d7c46146a03e27ed3 Mon Sep 17 00:00:00 2001
From: Ed_ <edwardgz@gmail.com>
Date: Sun, 1 Mar 2026 10:16:38 -0500
Subject: [PATCH] chore: Wire architecture docs into mma_exec.py and workflow
 delegation prompts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

mma_exec.py changes:
- get_role_documents: Tier 1 now also gets docs/guide_architecture.md +
  docs/guide_mma.md (was: only product.md + product-guidelines.md). Tier 2 gets
  the same two additions (was: only tech-stack.md + workflow.md). Tier 3 now gets
  guide_architecture.md in place of workflow.md (previously, workers modifying
  gui_2.py had zero knowledge of the threading model). Tier 4 gets
  guide_architecture.md (was: nothing).
- Tier 3 system directive: Added ARCHITECTURE REFERENCE callout, CRITICAL
  THREADING RULE (never write GUI state from background thread), TASK FORMAT
  instruction (follow WHERE/WHAT/HOW/SAFETY from surgical tasks), and
  py_get_definition to tool list.
- Tier 4 system directive: Added ARCHITECTURE REFERENCE callout, instruction
  to trace errors through thread domains documented in guide_architecture.md,
  and py_get_definition to tool list.

conductor/workflow.md changes:
- Red Phase delegation prompt: Replaced the generic 'with a prompt to create
  the necessary test files and unit tests' wording with the surgical prompt
  format (WHERE/WHAT/HOW/SAFETY) plus a worked example.
- Green Phase delegation prompt: Replaced 'with a highly specific prompt' with
  surgical prompt format example with exact line refs and API calls.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 conductor/workflow.md |  4 ++--
 scripts/mma_exec.py   | 20 +++++++++++++++-----
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/conductor/workflow.md b/conductor/workflow.md
index 5e012f6..bd7e2b5 100644
--- a/conductor/workflow.md
+++ b/conductor/workflow.md
@@ -36,14 +36,14 @@ All tasks follow a strict lifecycle:
 4. **Write Failing Tests (Red Phase):**
    - **Pre-Delegation Checkpoint:** Before spawning a worker for dangerous or non-trivial changes, ensure your current progress is staged (`git add .`) or committed. This prevents losing iterations if a sub-agent incorrectly uses `git restore`.
    - **Code Style:** ALWAYS explicitly mention "Use exactly 1-space indentation for Python code" when prompting a sub-agent.
-   - **Delegate Test Creation:** Do NOT write test code directly. Spawn a Tier 3 Worker (`python scripts/mma_exec.py --role tier3-worker "[PROMPT]"`) with a prompt to create the necessary test files and unit tests based on the task criteria. (If repeating due to failures, pass `--failure-count X` to switch to a more capable model).
+   - **Delegate Test Creation:** Do NOT write test code directly. Spawn a Tier 3 Worker (`python scripts/mma_exec.py --role tier3-worker "[PROMPT]"`) with a **surgical prompt** specifying WHERE (file:line range), WHAT (test to create), HOW (which assertions/fixtures to use), and SAFETY (thread constraints if applicable). Example: `"Write tests in tests/test_cost_tracker.py for cost_tracker.py:estimate_cost(). Test all model patterns in MODEL_PRICING dict. Assert unknown model returns 0. Use 1-space indentation."` (If repeating due to failures, pass `--failure-count X` to switch to a more capable model).
    - Take the code generated by the Worker and apply it.
    - **CRITICAL:** Run the tests and confirm that they fail as expected. This is the "Red" phase of TDD. Do not proceed until you have failing tests.
 
 5. **Implement to Pass Tests (Green Phase):**
    - **Pre-Delegation Checkpoint:** Ensure current progress is staged or committed before delegating.
    - **Code Style:** ALWAYS explicitly mention "Use exactly 1-space indentation for Python code" when prompting a sub-agent.
-   - **Delegate Implementation:** Do NOT write the implementation code directly. Spawn a Tier 3 Worker (`python scripts/mma_exec.py --role tier3-worker "[PROMPT]"`) with a highly specific prompt to write the minimum amount of application code necessary to make the failing tests pass. (If repeating due to failures, pass `--failure-count X` to switch to a more capable model).
+   - **Delegate Implementation:** Do NOT write the implementation code directly. Spawn a Tier 3 Worker (`python scripts/mma_exec.py --role tier3-worker "[PROMPT]"`) with a **surgical prompt** specifying WHERE (file:line range to modify), WHAT (the specific change), HOW (which API calls, data structures, or patterns to use), and SAFETY (thread-safety constraints). Example: `"In gui_2.py _render_mma_dashboard (lines 2685-2699), extend the token usage table from 3 to 5 columns. Add 'Model' and 'Est. Cost' using imgui.table_setup_column(). Call cost_tracker.estimate_cost(model, input_tokens, output_tokens). Use 1-space indentation."` (If repeating due to failures, pass `--failure-count X` to switch to a more capable model).
    - Take the code generated by the Worker and apply it.
    - Run the test suite again and confirm that all tests now pass. This is the "Green" phase.
 
diff --git a/scripts/mma_exec.py b/scripts/mma_exec.py
index 48d4558..2f549d4 100644
--- a/scripts/mma_exec.py
+++ b/scripts/mma_exec.py
@@ -73,11 +73,15 @@ def get_model_for_role(role: str, failure_count: int = 0) -> str:
 
 def get_role_documents(role: str) -> list[str]:
  if role == 'tier1-orchestrator' or role == 'tier1':
-  return ['conductor/product.md', 'conductor/product-guidelines.md']
+  return ['conductor/product.md', 'conductor/product-guidelines.md',
+   'docs/guide_architecture.md', 'docs/guide_mma.md']
  elif role == 'tier2-tech-lead' or role == 'tier2':
-  return ['conductor/tech-stack.md', 'conductor/workflow.md']
+  return ['conductor/tech-stack.md', 'conductor/workflow.md',
+   'docs/guide_architecture.md', 'docs/guide_mma.md']
  elif role == 'tier3-worker' or role == 'tier3':
-  return ['conductor/workflow.md']
+  return ['docs/guide_architecture.md']
+ elif role == 'tier4-qa' or role == 'tier4':
+  return ['docs/guide_architecture.md']
  return []
 
 def log_delegation(role: str, full_prompt: str, result: str | None = None, summary_prompt: str | None = None) -> str:
@@ -165,16 +169,22 @@ def execute_agent(role: str, prompt: str, docs: list[str], debug: bool = False,
   "Your goal is to implement specific code changes or tests based on the provided task. " \
   "CRITICAL CODE STYLE RULE: ALL Python code MUST use exactly 1 SPACE for indentation. DO NOT use 4 spaces or tabs. " \
   "You have access to tools for reading and writing files (e.g., read_file, write_file, replace), " \
-  "codebase investigation (discovered_tool_py_get_code_outline, discovered_tool_py_get_skeleton, discovered_tool_py_find_usages, discovered_tool_py_get_imports, discovered_tool_py_check_syntax, discovered_tool_get_tree), " \
+  "codebase investigation (discovered_tool_py_get_code_outline, discovered_tool_py_get_skeleton, discovered_tool_py_get_definition, discovered_tool_py_find_usages, discovered_tool_py_get_imports, discovered_tool_py_check_syntax, discovered_tool_get_tree), " \
   "version control (discovered_tool_get_git_diff), and web tools (discovered_tool_web_search, discovered_tool_fetch_url). " \
   "You CAN execute PowerShell scripts via discovered_tool_run_powershell for verification and testing. " \
+  "ARCHITECTURE REFERENCE: docs/guide_architecture.md contains the threading model, cross-thread data structures, " \
+  "frame-sync mechanism (_process_pending_gui_tasks action catalog), AI client architecture, and HITL Execution Clutch. " \
+  "CRITICAL THREADING RULE: NEVER write GUI state from a background thread. Push task dicts to _pending_gui_tasks with the lock. " \
+  "TASK FORMAT: Your task will specify WHERE (file:line), WHAT (change), HOW (API calls), and SAFETY (thread constraints). Follow these exactly. " \
   "Follow TDD and return success status or code changes. No pleasantries, no conversational filler."
  elif role in ['tier4', 'tier4-qa']:
   system_directive = "STRICT SYSTEM DIRECTIVE: You are a stateless Tier 4 QA Agent. " \
   "Your goal is to analyze errors, summarize logs, or verify tests. " \
-  "You have access to tools for reading files, exploring the codebase (discovered_tool_py_get_code_outline, discovered_tool_py_get_skeleton, discovered_tool_py_find_usages, discovered_tool_py_get_imports), " \
+  "You have access to tools for reading files, exploring the codebase (discovered_tool_py_get_code_outline, discovered_tool_py_get_skeleton, discovered_tool_py_get_definition, discovered_tool_py_find_usages, discovered_tool_py_get_imports), " \
   "version control (discovered_tool_get_git_diff), and web tools (discovered_tool_web_search, discovered_tool_fetch_url). " \
   "You CAN execute PowerShell scripts via discovered_tool_run_powershell for diagnostics. " \
+  "ARCHITECTURE REFERENCE: docs/guide_architecture.md contains the threading model and data flow. " \
+  "When analyzing errors, trace the data flow through the thread domains (GUI main, asyncio worker, HookServer) documented there. " \
   "ONLY output the requested analysis. No pleasantries."
  else:
   system_directive = f"STRICT SYSTEM DIRECTIVE: You are a stateless {role}. " \