diff --git a/conductor/tracks/fable_review_20260617/metadata.json b/conductor/tracks/fable_review_20260617/metadata.json new file mode 100644 index 00000000..11037897 --- /dev/null +++ b/conductor/tracks/fable_review_20260617/metadata.json @@ -0,0 +1,91 @@ +{ + "track_id": "fable_review_20260617", + "name": "Fable System Prompt Review (Critical Analysis)", + "initialized": "2026-06-17", + "owner": "tier1-orchestrator (spec + synthesis); tier2-tech-lead (dispatch + QA)", + "priority": "medium", + "status": "spec_approved", + "type": "research-only (critical-analysis deliverable; no src/ changes, no tests/ changes, no new deps)", + "domain": "meta-tooling (the report is a critical-analysis deliverable; the track produces no Application code)", + "user_hard_rule": "docs/artifacts/Fable System Prompt.txt is NEVER committed. The artifact stays at that local path; the report and the cluster sub-references quote line ranges (≤15 words per quote) but the file does not enter git. Do not modify .gitignore for this; the rule is enforced by the implementer's discipline, not by a tracked file. git add . 
MUST be inspected before each commit in this track.", + "scope": { + "new_files": [ + "conductor/tracks/fable_review_20260617/spec.md", + "conductor/tracks/fable_review_20260617/metadata.json", + "conductor/tracks/fable_review_20260617/state.toml", + "conductor/tracks/fable_review_20260617/research/cluster_1_product_branding.md", + "conductor/tracks/fable_review_20260617/research/cluster_2_refusal_architecture.md", + "conductor/tracks/fable_review_20260617/research/cluster_3_user_wellbeing_watchdog.md", + "conductor/tracks/fable_review_20260617/research/cluster_4_tone_and_formatting.md", + "conductor/tracks/fable_review_20260617/research/cluster_5_mistakes_and_criticism.md", + "conductor/tracks/fable_review_20260617/research/cluster_6_evenhandedness.md", + "conductor/tracks/fable_review_20260617/research/cluster_7_epistemic_discipline.md", + "conductor/tracks/fable_review_20260617/research/cluster_8_memory_and_storage.md", + "conductor/tracks/fable_review_20260617/research/cluster_9_computer_use.md", + "conductor/tracks/fable_review_20260617/research/cluster_10_mcp_app_suggestions.md", + "conductor/tracks/fable_review_20260617/report.md", + "conductor/tracks/fable_review_20260617/comparison_table.md", + "conductor/tracks/fable_review_20260617/decisions.md", + "conductor/tracks/fable_review_20260617/nagent_takeaways_fable_20260617.md" + ], + "modified_files": [ + "conductor/tracks.md (register the track in the appropriate section)" + ], + "deleted_files": [], + "external_resources": [ + "docs/artifacts/Fable System Prompt.txt (LOCAL-ONLY; 1585 lines, 120KB; the subject of the review; NEVER COMMITTED)", + "conductor/tracks/nagent_review_20260608/ (the nagent corpus; 11 files; all in scope)" + ] + }, + "blocked_by": [], + "blocks": [ + "the deferred nagent-rebuild (the recommendations in decisions.md are inputs to that future track; the rebuild is not this track)" + ], + "estimated_phases": 7, + "tshirt_size": "XL (similar to the nagent_review v2.3 rewrite at 4,969 
lines; 10 cluster sub-reports + 17-section synthesis report + 3 side artifacts = ~10,300 LOC total)", + "estimated_effort": "scope: 1 spec + 1 metadata.json + 1 state.toml + 10 cluster sub-reports (~3,500 LOC) + 1 main report (4,800 LOC) + 3 side artifacts (1,350 LOC) = T-shirt size XL. Method: scope (per conductor/workflow.md §Tier 1 Track Initialization Rules). NO day estimates.", + "phases": [ + {"id": 1, "name": "Initialize track + skeletons", "tshirt": "S", "sub_agents": 0}, + {"id": 2, "name": "Dispatch 10 cluster sub-agents in parallel", "tshirt": "L", "sub_agents": 10}, + {"id": 3, "name": "Tier 1 writes 17 synthesis sections (max-token-output strategy)", "tshirt": "XL", "sub_agents": 0}, + {"id": 4, "name": "Tier 1 writes 3 side artifacts", "tshirt": "M", "sub_agents": 0}, + {"id": 5, "name": "Self-review per the brainstorming skill", "tshirt": "S", "sub_agents": 0}, + {"id": 6, "name": "User review gate", "tshirt": "S", "sub_agents": 0}, + {"id": 7, "name": "Final commit + register track in conductor/tracks.md", "tshirt": "S", "sub_agents": 0} + ], + "spec": "spec.md", + "plan": "to be authored by the writing-plans skill after spec approval", + "verification_criteria": [ + "All 10 cluster sub-reports exist at conductor/tracks/fable_review_20260617/research/cluster_N_*.md and are 200-500 lines each.", + "Every cluster sub-report cites specific Fable line numbers, project file:line refs, and nagent section refs.", + "Every cluster sub-report has a verdict (Useful / Persona Performance / Anti-User / Mixed) with justification.", + "Every cluster sub-report has a 'Synthesis notes for the Tier 1 writer' section.", + "The synthesis report conductor/tracks/fable_review_20260617/report.md has all 17 sections present and non-empty.", + "The synthesis report is >3500 LOC.", + "Every synthesis section references its source cluster(s) by file:line.", + "The 3 side artifacts exist at conductor/tracks/fable_review_20260617/{comparison_table.md, decisions.md, 
nagent_takeaways_fable_20260617.md}.", + "comparison_table.md has ~100 rows.", + "decisions.md has 15-20 concrete recommendations.", + "nagent_takeaways_fable_20260617.md is ~150 lines.", + "The Fable artifact at docs/artifacts/Fable System Prompt.txt was NEVER committed. Verification command: git log --all --full-history -- 'docs/artifacts/Fable*' returns zero entries.", + "Self-review pass complete (placeholder scan, internal consistency, scope check, ambiguity check).", + "User has reviewed and approved the final report.", + "conductor/tracks.md is updated to register the track.", + "All commits are per-file atomic with git notes.", + "state.toml final state is current_phase = 7 and the track is in the appropriate section per the convention." + ], + "pre_existing_failures_remaining": [], + "deferred_to_followup_tracks": [ + {"title": "Deferred nagent-rebuild (Manual Slop agent-directive overhaul)", "description": "User-deferred 1-2 weeks (per 2026-06-17 user message). The Fable review's decisions.md is one of several inputs to this rebuild; the rebuild itself is not this track.", "track_status": "user-deferred (no track yet)"} + ], + "risk_register": [ + {"name": "Fable prompt grows/evolves during the track", "likelihood": "low", "impact": "low", "mitigation": "The artifact is a snapshot at 2026-06-17; we note the date. If the user has a newer version, the track re-dispatches the cluster agents."}, + {"name": "10 sub-agents in parallel = high token cost", "likelihood": "medium", "impact": "medium (cost)", "mitigation": "Each sub-agent gets a 500-line output budget; the dispatch is mma_exec.py --role tier3-worker with explicit context files. 
Total cluster output: ~3,500 LOC across 10 files."}, + {"name": "Tier 1's synthesis hits context pressure after 17 sections", "likelihood": "medium", "impact": "high (track stalls mid-synthesis)", "mitigation": "Per-section commits serve as a rollback point; if Tier 1 hits pressure mid-section, the section can be handed off to a fresh Tier 1 with the cluster reports + the previous sections as context."}, + {"name": "User disagrees with a verdict", "likelihood": "low", "impact": "low", "mitigation": "The user-review gate at the end of phase 6 catches this; revisions are local."}, + {"name": "Cluster sub-agents over-quote Fable (copyright)", "likelihood": "low", "impact": "medium", "mitigation": "Each cluster's acceptance check enforces the ≤15-word quote discipline; Fable's own rule applied externally."}, + {"name": "Fable artifact accidentally committed", "likelihood": "low", "impact": "high (user's hard rule violated)", "mitigation": "The Fable artifact is NEVER in the same git add as anything else. Per-commit git status inspection. Final verification: git log --all --full-history -- 'docs/artifacts/Fable*' returns zero."}, + {"name": "Tier 2 doesn't dispatch cluster sub-agents correctly", "likelihood": "medium", "impact": "medium", "mitigation": "The Tier 1's spec includes the read budget per sub-agent (§5). 
The Tier 2's plan must include explicit context-file lists per dispatch."}, + {"name": "Tier 1's report deviates from the cluster verdicts (editorial drift)", "likelihood": "low", "impact": "low", "mitigation": "The synthesis report's verdicts are anchored to the cluster reports' verdicts; if a synthesis section changes a verdict, it must explicitly note the override."} + ] +} diff --git a/conductor/tracks/fable_review_20260617/spec.md b/conductor/tracks/fable_review_20260617/spec.md new file mode 100644 index 00000000..f1996856 --- /dev/null +++ b/conductor/tracks/fable_review_20260617/spec.md @@ -0,0 +1,420 @@ +# Track: Fable System Prompt Review (Critical Analysis) + +**Status:** Spec approved 2026-06-17 +**Initialized:** 2026-06-17 +**Owner:** Tier 1 Orchestrator (spec + synthesis); Tier 2 Tech Lead (dispatch + QA) +**Priority:** Medium (user-requested critical review; informs the deferred nagent-rebuild, scheduled 1-2 weeks out) +**Type:** Research-only (no `src/` changes, no `tests/` changes, no new deps, no agent-directive modifications) +**Domain:** Meta-Tooling (the report is a *critical-analysis deliverable*; the track produces no Application code) + +> **Purpose.** This track produces a single critical-analysis report: a side-by-side comparison of Anthropic's Claude Fable 5 system prompt (the public version of "Mythos") against Manual Slop's existing agent-directive corpus and Mike Acton's nagent patterns, with verdicts on which Fable patterns are *generally useful*, which are *persona performance* (irrelevant constraint dressing), and which are *anti-user watch-dogging* (the model is text generation, not a clinician). The report is the *evidence document* the user can use to argue against Fable-style "helpful, harmless, honest" framing in agent systems. The track is *research-only*; no edits to the project's directives, no follow-up implementation. + +> **Companion doc.** The actual report is at `conductor/tracks/fable_review_20260617/report.md`. 
This `spec.md` is the conductor/track wrapper: the design intent, the cluster architecture, the synthesis plan, the verification criteria, the out-of-scope notes, and the connection to the deferred nagent-rebuild. + +> **Hard rule (the user was explicit).** `docs/artifacts/Fable System Prompt.txt` is **never committed**. The artifact stays at that local path; the report and the cluster sub-references quote line ranges (≤15 words per quote, the same discipline Fable itself applies to its own search results) but the file does not enter git. **Do not** modify `.gitignore` for this; the rule is enforced by the implementer's discipline, not by a tracked file. `git add .` MUST be inspected before each commit in this track. + +--- + +## 1. Overview + +This track produces a critical analysis of Anthropic's Claude Fable 5 system prompt (1585 lines, 120KB), comparing it against: + +1. **Manual Slop's existing agent-directive corpus** — `AGENTS.md` (200 lines), `conductor/*.md` (workflow.md, product.md, product-guidelines.md, tech-stack.md, edit_workflow.md, tracks.md, index.md), `conductor/code_styleguides/*.md` (11 files), `.opencode/agents/*.md` (6 files), `.opencode/commands/*.md` (9 files), `docs/*.md` (40+ files including 36 `guide_*.md`), and the superpowers-plugin content loaded via the opencode `skill` tool. +2. **Mike Acton's nagent reports** in `conductor/tracks/nagent_review_20260608/` — the original `nagent_takeaways_20260608.md`, the `report.md`, the `decisions.md`, the `comparison_table.md`, and the v2 series (`nagent_review_v2_20260612.md`, `v2_1`, `v2_2`, `v2_3`). + +The analytical framework is the user's own framing: **how much of Fable is generally useful vs. 
how much is "nerf on the model's capabilities" via persona constraint, anti-user watch-dogging, or fake-clinician framing?** + +The report follows the nagent_review track's distributed-sub-agent pattern: 10 cluster sub-reports written in parallel by Tier 3 workers, then synthesized by Tier 1 in 17+ section-passes using a max-token-output strategy to hit **>3500 LOC total**. + +### 1.1 What this track produces + +| Artifact | Purpose | Owner | Approx LOC | +|---|---|---|---| +| `spec.md` | This file — the track design. | Tier 1 | ~400 | +| `metadata.json` | The track metadata (id, scope, blocks, etc.). | Tier 1 | ~50 | +| `state.toml` | The track state (current_phase, task tracking). | Tier 1 | ~80 | +| `research/cluster_1_product_branding.md` | Cluster 1 sub-report. | Tier 3 sub-agent | ~300 | +| `research/cluster_2_refusal_architecture.md` | Cluster 2 sub-report. | Tier 3 sub-agent | ~400 | +| `research/cluster_3_user_wellbeing_watchdog.md` | Cluster 3 sub-report. | Tier 3 sub-agent | ~400 | +| `research/cluster_4_tone_and_formatting.md` | Cluster 4 sub-report. | Tier 3 sub-agent | ~300 | +| `research/cluster_5_mistakes_and_criticism.md` | Cluster 5 sub-report. | Tier 3 sub-agent | ~250 | +| `research/cluster_6_evenhandedness.md` | Cluster 6 sub-report. | Tier 3 sub-agent | ~350 | +| `research/cluster_7_epistemic_discipline.md` | Cluster 7 sub-report. | Tier 3 sub-agent | ~400 | +| `research/cluster_8_memory_and_storage.md` | Cluster 8 sub-report. | Tier 3 sub-agent | ~400 | +| `research/cluster_9_computer_use.md` | Cluster 9 sub-report. | Tier 3 sub-agent | ~350 | +| `research/cluster_10_mcp_app_suggestions.md` | Cluster 10 sub-report. | Tier 3 sub-agent | ~300 | +| `report.md` | The main synthesis report (17 sections, >3500 LOC). | Tier 1 | ~4800 | +| `comparison_table.md` | Flat side-by-side verdict table. | Tier 1 | ~700 | +| `decisions.md` | Recommendations for the deferred nagent-rebuild. 
| Tier 1 | ~500 | +| `nagent_takeaways_fable_20260617.md` | Fable-specific extension to `nagent_takeaways_20260608.md`. | Tier 1 | ~150 | + +**Total new files:** 17 (15 markdown + 1 metadata.json + 1 state.toml). Approx total LOC: ~10,300. + +### 1.2 Non-Goals + +- **Not** modifying any agent-directive file in the project. The recommendations go in `decisions.md` for the user's deferred nagent-rebuild (1-2 weeks out). +- **Not** building any recommendation. The deferred rebuild is its own track. +- **Not** comparing Fable to other commercial system prompts (OpenAI, Google, xAI). Out of scope; Fable is the named subject. +- **Not** reading every line of every project file. Cluster sub-agents read the relevant sections of the relevant files; full-file reads are unnecessary and would waste context. +- **Not** committing the Fable artifact. The artifact stays at `docs/artifacts/Fable System Prompt.txt`; clusters quote line ranges but the file itself never enters git. +- **Not** adding new `src/` code, new tests, `pyproject.toml` dependencies, or `scripts/` files. +- **Not** running automated tests. The track is research-only; verification is the brainstorming-skill self-review plus user review. + +--- + +## 2. Current State Audit (as of commit `HEAD`, 2026-06-17) + +### 2.1 Already Implemented (DO NOT re-implement) + +The Fable artifact exists at `docs/artifacts/Fable System Prompt.txt` (120,039 bytes, 1585 lines). The cluster sub-agents and the synthesis report reference it by file path + line range. The artifact is the *only* Fable source material; nothing else Fable-specific is in the project. + +The nagent_review corpus is at `conductor/tracks/nagent_review_20260608/`: + +| File | LOC | Bytes | Purpose | +|---|---|---|---| +| `nagent_review_v2_3_20260612.md` | 4969 | 276,531 | The latest full rewrite (v2.3, 2026-06-12). The 14 patterns + the 16 future-track candidates. | +| `nagent_review_v2_20260612.md` | 1335 | 68,428 | The v2 draft (preserved per user). 
| +| `nagent_review_v2_1_20260612.md` | 1197 | 58,844 | The user-revised v2.1 (CLAUDE.md → AGENTS.md swap, RAG reframe, cache TTL GUI controls). | +| `nagent_review_v2_2_20260612.md` | 712 | 35,356 | The v2.2 incremental. | +| `nagent_takeaways_20260608.md` | 599 | 31,238 | The original 10 takeaways from the v1 review. | +| `report.md` | 1024 | 52,544 | The v1 14-section deep-dive. | +| `decisions.md` | 286 | 18,433 | The 10 future-track candidates from v1. | +| `comparison_table.md` | 211 | 10,849 | The flat side-by-side table from v1. | +| `spec.md` | 240 | 21,173 | The v1 spec. | +| `state.toml` | — | 19,477 | The track state. | +| `metadata.json` | — | 20,034 | The track metadata. | + +The agent-directive files that the clusters will reference (per the user's scope clarification): + +| Directory | File count | Approx total LOC | +|---|---|---| +| `AGENTS.md` (root) | 1 | ~200 | +| `conductor/*.md` | 7 | ~3000 | +| `conductor/code_styleguides/*.md` | 11 | ~2400 | +| `.opencode/agents/*.md` | 6 | ~1100 | +| `.opencode/commands/*.md` | 9 | ~700 | +| `docs/*.md` (excluding `superpowers/`) | 40+ | ~16,000 | +| `conductor/tracks/nagent_review_20260608/*` | 11 | ~10,500 | +| superpowers plugin content (loaded via `skill` tool) | — | n/a (in-context only) | + +### 2.2 Gaps to Fill (This Track's Scope) + +- **The synthesis report.** A 17-section, >3500-LOC critical analysis of Fable against the project's directives and nagent patterns. Does not exist. +- **The 10 cluster sub-reports.** Distributed parallel sub-agent output. Do not exist. +- **The comparison table.** A flat verdict-by-verdict cross-reference of Fable's themes against the project's themes. Does not exist. +- **The decisions file.** Concrete recommendations for the deferred nagent-rebuild. Does not exist. +- **The nagent_takeaways extension.** A Fable-specific addendum to the v1 takeaways file. Does not exist. 
+ +### 2.3 Pre-Existing Conditions the Track Must Respect + +- The deferred nagent-rebuild: per the user, the project's agent directives are not yet overhauled based on `nagent_review_v2_3_20260612.md`. The Fable review is a *parallel* analysis that will inform (but not consume) the deferred rebuild. +- The data-oriented error handling convention: the project's `Result[T]` / `ErrorInfo` convention (per `conductor/code_styleguides/error_handling.md`) is the data-grounded contrast to Fable's persona-driven error-handling guidance. The synthesis report uses the convention's terminology when discussing Fable's error responses. +- The "less Python does, the better" heuristic: the synthesis report is itself a critical-analysis document; the report's verbosity is deliberate (per the user's max-token-output strategy) but the *conclusions* should be terse and actionable. + +--- + +## 3. Goals (Priority Order) + +| Priority | Goal | Rationale | +|---|---|---| +| **A (primary value)** | The synthesis report (`report.md`, >3500 LOC) covers all 17 sections, each with a clear verdict on every Fable pattern in scope. | The report is the deliverable. | +| **A (primary value)** | The 10 cluster sub-reports (`research/cluster_*.md`) cite specific Fable line numbers, project file:line refs, and nagent section refs. | The clusters are the evidence base. The synthesis report cites them by file:line. | +| **A (primary value)** | The "Useful vs Persona vs Anti-User" framework is applied consistently to every cluster. Every Fable pattern gets a verdict; no pattern is left unjudged. | The framework is the analytical lens the user asked for. | +| **B (analytical)** | The 3 side artifacts (`comparison_table.md`, `decisions.md`, `nagent_takeaways_fable_20260617.md`) are produced and consistent with the synthesis report. | The side artifacts make the synthesis referenceable and actionable for the deferred rebuild. 
| +| **B (process)** | The cluster sub-agents enforce the ≤15-word quote discipline (Fable's own rule applied externally). No long paraphrased passages that mirror Fable's structure (also Fable's rule, per `search_instructions`). | Defensive against the Fable copyright pattern; the report is "evidence document" not "Fable reproduction." | +| **B (process)** | Each cluster is independently verifiable: a reader can re-derive the verdict by reading the cluster sub-report + the cited Fable lines + the cited project files. | The report's credibility depends on traceability. | +| **C (housekeeping)** | `conductor/tracks.md` is updated to register the track in the "Recently Completed" section when the track ships. | Standard per-track convention. | +| **C (housekeeping)** | The Fable artifact at `docs/artifacts/Fable System Prompt.txt` is **not** committed. The track's git history contains zero references to the artifact's bytes (only to the path for citation). | The user's hard rule. | + +--- + +## 4. Architecture (the cluster + synthesis design) + +### 4.1 Cluster Sub-Report Template (per `research/cluster_N_*.md`) + +Each cluster follows the `cluster_8_metadesk.md` template from `intent_dsl_survey_20260612/`: + +```markdown +# Cluster N: {Title} + +**Sub-agent dispatch:** Tier 3 Worker (2026-06-17). Read-only research task. +**Sources read:** +- `docs/artifacts/Fable System Prompt.txt` lines X-Y +- {project file:line refs} +- {nagent_review file:line refs} + +--- + +## 1. What Fable says +{Verbatim quotes ≤15 words with line numbers; paraphrases otherwise.} + +## 2. What this project does +{Citations from AGENTS.md, conductor/*.md, .opencode/*, code_styleguides/*.md, docs/*.md} + +## 3. What nagent does +{Citations from nagent_review_v2_3_20260612.md and friends.} + +## 4. Verdict +{Useful / Persona Performance / Anti-User / Mixed, with 1-paragraph justification.} + +## 5. 
Synthesis notes for the Tier 1 writer +{Which synthesis report section(s) this cluster feeds; key claims to surface; quotes to use.} + +--- + +**Sub-report complete.** This is the evidence base for §{N} of `report.md`. +``` + +### 4.2 The Synthesis Report Plan (`report.md`, 17 sections, >3500 LOC) + +| § | Section | Approx LOC | Source clusters | Verdict orientation | +|---|---|---|---|---| +| 0 | TL;DR + Verdict Scorecard (1-page summary table) | 100 | All | (summary) | +| 1 | The 3 Sources (Fable, Manual Slop, nagent) — what's in scope | 200 | n/a | (framing) | +| 2 | The "Useful vs Persona vs Anti-User" Framework | 250 | n/a | (methodology) | +| 3 | Fable's Product Branding & "Helpful Assistant" Persona | 300 | 1 | Persona Performance | +| 4 | Fable's Refusal Architecture & "Safety Theater" | 350 | 2 | Anti-User + Persona | +| 5 | Fable's Mental-Health Watchdog Framing | 350 | 3 | Anti-User | +| 6 | Fable's Tone & Formatting Constraints | 250 | 4 | Useful + Persona | +| 7 | Fable's Mistake Handling | 200 | 5 | Persona | +| 8 | Fable's Evenhandedness & Contested Content | 300 | 6 | Persona + Useful caveats | +| 9 | Fable's Epistemic Discipline & Search Strategy | 350 | 7 | Useful | +| 10 | Fable's Memory System & Persistent Storage | 350 | 8 | Useful + nagent-stronger | +| 11 | Fable's Computer-Use / File Workflow | 300 | 9 | Useful + over-broad | +| 12 | Fable's MCP App Suggestions | 250 | 10 | Useful + over-engineered | +| 13 | The "Genuinely Useful" Patterns (Manual Slop should adopt) | 350 | 7-10 | Useful summary | +| 14 | The "Anti-User Watchdog" Patterns (Manual Slop should explicitly reject) | 350 | 2-6 | Anti-User summary | +| 15 | The "Persona Performance" Patterns (irrelevant to the rebuild) | 250 | 1, 4, 5, 8 | Persona summary | +| 16 | Recommendations for the deferred nagent-rebuild | 200 | All | Actionable | +| 17 | References (file:line index) | 150 | All | Index | +| **Total** | | **~4,800** | | | + +The "max token output strategy" works like this: 
each section is its own `write`/`manual-slop_edit_file` call by Tier 1, with the cluster reports + the previous sections loaded into context. 17 sections = 17 atomic commits (per `conductor/workflow.md` §"Task Workflow" step 9). + +### 4.3 The Cluster-to-Section Mapping + +The synthesis report's section count (17) is intentionally larger than the cluster count (10) so each cluster's evidence can be spread across multiple synthesis sections (e.g., Cluster 2 "refusal" feeds §4 directly and §14's anti-user summary; Cluster 7 "epistemic" feeds §9 directly and §13's useful summary). + +### 4.4 Tier 1's Workflow Per Section + +1. Read the relevant cluster sub-report(s) in full. +2. Read the cited Fable lines (via `manual-slop_get_file_slice`). +3. Read the cited project file lines (via `manual-slop_get_file_slice` or `manual-slop_py_get_definition` for code refs). +4. Read the cited nagent_review sections (via `manual-slop_get_file_slice`). +5. Write the synthesis section with a `write` or `manual-slop_set_file_slice` call. +6. Self-review the section for placeholders, internal consistency, scope, ambiguity. +7. Commit with a 1-3 sentence commit message; attach a git note summarizing the section. +8. Move to the next section. + +--- + +## 5. 
The 10 Cluster Specifications + +| # | Cluster | Fable source | Project refs | nagent refs | Sub-agent read budget | +|---|---|---|---|---|---| +| 1 | **Product Branding & "Helpful Assistant" Persona** | `Fable System Prompt.txt:1-31` (`product_information`) | `AGENTS.md` (root); `conductor/product.md`; `docs/Readme.md` (the "What This Is" framing) | n/a (nagent doesn't have product branding) | 600 lines | +| 2 | **Refusal Architecture & "Safety Theater"** | `Fable System Prompt.txt:32-53` (`refusal_handling`, `legal_and_financial_advice`) | `AGENTS.md` §"Critical Anti-Patterns"; `conductor/workflow.md` §"Skip-Marker Policy"; `conductor/code_styleguides/error_handling.md` | nagent §14 (Own the Inputs); nagent §2.1 (4 memory dimensions) | 800 lines | +| 3 | **User Wellbeing / Mental-Health Watchdog** | `Fable System Prompt.txt:78-110` (`user_wellbeing`) | `conductor/product-guidelines.md` §"AI-Optimized Compact Style"; `conductor/code_styleguides/agent_memory_dimensions.md`; `docs/guide_discussions.md` | nagent §2.1 (4 memory dimensions, esp. 
the knowledge dim); nagent §13 (Compaction) | 800 lines | +| 4 | **Tone & Formatting Constraints** | `Fable System Prompt.txt:54-77` (`tone_and_formatting`, `lists_and_bullets`); plus cross-ref to line 110's "no engagement" rule in `user_wellbeing` | `AGENTS.md` (root); `conductor/product-guidelines.md`; `.opencode/agents/tier*.md` | nagent §3.8 (CLAUDE.md / AGENTS.md @import pattern) | 600 lines | +| 5 | **Mistakes & Criticism Handling** | `Fable System Prompt.txt:134-140` (`responding_to_mistakes_and_criticism`) | `AGENTS.md` §"receiving-code-review"; `.opencode/agents/tier3-worker.md`; `conductor/workflow.md` §"Process Anti-Patterns" | nagent §5.5 (Self-review); nagent §3.4 (Compaction self-review) | 500 lines | +| 6 | **Evenhandedness & Contested Content** | `Fable System Prompt.txt:120-132` (`evenhandedness`) | `AGENTS.md` §"receiving-code-review"; `conductor/code_styleguides/rag_integration_discipline.md` | nagent §2.10 (RAG integration discipline) | 700 lines | +| 7 | **Epistemic Discipline & Search Strategy** | `Fable System Prompt.txt:142-150, 422-565` (`knowledge_cutoff`, `search_instructions`) | `conductor/code_styleguides/rag_integration_discipline.md`; `conductor/code_styleguides/cache_friendly_context.md`; `docs/guide_rag.md` | nagent §3.2 (Cache ordering); nagent §2.10 (RAG discipline); nagent §13 (Compaction) | 800 lines | +| 8 | **Memory System & Persistent Storage** | `Fable System Prompt.txt:152-236` (`memory_system`, `persistent_storage_for_artifacts`) | `src/models.py` (History); `docs/guide_discussions.md`; `conductor/code_styleguides/agent_memory_dimensions.md`; `docs/guide_knowledge_curation.md` | nagent §2.1 (4 memory dimensions); nagent §3.9 (Per-file knowledge notes) | 800 lines | +| 9 | **Computer-Use / Skills / File Workflow** | `Fable System Prompt.txt:287-420` (`computer_use`, `file_creation_advice`, `producing_outputs`) | `docs/guide_tools.md` (MCP tools); `conductor/tech-stack.md` (file system); `conductor/edit_workflow.md` | nagent 
§11 (Large files); nagent §12 (Tool discovery, `--description` self-describing) | 700 lines | +| 10 | **MCP App Suggestions & Third-Party Connectors** | `Fable System Prompt.txt:238-285` (`mcp_app_suggestions`) | `docs/guide_mcp_client.md`; `docs/guide_tools.md` §"MCP"; `docs/guide_state_lifecycle.md` §"Hook API" | nagent §12 (Tool discovery, `--description` self-describing); nagent §2.7 (Conversations are editable state) | 600 lines | + +**Sub-agent read budget total:** 6,900 lines across 10 sub-agents. Each sub-agent gets one `mma_exec.py --role tier3-worker` dispatch with explicit context files (the Fable slice + the project file refs + the nagent section refs) and an output budget of 200-500 lines per cluster. + +--- + +## 6. Functional Requirements + +### 6.1 Cluster Sub-Agent Output + +Each of the 10 cluster sub-reports MUST: + +1. Cite Fable lines verbatim (≤15 words per quote) with `docs/artifacts/Fable System Prompt.txt` file:line references. +2. Cite project file:line references for every "what this project does" claim. +3. Cite nagent_review section references for every "what nagent does" claim. +4. Provide a verdict (Useful / Persona Performance / Anti-User / Mixed) with 1-paragraph justification. +5. Provide a "Synthesis notes for the Tier 1 writer" section naming the target synthesis report section(s) and key claims to surface. +6. Be 200-500 lines. +7. Be committed to `conductor/tracks/fable_review_20260617/research/cluster_N_*.md` as a separate file (1 file per cluster; 10 commits total). + +### 6.2 Synthesis Report Output + +The synthesis report (`report.md`) MUST: + +1. Have all 17 sections present and non-empty. +2. Total >3500 LOC. +3. Each section references its source cluster(s) by file:line. +4. Each section's "verdict orientation" (per the table in §4.2) is clear and consistent with the cluster's verdict. +5. Be committed in 17 atomic commits (1 per section), each with a 1-3 sentence commit message and a git note. 
+ +### 6.3 Side Artifacts + +The 3 side artifacts MUST: + +1. `comparison_table.md` — flat table with ~100 rows (one per Fable sub-theme), columns: Fable sub-theme | Fable line | Project file:line | nagent section | Verdict. ~700 lines. +2. `decisions.md` — 15-20 concrete recommendations for the deferred nagent-rebuild, each with: rationale, source evidence (cluster file:line), suggested Manual Slop destination (AGENTS.md / code_styleguide / etc.), priority. ~500 lines. +3. `nagent_takeaways_fable_20260617.md` — a 17th takeaway to append to the nagent_takeaways_20260608.md model: "Persona-performance directives don't survive the Fable audit; only epistemic + memory + workflow rules have durable value." ~150 lines. + +### 6.4 The Fable Artifact Discipline + +- The artifact at `docs/artifacts/Fable System Prompt.txt` MUST NOT be committed. +- Every `git add` in this track MUST be inspected before commit to verify no Fable artifact bytes enter the index. +- The cluster sub-reports and the synthesis report reference the artifact by file path + line range only. +- If a cluster sub-agent or a synthesis section needs to quote more than 15 words from Fable, it MUST paraphrase instead (per Fable's own rule at `Fable System Prompt.txt:486-499`). +- The final track commit includes a verification step: `git log --all --full-history -- 'docs/artifacts/Fable*'` MUST return zero entries. + +### 6.5 Track Registration + +- `conductor/tracks.md` is updated to register the track in the appropriate section (research track; under "Active" while in progress, "Recently Completed" when shipped). +- `conductor/tracks/fable_review_20260617/state.toml` is initialized at the start of phase 1 and updated per task. + +--- + +## 7. Non-Functional Requirements + +### 7.1 Process Discipline + +- All commits are per-file atomic (per `conductor/workflow.md` §"Task Workflow" step 9). +- All commits have git notes attached (per `conductor/workflow.md` §"Task Workflow" step 9.2). 
+- All tasks are recorded in `state.toml` with commit SHAs. +- No day / hour / minute estimates in any track artifact. T-shirt size only (per `conductor/workflow.md` §"Tier 1 Track Initialization Rules" + the user's 2026-06-16 directive). +- The 1-space indentation rule applies to the `metadata.json` and `state.toml` only (Markdown is not Python; the rule doesn't apply to prose). + +### 7.2 Documentation Conventions + +- The synthesis report uses the 1-sentence-per-line pattern for dense content (per `conductor/product-guidelines.md` §"AI-Optimized Compact Style"). +- The `#region: Name` / `#endregion: Name` convention for large sections does not apply to the synthesis report (it is a Python-only rule; the report is Markdown). +- All file:line references are stable (the report is the durable artifact; the Fable artifact may change). + +### 7.3 Audit Hooks (Optional) + +- This track is research-only; no `scripts/audit_*.py` scripts are added or modified. The deferred nagent-rebuild is the appropriate place for any new audit scripts. + +--- + +## 8. Architecture Reference + +- **`docs/artifacts/Fable System Prompt.txt`** (1585 lines, 120KB) — the subject of the review. **Local-only; never committed.** +- **`conductor/tracks/nagent_review_20260608/`** — the nagent corpus. All 11 files in scope. The 17 sections of the synthesis report reference this corpus for "what nagent does" claims. +- **`AGENTS.md`** (root) — the project's top-level agent-facing rules. Clusters 1, 4, 5, 6 reference this. +- **`conductor/product.md`** (27K) — the product vision. Cluster 1 references the "What This Is" framing. +- **`conductor/product-guidelines.md`** (20K) — the AI-Optimized Compact Style. Clusters 3, 4 reference the formatting heuristics. +- **`conductor/workflow.md`** (63K) — the operational workflow. Clusters 2, 5 reference the Skip-Marker Policy + Process Anti-Patterns. +- **`conductor/tech-stack.md`** (15K) — the tech stack. Cluster 9 references the file-system + tools layout. 
+- **`conductor/edit_workflow.md`** (9K) — the edit workflow. Cluster 9 references the 1-space indentation + small-edits rule. +- **`conductor/code_styleguides/`** (11 files, ~140K) — the convention catalog. Clusters 2, 3, 6, 7, 8 reference these (especially `error_handling.md`, `agent_memory_dimensions.md`, `rag_integration_discipline.md`, `cache_friendly_context.md`, `knowledge_artifacts.md`, `feature_flags.md`). +- **`.opencode/agents/*.md`** (6 files) — the 4 MMA tier agents + explore + general. Clusters 1, 4, 5 reference these for the "what every agent sees" baseline. +- **`.opencode/commands/*.md`** (9 files) — the 5 conductor commands + 4 mma commands. Cluster 5 references the `/conductor-new-track` command for the "this is a track" framing. +- **`docs/AGENTS.md`** — the agent-facing mirror. Cluster 1 references the "What This Is" framing. +- **`docs/guide_*.md`** (36 files, ~580K) — the 14 deep-dive guides. Clusters 1, 6, 7, 8, 9, 10 reference these selectively (especially `guide_tools.md`, `guide_mcp_client.md`, `guide_discussions.md`, `guide_rag.md`, `guide_knowledge_curation.md`). +- **Superpowers plugin content** (loaded via the `skill` tool) — the brainstorming, writing-plans, test-driven-development, etc. skills. The Tier 1's self-review uses the brainstorming skill; the Tier 2's plan-phase uses the writing-plans skill. Not directly cited in the synthesis report. +- **`docs/reports/PLANNING_DIGEST_*.md`** (if present) — the most recent planning digest. Used for "what's the recommended execution order" sanity check; not directly cited in the report. + +--- + +## 9. Phases (the implementation plan Tier 2 will execute) + +| Phase | Description | T-shirt | Sub-agents | Exit criteria | +|---|---|---|---|---| +| **1** | Initialize track directory + skeleton `report.md` (with section headers), `comparison_table.md` (with column headers), `decisions.md` (with template), `nagent_takeaways_fable_20260617.md` (empty). Initialize `state.toml`. 
Register track in `conductor/tracks.md` "Active" section. | S | 0 | All skeleton files exist; `state.toml` says `current_phase = 1`. | +| **2** | Dispatch 10 cluster sub-agents in parallel (Tier 3 workers, read-only). Each writes `research/cluster_N_*.md` (200-500 lines). Verify each sub-report: source citations present, ≤15-word quotes only, verdict present, synthesis notes present. | L | 10 parallel | All 10 cluster sub-reports committed; `state.toml` says `current_phase = 2`. | +| **3** | Tier 1 reads all cluster reports, writes the synthesis report sections one at a time (17 sections, 17 commits). Each section references its cluster(s) by file:line. | XL | 0 (Tier 1) | All 17 sections committed; `report.md` >3500 LOC; `state.toml` says `current_phase = 3`. | +| **4** | Tier 1 writes the 3 side artifacts (`comparison_table.md`, `decisions.md`, `nagent_takeaways_fable_20260617.md`). | M | 0 (Tier 1) | All 3 side artifacts committed; `state.toml` says `current_phase = 4`. | +| **5** | Self-review per the brainstorming skill (placeholder scan, internal consistency, scope check, ambiguity check) on the full report + side artifacts. Fix any issues inline. | S | 0 (Tier 1) | Self-review checklist complete; `state.toml` says `current_phase = 5`. | +| **6** | User review gate. Tier 1 presents the report to the user. User approves or iterates. | S | 0 (user) | User approves (or iterates until approved); `state.toml` says `current_phase = 6`. | +| **7** | Final commit + git notes + register track as completed in `conductor/tracks.md` "Recently Completed" section. Update `state.toml` to `current_phase = 7` and `status = "active"` until archived. | S | 0 (Tier 1) | Track registered; `state.toml` final; `state.toml` says `current_phase = 7`. 
| + +**Total scope:** 1 spec + 1 metadata.json + 1 state.toml + 10 cluster sub-reports (~3,500 LOC) + 1 main report (4,800 LOC) + 3 side artifacts (1,350 LOC) = **T-shirt size: XL** (similar to the nagent_review v2.3 rewrite at 4,969 lines). + +--- + +## 10. Verification Criteria + +The track is "done" when all of the following are true: + +- [ ] All 10 cluster sub-reports exist at `conductor/tracks/fable_review_20260617/research/cluster_N_*.md` and are 200-500 lines each. +- [ ] Every cluster sub-report cites specific Fable line numbers, project file:line refs, and nagent section refs. +- [ ] Every cluster sub-report has a verdict (Useful / Persona Performance / Anti-User / Mixed) with justification. +- [ ] Every cluster sub-report has a "Synthesis notes for the Tier 1 writer" section. +- [ ] The synthesis report `conductor/tracks/fable_review_20260617/report.md` has all 17 sections present and non-empty. +- [ ] The synthesis report is >3500 LOC. +- [ ] Every synthesis section references its source cluster(s) by file:line. +- [ ] The 3 side artifacts exist at `conductor/tracks/fable_review_20260617/{comparison_table.md, decisions.md, nagent_takeaways_fable_20260617.md}`. +- [ ] `comparison_table.md` has ~100 rows. +- [ ] `decisions.md` has 15-20 concrete recommendations. +- [ ] `nagent_takeaways_fable_20260617.md` is ~150 lines. +- [ ] The Fable artifact at `docs/artifacts/Fable System Prompt.txt` was **never committed**. Verification command: `git log --all --full-history -- 'docs/artifacts/Fable*'` returns zero entries. +- [ ] Self-review pass complete (placeholder scan, internal consistency, scope check, ambiguity check). +- [ ] User has reviewed and approved the final report. +- [ ] `conductor/tracks.md` is updated to register the track. +- [ ] All commits are per-file atomic with git notes. +- [ ] `state.toml` final state is `current_phase = 7` and the track is in "Recently Completed" (or the appropriate section per the convention). + +--- + +## 11. 
Risks & Mitigations + +| Risk | Impact | Likelihood | Mitigation | +|---|---|---|---| +| Fable prompt grows/evolves during the track | Low (the artifact is a snapshot) | Low | The artifact is a snapshot at 2026-06-17; we note the date. If the user has a newer version, the track re-dispatches the cluster agents. | +| 10 sub-agents in parallel = high token cost | Medium (cost) | Medium | Each sub-agent gets a 500-line output budget; the dispatch is `mma_exec.py --role tier3-worker` with explicit context files. Total cluster output: ~3,500 LOC across 10 files. | +| Tier 1's synthesis hits context pressure after 17 sections | High (track stalls mid-synthesis) | Medium | Per-section commits serve as a rollback point; if Tier 1 hits pressure mid-section, the section can be handed off to a fresh Tier 1 with the cluster reports + the previous sections as context. | +| The user disagrees with a verdict (e.g., "no, that pattern is actually useful") | Low (user-review gate catches it) | Low | The user-review gate at the end of phase 6 catches this; revisions are local. | +| Cluster sub-agents over-quote Fable (copyright) | Medium (report becomes a Fable reproduction) | Low | Each cluster's acceptance check enforces the ≤15-word quote discipline; Fable's own rule applied externally. | +| Fable artifact accidentally committed | High (user's hard rule violated) | Low | The Fable artifact is **never** staged: no `git add` in this track may include it. Per-commit `git status` inspection. Final verification: `git log --all --full-history -- 'docs/artifacts/Fable*'` returns zero entries. | +| Tier 2 doesn't dispatch cluster sub-agents correctly (e.g., the dispatch is too narrow, missing context files) | Medium (cluster reports are weak) | Medium | The Tier 1's spec includes the read budget per sub-agent (§5). The Tier 2's plan must include explicit context-file lists per dispatch. 
| +| Tier 1's report deviates from the cluster verdicts (editorial drift) | Low (verdict consistency check catches it) | Low | The synthesis report's verdicts are anchored to the cluster reports' verdicts; if a synthesis section changes a verdict, it must explicitly note the override. | + +--- + +## 12. Out of Scope (Explicit) + +- **Modifying any agent-directive file in the project.** The recommendations go in `decisions.md` for the user's deferred nagent-rebuild (1-2 weeks out). +- **Building the recommended changes.** The deferred rebuild is its own track. +- **Comparing Fable to other commercial system prompts** (OpenAI, Google, xAI). Out of scope; Fable is the named subject. +- **Reading every line of every project file.** Cluster sub-agents read the relevant sections of the relevant files; full-file reads are unnecessary and would waste context. +- **Committing the Fable artifact.** The artifact stays at `docs/artifacts/Fable System Prompt.txt`; clusters quote line ranges but the file itself never enters git. +- **Adding new `src/` code, new tests, `pyproject.toml` dependencies, or `scripts/` files.** +- **Running automated tests.** The track is research-only; verification is the brainstorming-skill self-review plus user review. +- **Creating new `docs/Readme.md` or `docs/AGENTS.md` entries.** The report is at `conductor/tracks/fable_review_20260617/`; it is not in the docs index. +- **The deferred nagent-rebuild itself.** The recommendations in `decisions.md` are inputs to that future track; the rebuild is not this track. + +--- + +## 13. See Also + +### 13.1 Internal References + +- **`docs/artifacts/Fable System Prompt.txt`** — the subject of the review. Local-only. +- **`conductor/tracks/nagent_review_20260608/`** — the nagent corpus. All 11 files in scope. +- **`conductor/tracks/intent_dsl_survey_20260612/`** — the closest model for this track. 
The `research/cluster_*.md` pattern is borrowed from this track's `cluster_3_intent_mapping.md`, `cluster_4_meta_tooling_dsls.md`, `cluster_8_metadesk.md`, `cluster_9_verse.md`. +- **`conductor/tracks/nagent_review_20260608/spec.md`** — the v1 nagent review spec. The "what this track read" and "what this track produces" sections are the model for this spec. +- **`conductor/workflow.md` §"Tier 1 Track Initialization Rules"** — the rules this spec follows (no day estimates, scope-only, T-shirt size). +- **`conductor/product.md`** — the product vision. The synthesis report's "what this project does" claims are anchored to this. +- **`conductor/product-guidelines.md` §"AI-Optimized Compact Style"** — the formatting rules the synthesis report follows. +- **`conductor/code_styleguides/`** — the convention catalog. The synthesis report references these for "what this project does" claims. +- **`AGENTS.md`** (root) — the project's top-level agent-facing rules. The synthesis report's "what every agent sees" baseline. +- **`docs/Readme.md`** — the docs index. The 14 deep-dive guides under `docs/guide_*.md` are the per-source-file references the synthesis report cites. + +### 13.2 External References + +- **Anthropic's Claude Fable 5 / Mythos announcement:** `https://www.anthropic.com/news/claude-fable-5-mythos-5` (referenced by Fable at line 14; the user did not request we read the announcement directly). +- **Mike Acton's nagent:** `https://github.com/macton/nagent` (the source of the nagent_review corpus). +- **Mike Acton's data-oriented design talks:** `https://www.youtube.com/results?search_query=mike+acton+data+oriented` (foundational; nagent is a specific application). +- **Ryan Fleury, "The Easiest Way To Handle Errors Is To Not Have Them":** `https://www.dgtlgrove.com/p/the-easiest-way-to-handle-errors` (cited in `data_oriented_error_handling_20260606`; consistent with nagent's "data, not control flow" stance). 
+- **The project's "errors are data" convention:** `conductor/code_styleguides/error_handling.md` (the data-oriented contrast to Fable's persona-driven error-handling guidance). + +### 13.3 Track-internal References + +- **`conductor/tracks/fable_review_20260617/spec.md`** — this file. +- **`conductor/tracks/fable_review_20260617/metadata.json`** — the track metadata (id, scope, blocks, etc.). +- **`conductor/tracks/fable_review_20260617/state.toml`** — the track state (current_phase, task tracking). +- **`conductor/tracks/fable_review_20260617/research/cluster_*.md`** — the 10 cluster sub-reports (executed by Tier 3 sub-agents in phase 2). +- **`conductor/tracks/fable_review_20260617/report.md`** — the main synthesis report (executed by Tier 1 in phase 3). +- **`conductor/tracks/fable_review_20260617/comparison_table.md`** — the flat verdict table (executed by Tier 1 in phase 4). +- **`conductor/tracks/fable_review_20260617/decisions.md`** — the recommendations for the deferred nagent-rebuild (executed by Tier 1 in phase 4). +- **`conductor/tracks/fable_review_20260617/nagent_takeaways_fable_20260617.md`** — the Fable-specific addendum to nagent_takeaways_20260608.md (executed by Tier 1 in phase 4).