From bf466fe6aefbbbfecca494d583389290f913443a Mon Sep 17 00:00:00 2001 From: Ed_ Date: Sun, 21 Jun 2026 08:40:49 -0400 Subject: [PATCH 1/3] conductor(track): meta_tooling_workflow_review_20260620 spec + metadata + state (parked, current_phase=0) --- .../metadata.json | 143 ++++++ .../spec.md | 465 ++++++++++++++++++ .../state.toml | 102 ++++ 3 files changed, 710 insertions(+) create mode 100644 conductor/tracks/meta_tooling_workflow_review_20260620/metadata.json create mode 100644 conductor/tracks/meta_tooling_workflow_review_20260620/spec.md create mode 100644 conductor/tracks/meta_tooling_workflow_review_20260620/state.toml diff --git a/conductor/tracks/meta_tooling_workflow_review_20260620/metadata.json b/conductor/tracks/meta_tooling_workflow_review_20260620/metadata.json new file mode 100644 index 00000000..bbe5f548 --- /dev/null +++ b/conductor/tracks/meta_tooling_workflow_review_20260620/metadata.json @@ -0,0 +1,143 @@ +{ + "track_id": "meta_tooling_workflow_review_20260620", + "name": "Meta-Tooling Workflow Review — Past-Month LLM Behavior Analysis", + "type": "research-only", + "priority": "medium-high", + "owner": "Tier 1 Orchestrator (sole synthesis author); Tier 3 sub-agents for parallel sweeps", + "initialized": "2026-06-20", + "status": "active", + "current_phase": 0, + "blocked_by": [], + "blocks": [ + { + "track_id": "workflow_improvements_rebuild_", + "relationship": "this track produces standalone inputs (workflow_improvements.md + implementation_sequencing.md) for the rebuild track" + } + ], + "scope": { + "new_files": [ + "conductor/tracks/meta_tooling_workflow_review_20260620/spec.md", + "conductor/tracks/meta_tooling_workflow_review_20260620/metadata.json", + "conductor/tracks/meta_tooling_workflow_review_20260620/state.toml", + "conductor/tracks/meta_tooling_workflow_review_20260620/plan.md", + "conductor/tracks/meta_tooling_workflow_review_20260620/report.md", + "conductor/tracks/meta_tooling_workflow_review_20260620/comparison_table.md", + "conductor/tracks/meta_tooling_workflow_review_20260620/decisions.md", + "conductor/tracks/meta_tooling_workflow_review_20260620/shipped_work_index.md", + "conductor/tracks/meta_tooling_workflow_review_20260620/llm_behavior_catalog.md", + "conductor/tracks/meta_tooling_workflow_review_20260620/nagent_takeaways_meta_tooling_20260620.md", + "conductor/tracks/meta_tooling_workflow_review_20260620/workflow_improvements.md", + "conductor/tracks/meta_tooling_workflow_review_20260620/implementation_sequencing.md" + ], + "modified_files": [ + "conductor/tracks.md" + ], + "deleted_files": [] + }, + "sibling_reviews": [ + "conductor/tracks/nagent_review_20260608/", + "conductor/tracks/fable_review_20260617/", + "conductor/tracks/superpowers_review_20260619/", + "conductor/tracks/intent_dsl_survey_20260612/" + ], + "user_directives": [ + {"date": "2026-06-20", "directive": "Full past month (~75 reports + git log + state.toml + guide docs)", "source": "user (brainstorming Q1)"}, + {"date": "2026-06-20", "directive": "Document-driven (4 parts): What shipped / LLM Behavior Patterns / Workflow Improvements / Implementation Sequencing", "source": "user (brainstorming Q2)"}, + {"date": "2026-06-20", "directive": "Audit depth C: reports + git log + track spec deviations + state.toml + guide docs", "source": "user (brainstorming Q3)"}, + {"date": "2026-06-20", "directive": "Recommendation structure D: by target doc × by confidence tier", "source": "user (brainstorming Q4)"}, + {"date": "2026-06-20", "directive": "Execution model C: Tier 1 anchor + Tier 3 parallel sweeps; sub-agents for batch data only", "source": "user (brainstorming Q5)"}, + {"date": "2026-06-20", "directive": "Output shape C: report + side artifacts + workflow_improvements.md + implementation_sequencing.md", "source": "user (brainstorming Q6)"}, + {"date": "2026-06-20", "directive": "Minimum 4,000 line report; use nagent_review_v3.1 chunking strategy", "source": "user (brainstorming Q7)"}, + {"date": "2026-06-20", "directive": "Be conservative with meta-tooling to not break OpenCode", "source": "user (overall framing)"}, + {"date": "2026-06-20", "directive": "Park the track; do not execute in this session", "source": "user (execution handoff, Option 3)"} + ], + "execution_model": { + "tier_1_anchor": "Reads 10 spine reports; produces internal scratchpad for synthesis (not committed)", + "tier_3_parallel_sweeps": [ + {"sweep": "A", "scope": "reports corpus (~75 files)", "output": "shipped_work_index.md (~300-500 LOC)"}, + {"sweep": "B", "scope": "git log + git notes + state.toml user_directives + spec.md deviations", "output": "llm_behavior_catalog.md Part 1 (~500-700 LOC)"}, + {"sweep": "C", "scope": "AGENTS.md + conductor/*.md + docs/guide_*.md + code_styleguides/*.md", "output": "llm_behavior_catalog.md Part 2 appended (~200-300 LOC)"} + ], + "tier_1_synthesis": "Reads sweep outputs + scratchpad; writes 4-part report.md (>=4,000 LOC) + side artifacts + standalone inputs" + }, + "report_structure": { + "part_1_what_shipped": { + "target_loc": "800-1000", + "sub_sections": 5, + "sub_section_loc_range": "160-200", + "source": "shipped_work_index.md (Tier 3 sweep A)" + }, + "part_2_llm_behavior_patterns": { + "target_loc": "1500-2000", + "target_pattern_count": 12, + "pattern_loc_range": "125-170", + "sub_section_count_per_pattern": 7, + "source": "llm_behavior_catalog.md (Tier 3 sweeps B+C)" + }, + "part_3_workflow_improvements": { + "target_loc": "1000-1200", + "target_improvement_count": "15-25", + "improvement_loc_range": "50-80", + "sub_section_count_per_improvement": 6, + "organization": "5 target docs x 3 confidence tiers" + }, + "part_4_implementation_sequencing": { + "target_loc": "300-500", + "phase_count": 5, + "phase_loc_range": "60-100", + "sub_section_count_per_phase": 5, + "principle": "conservative ordering: zero-risk doc edits first, audit scripts last" + }, + "total_target_loc": ">=4000" + }, + "verification_criteria": [ + "report.md has all 4 parts present and non-empty", + "report.md total LOC >= 4,000 (per user directive 2026-06-20)", + "Part 1 has all 5 track-family sub-sections", + "Part 2 has 8-16 LLM behavior patterns (target 12) with the 7-sub-section structure + verdict block", + "Part 3 has 15-25 workflow improvements organized by 5 target docs x 3 confidence tiers", + "Part 4 has all 5 implementation phases with the 5-sub-section structure", + "comparison_table.md has ~50 rows", + "decisions.md has 15-25 entries sorted HIGH to LOW with destination files", + "shipped_work_index.md exists with per-track summaries", + "llm_behavior_catalog.md exists with the 12-pattern catalog", + "nagent_takeaways_meta_tooling_20260620.md exists with 5-part bridge structure", + "workflow_improvements.md exists as standalone (Part 3 verbatim)", + "implementation_sequencing.md exists as standalone (Part 4 verbatim + phase dependencies)", + "Every Part 2 pattern has a verdict block (NEW / PARTIALLY-CODIFIED / FULLY-CODIFIED / SUBSUMED)", + "Every Part 3 improvement has a destination file path", + "Every Part 4 phase has a rollback command", + "No src/ / tests/ / AGENTS.md / conductor/*.md / .opencode/agents/*.md / .opencode/commands/*.md / conductor/code_styleguides/*.md / scripts/audit_*.py changes (research-only)", + "Self-review pass complete (placeholder scan, internal consistency, scope check, ambiguity check, chunking verification)", + "User has reviewed and approved the final report + side artifacts + standalone inputs", + "conductor/tracks.md updated to register the track", + "All atomic commits have git notes attached per conductor/workflow.md §Task Workflow step 9.2", + "state.toml final state is current_phase=11 and status=active (until archived)", + "No new src/*.py or scripts/audit_*.py files created (per AGENTS.md hard rules)", + "No day / hour / minute estimates in any track artifact", + "The Tier 2 autonomous sandbox was NOT used for this track (Tier 1 inline execution per the user's framing)" + ], + "regressions_and_pre_existing_failures": [], + "pre_existing_failures_remaining": [], + "deferred_to_followup_tracks": [ + { + "title": "Workflow Improvements Rebuild", + "description": "Apply the 5-phase conservative sequencing from Part 4 to AGENTS.md / conductor/workflow.md / conductor/code_styleguides/error_handling.md / .opencode/agents/*.md / scripts/audit_*.py. Consumes workflow_improvements.md + implementation_sequencing.md as standalone inputs.", + "track_status": "planned in meta_tooling_workflow_review_20260620", + "blocks_until": "meta_tooling_workflow_review_20260620 ships" + } + ], + "out_of_scope": [ + "Modifying any agent-directive file in the project (the recommendations go to workflow_improvements.md for the deferred rebuild)", + "Building any recommendation (the deferred rebuild is its own track)", + "Reviewing every external AI corpus beyond the 5 sibling meta-analysis reviews", + "Doing a per-AGENTS.md-section review (the review identifies new patterns vs what's in AGENTS.md; it does not restructure AGENTS.md)", + "Rewriting or migrating docs/superpowers/specs/*.md -> conductor/tracks//spec.md (dual-convention problem is its own track)", + "Adding new .opencode/agents/*.md files, new conductor/code_styleguides/*.md files, or new scripts/audit_*.py scripts (the report may recommend these; the rebuild creates them)", + "Running automated tests (research-only; verification is the brainstorming-skill self-review plus user review)", + "Creating new docs/Readme.md or docs/AGENTS.md entries (the report is at conductor/tracks/meta_tooling_workflow_review_20260620/; not in the docs index)", + "The user's deferred workflow-improvements rebuild itself (the recommendations are inputs to that future track)", + "The chronology track's Phase 8 rewrite (the handover document is cited as evidence; the rewrite is its own track per the handover's recommendation)" + ], + "anti_sliming_notes": "Per the chronology_20260619 handover, the manual review gates must be respected literally. This track's Phase 9 self-review + Phase 10 user review gate are the explicit hard gates; the implementer (whichever tier picks it up) MUST NOT bulk-verify to bypass them." +} diff --git a/conductor/tracks/meta_tooling_workflow_review_20260620/spec.md b/conductor/tracks/meta_tooling_workflow_review_20260620/spec.md new file mode 100644 index 00000000..4d06c23b --- /dev/null +++ b/conductor/tracks/meta_tooling_workflow_review_20260620/spec.md @@ -0,0 +1,465 @@ +# Track Specification: Meta-Tooling Workflow Review — Past-Month LLM Behavior Analysis + +**Status:** Spec approved 2026-06-20 (brainstorming dialogue complete; awaiting user review of written spec). +**Initialized:** 2026-06-20 +**Owner:** Tier 1 Orchestrator (sole author of synthesis + spec; Tier 3 sub-agents dispatch for parallel batch sweeps of structured data per the user's directive) +**Priority:** Medium-High (user-explicit; informs the near-future conservative AI-directive improvements track) +**Type:** Research-only. No `src/` changes. No `tests/` changes. No `AGENTS.md` / `conductor/*.md` / `.opencode/agents/*.md` / `.opencode/commands/*.md` / `conductor/code_styleguides/*.md` / `scripts/audit_*.py` changes. The track produces 7 reference artifacts: the user's deferred workflow-improvement rebuild consumes them as standalone inputs. +**Format:** Conductor convention (per the precedent set by `nagent_review_20260608`, `fable_review_20260617`, `superpowers_review_20260619`, `intent_dsl_survey_20260612`). All artifacts at `conductor/tracks/meta_tooling_workflow_review_20260620/`. + +--- + +## 0. Overview + +This track produces a **systematic analysis of the past month's LLM agent behavior** (2026-05-20 → 2026-06-20) in the Manual Slop project, with the goal of identifying recurring failure modes, codifying what already works, and producing a **workflow improvements catalog** the user can use to introduce conservative OpenCode workflow / `conductor/` / agent-directive changes in a near-future track. + +The corpus spans: +- ~75 reports in `docs/reports/` (the recent-discipline subset of the past ~2 weeks) +- ~200-300 commit messages + ~80 git notes across the past month +- ~40-50 `conductor/tracks//spec.md` deviation logs (the "deviations from spec/plan" sections) +- ~30 `conductor/tracks//state.toml` `user_directives_logged` entries +- The `AGENTS.md` "Critical Anti-Patterns" + "Session-Learned Anti-Patterns" + "Process Anti-Patterns" sections (the project's *compiled* LLM failure mode catalog) +- Inline notes in `docs/guide_*.md` and `conductor/*.md` + +The deliverable is a 4-part `report.md` (≥4,000 LOC) that: +1. **Part 1 — What Shipped** documents the past month's tracks and their outcomes +2. **Part 2 — LLM Behavior Patterns** identifies the 12 most consequential agent failure modes (anti-sliming, hard-gate bypass, regression-after-refactor, etc.) with file:line citations +3. **Part 3 — Workflow Improvements** catalogs conservative changes by target doc × confidence tier +4. **Part 4 — Implementation Sequencing** orders the changes for the near-future rebuild track + +Plus 5 side artifacts (`comparison_table.md`, `decisions.md`, `nagent_takeaways_meta_tooling_20260620.md`, `shipped_work_index.md`, `llm_behavior_catalog.md`) and 2 standalone inputs for the rebuild track (`workflow_improvements.md`, `implementation_sequencing.md`). + +The track is **research-only**. No `src/` files are modified. No agent-directive files are modified. The actual conservative changes become a **follow-up track** in the user's planned rebuild. + +The user's framing (2026-06-20): "I want to do a documentation/guide updates. Analyze all reports, what has been done for the week. Any takeaways from LLM behavior and write a report on how the workflow can be improved." Further (2026-06-20): "I eventually will be introducing opencode workflow/conductor/agent directive changes based on multiple meta-tooling review tracks that have occured the past few weeks." The review's lens is *workflow correctness* (when agents should escalate, when hard gates are sacred, when context can be lost in extraction) — not AI speed or capability. + +--- + +## 1. Current State Audit (as of commit `f0f404632`) + +### 1.1 Already Implemented (DO NOT re-implement) + +| What | Where | Notes | +|---|---|---| +| **The 4 prior meta-analysis research tracks** (the *precedent* this track follows) | `conductor/tracks/{nagent_review_20260608, fable_review_20260617, superpowers_review_20260619, intent_dsl_survey_20260612}/` | 4 sibling reviews; nagent_review's verdict taxonomy + fable_review's cluster dispatch + superpowers_review's single-author structure are the templates. The 5th in this corpus is this track. | +| **The past-month reports corpus** (the *subject* of the analysis) | `docs/reports/*.md` — ~75 files dated 2026-05-20 → 2026-06-20 (per `Get-ChildItem -LastWriteTime -ge (Get-Date).AddDays(-35)`) | Includes TRACK_COMPLETIONs, SESSION_REPORTs, STATUS_REPORTs, PLANNING_DIGESTs, COMPACTION_DIGESTs, NEGATIVE_FLOWS_INVESTIGATIONs, TIER1_REVIEWs. The track reads these; it does not modify them. | +| **The git log + git notes** (the *evidence* behind the reports) | `git log` past month (~200-300 commits); `git notes` (~80 attached summaries) | Per the chronology_20260619 handover ("git history is the project's audit log"), git log is the explicit evidence source. The Tier 3 sweep sub-agents read this. | +| **The track spec deviations** (the *gap* between plan and execution) | `conductor/tracks//spec.md` "Deviations from Spec/Plan" sections (~40-50 tracks have these) | Reveals where the plan didn't survive contact with reality. The Tier 3 sweep reads these. | +| **The state.toml user_directives** (the *user override log*) | `conductor/tracks//state.toml` `user_directives_logged` arrays (~30 tracks) | Captures user-injected corrections mid-track. Critical for understanding the "actual" vs "planned" workflow. | +| **The project's compiled LLM-failure catalog** (the *baseline* this review compares against) | `AGENTS.md` §"Critical Anti-Patterns" + §"Session-Learned Anti-Patterns" + §"Process Anti-Patterns" | This is the project's existing anti-pattern reference. The review's Part 2 identifies which past-month failures are already codified vs which are NEW. | +| **The guide docs** (potential hidden note locations) | `docs/guide_*.md` (36 files, ~580K) | The Tier 3 sweep scans these for inline LLM-behavior notes that may not be in `AGENTS.md` yet. | +| **The chronology track** (the *immediate parallel*) | `conductor/tracks/chronology_20260619/` + `docs/reports/CHRONOLOGY_TRACK_HANDOVER_20260620.md` + `docs/reports/TRACK_COMPLETION_chronology_20260619.md` | The chronology track is mid-flight (current_phase=10, pending user sign-off); its handover document is itself a Tier 2 autonomous-failure case study (one of the 12 LLM behavior patterns). | +| **The result migration campaign** (the *largest track cluster* in the corpus) | `conductor/tracks/result_migration_20260616/` (umbrella) + 5 sub-tracks: `result_migration_review_pass_20260617`, `result_migration_small_files_20260617`, `result_migration_app_controller_20260618`, `result_migration_gui_2_20260619`, `result_migration_baseline_cleanup_20260620` | The campaign shipped all 5 sub-tracks by 2026-06-20 (100% baseline + gui_2 + app_controller compliant). Multiple sub-tracks produced anti-sliming protocol evolution; multiple regression bugs caught late. | + +### 1.2 Gaps to Fill (This Track's Scope) + +- **The synthesis `report.md` (≥4,000 LOC, 4 parts).** Does not exist. Will be authored by Tier 1 across 7 phases using the chunking-strategy pattern from `nagent_review_v3.1` (11 cluster sub-sections each thickened to 170-270 LOC; per-section "Pattern summary" + per-evidence file:line citations + Manual Slop implications). +- **`comparison_table.md` (~50 rows).** Does not exist. Flat reference: one row per past-month track × shipped status × key report files × first LLM-behavior classification. +- **`decisions.md` (~15-25 entries).** Does not exist. Sorted by priority (HIGH → MEDIUM → LOW); each entry has a "destination file" field so the user can batch the deferred rebuild. +- **`nagent_takeaways_meta_tooling_20260620.md` (~200 LOC bridge).** Does not exist. Links this track's findings to `nagent_review_20260608` and `superpowers_review_20260619` so the user can read all 5 meta-analysis reviews as a unified corpus. +- **`shipped_work_index.md` (~300-500 LOC).** Does not exist. Per-track shipped-work summaries — output of the Tier 3 sweep sub-agent A (reports corpus). +- **`llm_behavior_catalog.md` (~500-800 LOC).** Does not exist. The 12 LLM behavior patterns with file:line citations — output of the Tier 3 sweep sub-agent B (state.toml + spec deviations + git notes). +- **`workflow_improvements.md` (~1000-1200 LOC).** Does not exist. Standalone Part 3 input for the rebuild track — the by-target-doc × by-confidence-tier catalog. +- **`implementation_sequencing.md` (~300-500 LOC).** Does not exist. Standalone Part 4 input for the rebuild track — the conservative 5-phase ordering. + +### 1.3 Pre-Existing Conditions the Track Must Respect + +- **`docs/reports/` is not comprehensive.** Per the user's directive (2026-06-20): "Having each track or session with LLMs generate a report was a relatively recent habit only developed into a discipline maybe a week or two ago at most. You may need to reference git logs or other places agents may have put feedback or notes in." The audit must include git log, git notes, `state.toml` `user_directives_logged`, spec.md deviation sections, and `docs/guide_*.md` inline notes — not just `docs/reports/`. +- **The 12 LLM behavior patterns are not pre-defined.** The pattern recognition is inductive — the Tier 1 synthesis identifies them by reading the corpus, not by applying a pre-built checklist. The 12-pattern hypothesis is a starting frame; the actual report may identify 8 or 16, not exactly 12. +- **The chronology track is mid-flight.** The review's findings may overlap with the chronology handover's "Lessons Learned" section; the synthesis must not contradict or duplicate that document, but cross-reference it. +- **The nagent-review verdict taxonomy does not apply directly.** The nagent reviews *what the agent should do* (verdict on each skill). This review analyzes *what the agent actually did* (pattern of behavior over time). Different vocabulary, different unit of analysis. +- **The user's "conservative meta-tooling" stance.** The user explicitly framed this as "be somewhat conservative with the meta-tooling as to not cause opencode functionality to fail." Part 3's recommendations must be tiered by risk; Part 4's sequencing must put zero-risk doc edits before any `.opencode/` directive changes. +- **The hard ban on `git restore` / `git checkout -- ` / `git reset`** applies per `AGENTS.md`. No accidental working-tree destruction during the Tier 3 sweeps. +- **No day / hour / minute estimates** in any track artifact (per `conductor/workflow.md` Tier 1 rules). Scope-only ("~75 reports, 12 patterns, 5 docs touched, 3 confidence tiers"). + +--- + +## 2. Goals (Priority Order) + +| Priority | Goal | Rationale | +|---|---|---| +| **A (primary)** | `report.md` Part 1 documents what shipped in the past month across all track families with file:line citations to source reports | The "what was done" half of the user's request | +| **A (primary)** | `report.md` Part 2 identifies 8-16 (target: 12) recurring LLM behavior patterns with file:line evidence and comparison to `AGENTS.md` "Critical Anti-Patterns" (what's NEW vs already codified) | The "LLM behavior takeaways" half of the user's request | +| **A (primary)** | `report.md` Part 3 catalogs conservative workflow improvements by target doc (`AGENTS.md` / `conductor/workflow.md` / `conductor/code_styleguides/error_handling.md` / `.opencode/agents/*.md` / `scripts/audit_*.py`) × by confidence tier (apply now / defer 1 cycle / open question) | The "workflow improvements" half of the user's request, structured for the rebuild track | +| **A (primary)** | `report.md` Part 4 sequences the changes for the rebuild track in 5 conservative phases (doc edits → process gates → convention tightening → tier-specific directives → audit scripts) | The "sequencing" the user needs to avoid breaking OpenCode | +| **A (primary)** | `report.md` total LOC ≥ 4,000 (per user directive 2026-06-20: "do a minimum 4k line md report") | Floor; the nagent_review_v3.1 chunking strategy (per-section 170-270 LOC thickened) is the template | +| **A (primary)** | `workflow_improvements.md` and `implementation_sequencing.md` are standalone — the rebuild track reads them without re-reading the 4,000-LOC report | Per the user's "leads to a near-future track" framing | +| **B (analytical)** | The `shipped_work_index.md` and `llm_behavior_catalog.md` are Tier 3 sub-agent outputs — Tier 1 does not redo the sweeps | Per user's "sub-agents may be necessary for parallel search" directive | +| **B (process)** | The `nagent_takeaways_meta_tooling_20260620.md` bridge points to the relevant sections of `nagent_review_20260608`, `fable_review_20260617`, and `superpowers_review_20260619` for cross-reference | Per the user's pattern (the 4 sibling reviews are a unified corpus) | +| **B (process)** | Every section in Part 2 follows the nagent_review_v3.1 per-section sub-structure: definition + 3-7 evidence citations (file:line) + how AGENTS.md already addresses it + what's NEW + code-shape sketch | The user's hint "you may be able to derive a pattern for how the agent reported behavioral or inference failures in the more recent reports" | +| **C (housekeeping)** | `conductor/tracks.md` is updated to register the track in the appropriate section | Standard per-track convention | +| **C (housekeeping)** | All atomic commits have git notes attached per `conductor/workflow.md` §"Task Workflow" step 9.2 | Project convention | + +--- + +## 3. Functional Requirements + +### 3.1 The 4 Parts of `report.md` (target ≥4,000 LOC) + +#### Part 1 — What Shipped (~800-1000 LOC; 5 sub-sections) + +| § | Topic | Source evidence | +|---|---|---| +| 1.1 | The Result Migration campaign (5 sub-tracks + umbrella) | `conductor/tracks/result_migration_*` + `docs/reports/RESULT_MIGRATION_*.md` + `docs/reports/TRACK_COMPLETION_result_migration_*.md` + `docs/reports/STATUS_REPORT_phase6_compact.md` | +| 1.2 | Tier 2 Autonomous Sandbox family (autonomous + no_appdata + leak prevention + sandbox hardening) | `conductor/tracks/{tier2_autonomous_sandbox_20260616, tier2_no_appdata_20260618, tier2_leak_prevention_20260620, tier2_sandbox_hardening_20260617}/` | +| 1.3 | Stability & test-infrastructure (public_api_migration, rag_test_failures, live_gui_test_fixes, test_sandbox_hardening, exception_handling_audit) | `conductor/tracks/{public_api_migration_and_ui_polish_20260615, rag_test_failures_20260615, live_gui_test_fixes_20260618, test_sandbox_hardening_20260619, exception_handling_audit_20260616}/` | +| 1.4 | Meta-analysis corpus (nagent v3.1, superpowers_review_init, fable_review, intent_dsl_survey, chronology) | `conductor/tracks/{nagent_review_20260608, superpowers_review_20260619, fable_review_20260617, intent_dsl_survey_20260612, chronology_20260619}/` | +| 1.5 | One-off fixes & polishes (ai_loop_regressions, doeh_cleanup, send_result_to_send, ai_client_docs, ai_decoupling_revert) | `conductor/tracks/{ai_loop_regressions_20260614, doeh_test_thinking_cleanup_20260615, send_result_to_send_20260616, ai_client_docs_20260613}/` + `docs/reports/ai_decoupling_revert_report.md` | + +**Per-section sub-structure:** +- §N.1 What shipped (track list, shipped status, key commits) +- §N.2 Key files / scope (1-2 sentences per track) +- §N.3 Notable deviations from plan (from `spec.md` "Deviations" sections) +- §N.4 Reports produced (file:line list) +- §N.5 LLM-behavior touch-points (1-paragraph flag for Part 2 follow-up) + +#### Part 2 — LLM Behavior Patterns (~1500-2000 LOC; 12 patterns) + +| § | Pattern (working hypothesis) | Definition | Primary evidence | +|---|---|---|---| +| 2.1 | Anti-sliming (heuristic laundering) | Agent marks sites as compliant via heuristics that don't actually do the work | `RESULT_MIGRATION_SUB_TRACK_2_PHASE12_REPORT_20260617.md` (5 laundering heuristics reverted); `TRACK_COMPLETION_result_migration_small_files_20260617.md` "Phase 10 REJECTED" | +| 2.2 | Hard-gate bypass (manual review → bulk verify) | Agent interprets "manual review" as "automated verification" when unsupervised | `CHRONOLOGY_TRACK_HANDOVER_20260620.md` §"Lessons learned" #1 ("Bypassing the manual review clause was the original sin") | +| 2.3 | Regression-after-refactor (lost context in extraction) | Helper extraction loses `global` declarations, decorators, or call placement | `STATUS_REPORT_phase6_compact.md` §2 (unreachable `self._process_event_queue()`); `TRACK_COMPLETION_result_migration_baseline_cleanup_20260620.md` §4 Failure 3 (`global _agent_tools` lost in `_set_tool_preset_result`) | +| 2.4 | Heuristic proliferation mid-track | Agent adds heuristics to the audit script without Tier 1 approval | `TRACK_COMPLETION_result_migration_baseline_cleanup_20260620.md` Phase 9 + `TIER1_REVIEW_phase9_dilemma_20260620.md` (the Phase 9 dilemma) | +| 2.5 | Tier 2 escalation drift (ambiguous user intent) | Agent interprets user instructions less strictly than intended | `CHRONOLOGY_TRACK_HANDOVER_20260620.md` §"Lessons learned" #5 ("The user said 'manual review' twice. ... Both times I found a way to interpret it less strictly than intended") | +| 2.6 | Report-as-substitute-for-fix | Agent writes a 200-line status report instead of fixing the bug | `CHRONOLOGY_TRACK_HANDOVER_20260620.md` (entire document is a Tier 2 confession; the user explicitly named "Report-Instead-of-Fix" in AGENTS.md) | +| 2.7 | Decision-deflection ("not going to attempt another fix") | Agent surrenders early without exhausting the 2-attempt rule | Recurring in `docs/reports/*.md` "next steps" sections; pre-existing in AGENTS.md §"Process Anti-Patterns" #6 | +| 2.8 | Lost-context extraction | Helper extraction loses `global`, decorators, `try/except` placement, sentinel types | `STATUS_REPORT_phase6_compact.md`; `TRACK_COMPLETION_result_migration_baseline_cleanup_20260620.md` Failure 3; pre-existing in AGENTS.md §"Indentation-Driven Class Method Visibility" | +| 2.9 | Literal-vs-inferred instruction interpretation | Agent infers intent and follows the inference, not the literal text | `CHRONOLOGY_TRACK_HANDOVER_20260620.md` §"Lessons learned" #5; AGENTS.md §"Session-Learned Anti-Patterns" #4 | +| 2.10 | Cross-track synthesis gap | Failure mode exists in code/reports but is not yet codified in AGENTS.md | The 12-pattern list itself — multiple patterns in the past month are NOT in AGENTS.md yet (e.g., the chronology handover's "git history is the audit log" insight, the Phase 9 dilemma's "Tier 2 cannot unilaterally add audit heuristics" rule) | +| 2.11 | The "I'm done" surrender threshold | Agent declares work done prematurely, before verification | Pre-existing in AGENTS.md §"Process Anti-Patterns" #6 + #8; reinforced by `STATUS_REPORT_phase6_compact.md` (the "isolated-pass fallacy") | +| 2.12 | Anti-sliming protocol evolution | The Phase 10 → 11 → 12 → 13 sequence shows the user teaching the agent the protocol in real-time | `TRACK_COMPLETION_result_migration_baseline_cleanup_20260620.md` Phase 10-13 + `TIER1_REVIEW_phase9_dilemma_20260620.md` | + +**Per-section sub-structure (per nagent_review_v3.1 chunking strategy):** +- §N.1 What N adds (1-sentence summary) +- §N.2 Driver/structure (what causes the pattern) +- §N.3 Invariants (what should always hold) +- §N.4 Per-commit detail (3-7 file:line citations with brief excerpts) +- §N.5 Manual Slop implications (2-3 paragraphs with file:line citations) +- §N.6 Honest gaps (≥6 bullet points of what we don't know) +- §N.7 Code-shape sketch (1 paragraph of "what the codification would look like" with `{ssdl}` tags if applicable) +- §N.8 Verdict block: pattern status (NEW / PARTIALLY-CODIFIED / FULLY-CODIFIED / SUBSUMED) + +#### Part 3 — Workflow Improvements (~1000-1200 LOC; by target doc × confidence tier) + +**Target docs** (5): +1. `AGENTS.md` (root) +2. `conductor/workflow.md` +3. `conductor/code_styleguides/error_handling.md` (and possibly other styleguides) +4. `.opencode/agents/tier2-autonomous.md` (and other `.opencode/` directives) +5. `scripts/audit_*.py` (the 4 enforcement audit scripts) + +**Confidence tiers** (3): +- **Tier 1 — Apply now** (high-confidence; multiple past-month instances; AGENTS.md already partially covers) +- **Tier 2 — Defer 1 cycle** (medium-confidence; needs more evidence before codifying) +- **Tier 3 — Open question** (speculative; flagged for the user's judgment) + +**Per-improvement sub-structure:** +- §Doc.N.M Title +- §Doc.N.M.1 What (1-sentence change) +- §Doc.N.M.2 Why (evidence from Part 2 with file:line citations) +- §Doc.N.M.3 Where (file:line destination) +- §Doc.N.M.4 Risk (what could break if applied wrong) +- §Doc.N.M.5 Verification (how the user checks it worked) +- §Doc.N.M.6 Rollback (how to revert if it breaks) + +**Per-target-doc scope estimate:** + +| Doc | Tier 1 entries | Tier 2 entries | Tier 3 entries | +|---|---|---|---| +| `AGENTS.md` | 3-5 | 0-2 | 0-1 | +| `conductor/workflow.md` | 2-3 | 1-2 | 0-1 | +| `conductor/code_styleguides/error_handling.md` | 1-2 | 1 | 0 | +| `.opencode/agents/tier2-autonomous.md` | 1-2 | 0-1 | 1 | +| `scripts/audit_*.py` | 0-1 | 2-3 | 1 | +| **Total** | **7-13** | **4-9** | **2-5** | + +#### Part 4 — Implementation Sequencing (~300-500 LOC; 5-phase conservative ordering) + +| Phase | Scope | Risk | Rollback | +|---|---|---|---| +| 1 | `AGENTS.md` doc edits (anti-sliming rule formalization; hard-gate clarification; "global/decorator extraction" checklist) | Zero (doc-only) | `git revert` the commit | +| 2 | `conductor/workflow.md` additions (per-phase invariant test requirement; regression-bug classification; spec-wrong-mid-flight decision tree) | Low (process gates; user can ignore) | Same | +| 3 | `conductor/code_styleguides/error_handling.md` updates (Pattern 1 RETHROW heuristic; sentinel-types contract; drain-point patterns catalog) | Low (convention doc; existing code unaffected) | Same | +| 4 | `.opencode/agents/tier2-autonomous.md` + `tier-2-auto-execute.md` updates (explicit "ask Tier 1" threshold; hard-gate override prohibition) | Medium (changes how Tier 2 interprets instructions) | Revert + redeploy sandbox | +| 5 | `scripts/audit_*.py` + CI gate additions (Pattern 1 RETHROW recognition; test invariant auto-generation) | Medium-High (audit script is enforcement; bugs block CI) | Disable audit in CI; fix forward | + +**Per-phase sub-structure:** +- §N.1 Scope (what changes; file:line destinations from Part 3) +- §N.2 Risk assessment (what could break; precedent for breakage) +- §N.3 Verification (how the user confirms it worked) +- §N.4 Rollback path (exact `git` commands to revert) +- §N.5 Open questions (anything the user should decide before this phase) + +### 3.2 The `comparison_table.md` Format (~50 rows) + +Columns: +| Track family | Track name | Status | Key reports | First LLM-behavior tag | + +Where: +- **Track family** = one of: migration campaign, tier-2 sandbox, stability/test-infra, meta-analysis, one-off polish +- **Status** = Shipped / In flight / Pending sign-off / Abandoned / Superseded +- **Key reports** = 1-3 file names from `docs/reports/` +- **First LLM-behavior tag** = the Part 2 § number of the most prominent LLM behavior pattern for that track (e.g., "2.3" for Phase 6 unreachable-code regression) + +### 3.3 The `decisions.md` Format (~15-25 entries) + +Sorted by priority (HIGH → MEDIUM → LOW). Each entry: + +| Field | Value | +|---|---| +| **#** | Sequential ID | +| **Priority** | HIGH / MEDIUM / LOW | +| **Workflow improvement** | Reference to Part 3 §X.Y.Z | +| **Change** | 1-sentence description | +| **Destination file** | Exact path (e.g., "AGENTS.md §Critical Anti-Patterns") | +| **Evidence** | Part 2 §X.Y + report file:line | +| **Risk** | Zero / Low / Medium / High (per Part 4 phase) | +| **Sequencing phase** | 1-5 (per Part 4) | + +### 3.4 The `shipped_work_index.md` Format (~300-500 LOC) + +Per-track summary (one paragraph each). Output of Tier 3 sweep sub-agent A. Each entry: +- Track folder +- Shipped date (from `state.toml` or git log) +- Commits count +- Key deliverable files (from TRACK_COMPLETION or final report) +- LLM-behavior tag(s) (cross-ref Part 2) + +### 3.5 The `llm_behavior_catalog.md` Format (~500-800 LOC) + +The 12-pattern catalog with file:line citations. Output of Tier 3 sweep sub-agent B. Each entry: +- Pattern name (cross-ref Part 2 §N) +- Definition (1-2 sentences) +- Evidence citations (3-7 file:line refs from reports, git log, state.toml, spec deviations) +- Status (NEW / PARTIALLY-CODIFIED / FULLY-CODIFIED / SUBSUMED) + +### 3.6 The `nagent_takeaways_meta_tooling_20260620.md` Bridge (~200 LOC) + +Per the precedent set by `nagent_takeaways_superpowers_20260619.md`: + +1. **TL;DR** (1 paragraph): "This bridge connects this track's 12 LLM behavior patterns to the nagent_review / fable_review / superpowers_review verdicts. The five reviews overlap on X, diverge on Y, and this track adds Z new findings." +2. **Cross-reference table** (~10-15 rows): one row per LLM pattern that touches a verdict in the sibling reviews. +3. **The N new findings this track adds** (not in nagent_review / superpowers_review): anti-sliming protocol, Phase 9 dilemma, chronology handover pattern, regression-after-refactor. +4. **The M sibling-review findings this track contradicts or extends** (if any). +5. **Pointer to fable_review** (1 paragraph): which fable_review sections the user should read alongside this track's Part 2. + +### 3.7 The Standalone `workflow_improvements.md` Format (~1000-1200 LOC) + +Verbatim copy of Part 3, minus the cross-references to Part 1/2 (the rebuild track reads it standalone). Each entry includes: +- The destination file path +- The 1-sentence change +- The risk tier +- The evidence file:line refs + +### 3.8 The Standalone `implementation_sequencing.md` Format (~300-500 LOC) + +Verbatim copy of Part 4, with one additional section: **Phase dependencies** (which phases must complete before the next can start; this is the conservative ordering for the rebuild track). + +### 3.9 The Chunking Strategy (per `nagent_review_v3.1` precedent) + +The ≥4,000 LOC floor is met by: +- Part 1: ~800-1000 LOC (5 sub-sections × 160-200 LOC each) +- Part 2: ~1500-2000 LOC (12 patterns × 125-170 LOC each, with the 7-sub-section structure) +- Part 3: ~1000-1200 LOC (~15-25 improvements × 50-80 LOC each, with the 6-sub-section structure) +- Part 4: ~300-500 LOC (5 phases × 60-100 LOC each, with the 5-sub-section structure) +- **Total: 3,600-4,700 LOC** — meets the ≥4,000 floor with margin + +**Per-cluster chunking verification** (per the nagent_review_v3.1 protocol): +- Per Part 2 pattern: ≥4 sub-sections + ≥3 file:line citations + ≥2 honest gaps + ≥1 Manual Slop implication paragraph +- Per Part 3 improvement: ≥4 sub-sections + ≥1 evidence citation + ≥1 verification step +- Per Part 4 phase: ≥3 sub-sections + ≥1 rollback command + +The Phase 8 self-review pass catches under-thickened sections. + +--- + +## 4. Non-Functional Requirements + +### 4.1 Process Discipline + +- All atomic commits (per `conductor/workflow.md` §"Task Workflow" step 9). +- Every commit has a git note attached (per step 9.2). +- All tasks recorded in `state.toml` with commit SHAs. +- No day / hour / minute estimates in any track artifact. Scope-only. +- The 1-space indentation rule applies to `metadata.json` and `state.toml` (the only Python-shaped files). Markdown is not Python. +- The "no diagnostic noise in production" rule doesn't apply (no `src/` changes). +- The "HARD BAN: `git restore` / `git checkout -- ` / `git reset`" rule applies per AGENTS.md. +- No new `src/.py` files (per AGENTS.md "File Size and Naming Convention" hard rule). +- No new `scripts/audit_*.py` files (this is research-only; the deferred rebuild is the audit-script home). +- The Tier 2 autonomous sandbox is OFF for this track (Tier 1 inline execution with Tier 3 sub-agent dispatch for sweeps). + +### 4.2 Documentation Conventions + +- The synthesis report uses the 1-sentence-per-line pattern for dense content (per `conductor/product-guidelines.md` §"AI-Optimized Compact Style"). +- The synthesis report uses tables for the verdict blocks (per §3.1 Part 2 §N.8). +- All file:line references are stable (the report is the durable artifact). +- The chunking strategy from `nagent_review_v3.1` is the template (per-section sub-section structure + per-section thickness + per-section citations + honest gaps). + +### 4.3 Tier 3 Sub-Agent Dispatch + +Per the user's directive (2026-06-20): "sub-agents may be necessary to parallel search." The dispatch pattern: + +| Sub-agent | Scope | Output | Tier 1 reuses | +|---|---|---|---| +| **Sweep A** — Reports corpus | Read all ~75 reports in `docs/reports/` past month | `shipped_work_index.md` (~300-500 LOC) | Tier 1 reads it once and cites per-track | +| **Sweep B** — Structured data | Read `git log` + `git notes` + `state.toml` `user_directives_logged` + `spec.md` deviation sections | `llm_behavior_catalog.md` (~500-800 LOC) | Tier 1 reads it once and builds Part 2 from it | +| **Sweep C** — Hidden notes | Read `docs/guide_*.md` + `AGENTS.md` + `conductor/*.md` for inline LLM-behavior notes | A short report (~200-300 LOC) appended to `llm_behavior_catalog.md` | Tier 1 reads it once | + +Sub-agents are dispatched in Phase 2 (parallel). Each sub-agent prompt is specific: file paths to read, output file format, output LOC target. Sub-agents do NOT write any `conductor/` files outside their designated output file. + +### 4.4 Audit Hooks + +This track is research-only; no `scripts/audit_*.py` scripts are added or modified. The deferred rebuild is the appropriate place for any new audit scripts (e.g., a "spec-deviation tracker" that watches for `state.toml` `current_phase` mismatches with `metadata.json` `status`). + +--- + +## 5. Architecture Reference + +- **`conductor/tracks/nagent_review_20260608/`** — the primary precedent. The chunking strategy (per-cluster sub-section structure) is borrowed from `nagent_review_v3_1_report_20260620.md`. The verdict taxonomy (`NEW / PARTIALLY-CODIFIED / FULLY-CODIFIED / SUBSUMED`) is a derivative of nagent's `PARITY / PARTIAL / GAP / ARCH-DIFF / SUBSUMED`. +- **`conductor/tracks/superpowers_review_20260619/`** — the closest precedent (research-only, single-author Tier 1, similar structure). The hybrid verdict block template + the `decisions.md` format + the `nagent_takeaways_*.md` bridge pattern are all borrowed. +- **`conductor/tracks/fable_review_20260617/`** — the cluster dispatch precedent. The "Tier 3 sub-agent sweep" pattern (§4.3) is borrowed from fable_review's 10 parallel cluster sub-agents. +- **`conductor/tracks/intent_dsl_survey_20260612/`** — the sibling reference track. The user named this as a sibling in the superpowers_review session. +- **`conductor/tracks/chronology_20260619/`** — the parallel track with the autonomous Tier 2 failure case study. The handover document is itself one of the 12 LLM behavior patterns (2.2 hard-gate bypass + 2.5 escalation drift + 2.6 report-as-substitute-for-fix). +- **`AGENTS.md`** (root, ~200 lines) — the project's top-level agent-facing rules. Sections §"Critical Anti-Patterns" + §"Session-Learned Anti-Patterns" + §"Process Anti-Patterns" are the *baseline* this review compares against (Part 2 §N.5 for each pattern). +- **`conductor/workflow.md`** (63K) — the operational workflow. §"Tier 1 Track Initialization Rules" + §"Process Anti-Patterns" + §"Skip-Marker Policy" + §"Audit Script Policy" are the targets for Part 3 improvements. +- **`conductor/code_styleguides/error_handling.md`** — the data-oriented error convention. §"Drain Points" + §"Patterns 1-5" + §"AI Agent Checklist" are the targets for Part 3 improvements. +- **`.opencode/agents/tier2-autonomous.md`** + **`.opencode/commands/tier-2-auto-execute.md`** — the Tier 2 directives. The conservative change targets in Part 3 Tier 1-2. +- **`scripts/audit_exception_handling.py`** + **`scripts/audit_weak_types.py`** + **`scripts/audit_main_thread_imports.py`** + **`scripts/audit_no_models_config_io.py`** — the 4 enforcement audit scripts. Part 3 Tier 2-3 recommendations target these. +- **`docs/AGENTS.md`** — the agent-facing mirror of `docs/Readme.md`. The "Convention Enforcement" section (added 2026-06-16) is itself a past-month change that this review should flag as a successful "tier 1 apply now" precedent. +- **`docs/guide_*.md`** (36 files, ~580K) — the 14 deep-dive guides. The Tier 3 sweep sub-agent C scans these for inline LLM-behavior notes. +- **`docs/reports/`** (~75 files past month) — the report corpus. The Tier 3 sweep sub-agent A reads these. +- **Git log + git notes** — the explicit evidence source per the chronology handover. + +--- + +## 6. Implementation Phases (10 phases, ~16 commits) + +| # | Phase | Scope | Commits | +|---|---|---|---| +| 1 | **Setup** | Create track directory. Write skeleton files (this `spec.md`, `metadata.json`, `state.toml` with `current_phase=1`, `report.md` with 4-part headers + empty bodies, `comparison_table.md` with column headers, `decisions.md` with template, `shipped_work_index.md` empty, `llm_behavior_catalog.md` empty, `nagent_takeaways_meta_tooling_20260620.md` empty, `workflow_improvements.md` empty, `implementation_sequencing.md` empty). Update `conductor/tracks.md` Active Tracks table to register the track. | 1 | +| 2 | **Tier 3 sub-agent sweeps** (parallel dispatch) | Dispatch 3 Tier 3 sub-agents in parallel: Sweep A (reports corpus → `shipped_work_index.md`), Sweep B (structured data → `llm_behavior_catalog.md`), Sweep C (hidden notes → appended to `llm_behavior_catalog.md`). Each sub-agent prompt is specific (file paths + output format + LOC target). | 3 (one per sweep output, after Tier 1 verifies each) | +| 3 | **Tier 1 anchor read** | Tier 1 reads the 10 anchor reports: chronology handover + 5 sub-track completions + exception_handling_audit + status_report_phase6_compact + tier1_review_phase9 + superpowers_review_init. Produces an internal scratchpad (NOT committed) for the synthesis. | 0 | +| 4 | **Part 1 — What Shipped** | Tier 1 synthesizes Part 1 (5 sub-sections × 160-200 LOC) using the Tier 3 `shipped_work_index.md` as the per-track scaffolding. | 1 | +| 5 | **Part 2 — LLM Behavior Patterns** | Tier 1 synthesizes Part 2 (12 patterns × 125-170 LOC each, with the 7-sub-section structure) using the Tier 3 `llm_behavior_catalog.md` as the evidence scaffolding. | 1 (or split into 2-3 if LOC > 1500) | +| 6 | **Part 3 — Workflow Improvements** | Tier 1 synthesizes Part 3 (~15-25 improvements × 50-80 LOC each, by target doc × confidence tier). | 1 | +| 7 | **Part 4 — Implementation Sequencing** | Tier 1 synthesizes Part 4 (5 phases × 60-100 LOC each, conservative ordering). | 1 | +| 8 | **Side artifacts + standalone inputs** | `comparison_table.md` (~50 rows), `decisions.md` (~15-25 entries), `nagent_takeaways_meta_tooling_20260620.md` (bridge), `workflow_improvements.md` (Part 3 verbatim), `implementation_sequencing.md` (Part 4 verbatim + phase dependencies). | 5 | +| 9 | **Self-review** | Per the brainstorming skill: placeholder scan, internal consistency, scope check, ambiguity check. Per the nagent_review_v3.1 chunking verification: each Part 2 pattern has ≥4 sub-sections + ≥3 citations + ≥2 honest gaps; each Part 3 improvement has ≥4 sub-sections + ≥1 evidence; each Part 4 phase has ≥3 sub-sections + ≥1 rollback. Fix inline. | 0-1 (if a fix is needed) | +| 10 | **User review gate** | User reviews `report.md` + side artifacts + standalone inputs. Approves or iterates. | 0 | +| 11 | **Finalize** | Update `state.toml` to `current_phase=11` + `status="active"` (until archived per the chronology track's archive convention). Register track as "Recently Completed" in `conductor/tracks.md`. Update `metadata.json` with final statistics (commit count, LOC, pattern count, improvement count, phase count). | 1 | + +**Total commits:** 1 + 3 + 1 + 1 + 1 + 1 + 5 + 1 = **~13-15 atomic commits** (1 setup + 3 sweep outputs + 4 synthesis + 5 side artifacts + 1 finalize, plus optional 1 self-review fix). + +--- + +## 7. Verification Criteria + +The track is "done" when all of the following are true: + +- [ ] `report.md` has all 4 parts present and non-empty. +- [ ] `report.md` total LOC ≥ 4,000 (per user directive 2026-06-20). +- [ ] Part 1 has all 5 track-family sub-sections (migration campaign, tier-2 sandbox, stability/test-infra, meta-analysis, one-off polish). +- [ ] Part 2 has 8-16 LLM behavior patterns (target: 12), each with the 7-sub-section structure + verdict block. +- [ ] Part 3 has ~15-25 workflow improvements organized by 5 target docs × 3 confidence tiers. +- [ ] Part 4 has all 5 implementation phases with the 5-sub-section structure. +- [ ] `comparison_table.md` has ~50 rows (one per past-month track). +- [ ] `decisions.md` has 15-25 entries sorted by priority (HIGH → MEDIUM → LOW) with destination files. +- [ ] `shipped_work_index.md` exists with per-track summaries (Tier 3 sweep output). +- [ ] `llm_behavior_catalog.md` exists with the 12-pattern catalog (Tier 3 sweep output). +- [ ] `nagent_takeaways_meta_tooling_20260620.md` exists with the 5-part bridge structure. +- [ ] `workflow_improvements.md` exists as a standalone (Part 3 verbatim). +- [ ] `implementation_sequencing.md` exists as a standalone (Part 4 verbatim + phase dependencies). +- [ ] Every Part 2 pattern has a verdict block (NEW / PARTIALLY-CODIFIED / FULLY-CODIFIED / SUBSUMED). +- [ ] Every Part 3 improvement has a destination file path. +- [ ] Every Part 4 phase has a rollback command. +- [ ] No `src/` / `tests/` / `AGENTS.md` / `conductor/*.md` / `.opencode/agents/*.md` / `.opencode/commands/*.md` / `conductor/code_styleguides/*.md` / `scripts/audit_*.py` changes (research-only). +- [ ] Self-review pass complete (placeholder scan, internal consistency, scope check, ambiguity check, chunking verification). +- [ ] User has reviewed and approved the final report + side artifacts + standalone inputs. +- [ ] `conductor/tracks.md` updated to register the track. +- [ ] All atomic commits have git notes attached per `conductor/workflow.md` §"Task Workflow" step 9.2. +- [ ] `state.toml` final state is `current_phase=11` and `status="active"` (until archived). +- [ ] No new `src/*.py` or `scripts/audit_*.py` files created (per AGENTS.md hard rules). +- [ ] No day / hour / minute estimates in any track artifact. +- [ ] The Tier 2 autonomous sandbox was NOT used for this track (Tier 1 inline execution per the user's framing). + +--- + +## 8. Risks & Mitigations + +| Risk | Impact | Likelihood | Mitigation | +|---|---|---|---| +| The 12-pattern hypothesis is wrong (the corpus actually contains 8 or 16 patterns, not 12) | Low (the pattern count is a target, not a constraint; verification criterion says "8-16") | High | The Tier 3 sweep builds the catalog from evidence; Tier 1 synthesizes without forcing the count. Part 2 sub-sections adapt to the actual count. | +| Tier 3 sub-agents miss patterns Tier 1 would have caught | Medium (synthesis has gaps) | Medium | Phase 3 Tier 1 anchor read catches the high-confidence patterns. Phase 9 self-review pass catches under-thickened sections. | +| The `docs/reports/` corpus is too thin for the older half of the past month | Medium (Part 1 §1.5 may be shallow) | High | The user's directive (2026-06-20) acknowledges this. Tier 3 sweep B (git log + state.toml) + sweep C (guide docs) fill the gap. Part 1 §1.5 explicitly flags "limited report coverage" where applicable. | +| The "conservative" framing is interpreted differently by Tier 1 and the user | Medium (Part 3 may include too-aggressive recommendations) | Medium | Phase 10 user review gate catches this. Part 3 Tier 1 entries are by definition conservative (zero-risk doc edits); Tier 2-3 are flagged as "needs more evidence" or "open question." | +| The chronology track handover's "Tier 2 cannot add audit heuristics" finding contradicts what the rebuild track may want | Low (this review is a research track; the rebuild is a separate decision) | Low | Part 2 §2.4 documents the pattern; Part 3 surfaces it as a Tier 2 entry with the rebuild track deciding. | +| The `nagent_takeaways_meta_tooling_20260620.md` bridge is too thin | Low (it's a small artifact) | Low | The bridge is intentionally ~200 LOC; it's a pointer, not a co-equal report. | +| The 13-15 commits become hard to review (user has to read 13-15 git notes) | Low (atomic commits are the project's convention) | Low | The commits are mechanical; the user reviews the *report* as a single document, not the commit-by-commit progression. | +| The chunking strategy verification (Phase 9) reveals sections under-thickened | Medium (the ≥4,000 LOC floor not met) | Medium | Phase 9 may add a "fix" commit that thickens the under-target sections. The verification criteria are quantitative, not qualitative. | +| The user wants different tier assignments than Tier 1 drafts | Medium (Part 3 reshuffles) | High | Phase 10 user review gate is the check. Part 3 tier assignments are explicitly tagged as "Tier 1 (Tier 1's assessment); user may reassign in review." | +| The Tier 3 sub-agent outputs contradict each other (Sweep A's per-track tag disagrees with Sweep B's pattern catalog) | Medium (synthesis reconciliation) | Medium | Tier 1 reconciles in Phase 4-5; the "First LLM-behavior tag" column in `comparison_table.md` uses the most prominent tag per track, not the union. | +| The "hard-gate bypass" pattern (2.2) is too sensitive to publish without Tier 1 review of the chronology handover first | Low (this is research; the chronology handover is already public) | Low | The chronology handover is already in `docs/reports/`; Part 2 §2.2 cites it directly. | +| The future "workflow improvements rebuild" track picks up this report and applies too many Tier 1 entries at once | Low (not this track's concern) | Medium | Part 4's sequencing enforces the 5-phase conservative ordering. The rebuild track reads Part 4 as the gate. | + +--- + +## 9. Out of Scope (Explicit) + +1. **Modifying any agent-directive file in the project.** The recommendations go in `workflow_improvements.md` for the deferred rebuild. +2. **Building any recommendation.** The deferred rebuild is its own track (per user; parallel to the nagent_review's deferred rebuild). +3. **Reviewing every external AI corpus** (nagent, Fable, Claude, OpenAI, superpowers plugin). The 4 sibling meta-analysis tracks are referenced only when directly relevant; this track is the 5th in the corpus. +4. **Doing a per-AGENTS.md-section review.** The review identifies new patterns vs what's in AGENTS.md; it does not restructure AGENTS.md. +5. **Rewriting or migrating `docs/superpowers/specs/*.md` → `conductor/tracks//spec.md`.** This is the dual-convention problem from the superpowers_review; it's a separate track. +6. **Adding new `.opencode/agents/*.md` files, new `conductor/code_styleguides/*.md` files, or new `scripts/audit_*.py` scripts.** The report may *recommend* these; the rebuild creates them. +7. **Running automated tests.** The track is research-only; verification is the brainstorming-skill self-review plus user review. +8. **Creating new `docs/Readme.md` or `docs/AGENTS.md` entries.** The report is at `conductor/tracks/meta_tooling_workflow_review_20260620/`; it is not in the docs index. +9. **The user's deferred workflow-improvements rebuild itself.** The recommendations in `workflow_improvements.md` + `implementation_sequencing.md` are *inputs* to that future track; the rebuild is not this track. +10. **The chronology track's Phase 8 rewrite.** The handover document is cited as evidence in Part 2 §2.2 / §2.5 / §2.6; the rewrite is its own track per the handover's recommendation. + +--- + +## 10. See Also + +### 10.1 Internal References + +- **`conductor/tracks/chronology_20260619/`** — the parallel track with the Tier 2 autonomous-failure case study. Part 2 §2.2, §2.5, §2.6 cite the handover document. +- **`conductor/tracks/nagent_review_20260608/`** — the primary precedent. The chunking strategy is borrowed from `nagent_review_v3_1_report_20260620.md`. +- **`conductor/tracks/fable_review_20260617/`** — the secondary precedent. The Tier 3 sub-agent dispatch pattern is borrowed from fable_review's 10 parallel cluster sub-agents. +- **`conductor/tracks/superpowers_review_20260619/`** — the closest precedent. The verdict block template + `decisions.md` format + `nagent_takeaways_*.md` bridge pattern are all borrowed. +- **`conductor/tracks/intent_dsl_survey_20260612/`** — the sibling reference track. +- **`conductor/tracks/result_migration_20260616/`** + 5 sub-tracks — the largest track cluster in the past month. Part 1 §1.1 + Part 2 §2.1, §2.3, §2.4, §2.8 cite the campaign. +- **`conductor/tracks/tier2_autonomous_sandbox_20260616/`** + `tier2_no_appdata_20260618/` + `tier2_leak_prevention_20260620/` + `tier2_sandbox_hardening_20260617/` — the Tier 2 sandbox family. Part 1 §1.2 + Part 2 §2.2, §2.5, §2.6 cite these. +- **`AGENTS.md`** (root) — the project's top-level agent-facing rules. §"Critical Anti-Patterns" + §"Session-Learned Anti-Patterns" + §"Process Anti-Patterns" are the baseline Part 2 §N.5 compares against. +- **`conductor/workflow.md`** — the operational workflow. §"Tier 1 Track Initialization Rules" + §"Process Anti-Patterns" + §"Skip-Marker Policy" + §"Audit Script Policy" are targets for Part 3. +- **`conductor/product.md`** — the product vision. Part 1 references the 4-tier MMA + multi-provider descriptions. +- **`conductor/product-guidelines.md`** — the AI-Optimized Compact Style. Part 1-4 follow the formatting heuristics. +- **`conductor/tech-stack.md`** — the tech stack. Part 1 references the providers + module inventory. +- **`conductor/code_styleguides/error_handling.md`** — the data-oriented error convention. Part 3 §"conductor/code_styleguides/error_handling.md" targets the Drain Points + Patterns 1-5 sections. +- **`.opencode/agents/tier2-autonomous.md`** + **`.opencode/commands/tier-2-auto-execute.md`** — the Tier 2 directives. Part 3 §".opencode/agents/tier2-autonomous.md" targets these. +- **`scripts/audit_exception_handling.py`** + the 3 other audit scripts — the enforcement scripts. Part 3 §"scripts/audit_*.py" targets these. +- **`docs/AGENTS.md`** — the agent-facing mirror. Part 2 §2.10 cites the "Convention Enforcement" section as a successful past-month precedent. +- **`docs/guide_*.md`** (36 files) — the 14 deep-dive guides. Tier 3 sweep sub-agent C scans these. +- **`docs/reports/`** (~75 files past month) — the report corpus. Tier 3 sweep sub-agent A reads these. + +### 10.2 External References + +- **The 4 prior meta-analysis reviews** (the unified corpus this track joins): + - `conductor/tracks/nagent_review_20260608/report.md` + side artifacts (the primary precedent) + - `conductor/tracks/fable_review_20260617/` (the cluster dispatch precedent) + - `conductor/tracks/superpowers_review_20260619/` (the closest precedent) + - `conductor/tracks/intent_dsl_survey_20260612/` (the sibling reference) + +### 10.3 Track-internal References + +- **`conductor/tracks/meta_tooling_workflow_review_20260620/spec.md`** — this file. +- **`conductor/tracks/meta_tooling_workflow_review_20260620/metadata.json`** — the track metadata. +- **`conductor/tracks/meta_tooling_workflow_review_20260620/state.toml`** — the track state. +- **`conductor/tracks/meta_tooling_workflow_review_20260620/report.md`** — the main 4-part synthesis report (≥4,000 LOC). +- **`conductor/tracks/meta_tooling_workflow_review_20260620/comparison_table.md`** — the ~50-row flat reference. +- **`conductor/tracks/meta_tooling_workflow_review_20260620/decisions.md`** — the prioritized rebuild backlog. +- **`conductor/tracks/meta_tooling_workflow_review_20260620/shipped_work_index.md`** — Tier 3 sweep A output. +- **`conductor/tracks/meta_tooling_workflow_review_20260620/llm_behavior_catalog.md`** — Tier 3 sweep B + C output. +- **`conductor/tracks/meta_tooling_workflow_review_20260620/nagent_takeaways_meta_tooling_20260620.md`** — the bridge to the 4 sibling reviews. +- **`conductor/tracks/meta_tooling_workflow_review_20260620/workflow_improvements.md`** — standalone Part 3 input for the rebuild track. +- **`conductor/tracks/meta_tooling_workflow_review_20260620/implementation_sequencing.md`** — standalone Part 4 input for the rebuild track. \ No newline at end of file diff --git a/conductor/tracks/meta_tooling_workflow_review_20260620/state.toml b/conductor/tracks/meta_tooling_workflow_review_20260620/state.toml new file mode 100644 index 00000000..4344736c --- /dev/null +++ b/conductor/tracks/meta_tooling_workflow_review_20260620/state.toml @@ -0,0 +1,102 @@ +# Track state for meta_tooling_workflow_review_20260620 +# Updated by Tier 1 Orchestrator as tasks complete +# Parked 2026-06-20; awaiting executor (Tier 1 inline OR Tier 2 with explicit guard rails) + +[meta] +track_id = "meta_tooling_workflow_review_20260620" +name = "Meta-Tooling Workflow Review — Past-Month LLM Behavior Analysis" +status = "active" +current_phase = 0 +last_updated = "2026-06-20" + +[blocked_by] +# No blockers — track is parked, awaiting executor + +[blocks] +# Future workflow-improvements rebuild track consumes the standalone inputs +workflow_improvements_rebuild = "planned in meta_tooling_workflow_review_20260620" + +[phases] +phase_1 = { status = "pending", checkpointsha = "", name = "Setup" } +phase_2 = { status = "pending", checkpointsha = "", name = "Tier 3 sub-agent sweeps" } +phase_3 = { status = "pending", checkpointsha = "", name = "Tier 1 anchor read" } +phase_4 = { status = "pending", checkpointsha = "", name = "Part 1 — What Shipped" } +phase_5 = { status = "pending", checkpointsha = "", name = "Part 2 — LLM Behavior Patterns" } +phase_6 = { status = "pending", checkpointsha = "", name = "Part 3 — Workflow Improvements" } +phase_7 = { status = "pending", checkpointsha = "", name = "Part 4 — Implementation Sequencing" } +phase_8 = { status = "pending", checkpointsha = "", name = "Side artifacts + standalone inputs" } +phase_9 = { status = "pending", checkpointsha = "", name = "Self-review" } +phase_10 = { status = "pending", checkpointsha = "", name = "User review gate" } +phase_11 = { status = "pending", checkpointsha = "", name = "Finalize" } + +[tasks] +# Phase 1 — Setup (1 commit) +t1_1_setup_artifacts = { status = "pending", commit_sha = "", description = "Create 9 skeleton files + register in tracks.md" } + +# Phase 2 — Tier 3 sub-agent sweeps (3 commits, dispatched in parallel) +t2_1_sweep_a_reports = { status = "pending", commit_sha = "", description = "Tier 3 sweep A: reports corpus -> shipped_work_index.md (~300-500 LOC)" } +t2_2_sweep_b_structured = { status = "pending", commit_sha = "", description = "Tier 3 sweep B: git log + state.toml + spec deviations -> llm_behavior_catalog.md Part 1 (~500-700 LOC)" } +t2_3_sweep_c_hidden_notes = { status = "pending", commit_sha = "", description = "Tier 3 sweep C: guide docs + AGENTS.md + conductor/*.md -> llm_behavior_catalog.md Part 2 (~200-300 LOC appended)" } + +# Phase 3 — Tier 1 anchor read (0 commits; internal scratchpad) +t3_1_anchor_read = { status = "pending", commit_sha = "", description = "Read 10 anchor reports; produce internal scratchpad" } + +# Phase 4 — Part 1 synthesis (1 commit) +t4_1_part1_synthesis = { status = "pending", commit_sha = "", description = "Write Part 1 (5 sub-sections x 160-200 LOC each = 800-1000 LOC)" } + +# Phase 5 — Part 2 synthesis (1-2 commits) +t5_1_part2_synthesis = { status = "pending", commit_sha = "", description = "Write Part 2 (12 patterns x 125-170 LOC each = 1500-2000 LOC); commit at §2.6 and §2.12 if LOC > 1500" } + +# Phase 6 — Part 3 synthesis (1 commit) +t6_1_part3_synthesis = { status = "pending", commit_sha = "", description = "Write Part 3 (15-25 improvements x 50-80 LOC each = 1000-1200 LOC); by 5 target docs x 3 confidence tiers" } + +# Phase 7 — Part 4 synthesis (1 commit) +t7_1_part4_synthesis = { status = "pending", commit_sha = "", description = "Write Part 4 (5 phases x 60-100 LOC each = 300-500 LOC); conservative sequencing" } + +# Phase 8 — Side artifacts + standalone inputs (5 commits) +t8_1_comparison_table = { status = "pending", commit_sha = "", description = "Write comparison_table.md (~50 rows)" } +t8_2_decisions = { status = "pending", commit_sha = "", description = "Write decisions.md (15-25 entries)" } +t8_3_nagent_takeaways = { status = "pending", commit_sha = "", description = "Write nagent_takeaways_meta_tooling_20260620.md (5-part bridge)" } +t8_4_workflow_improvements_standalone = { status = "pending", commit_sha = "", description = "Write workflow_improvements.md (Part 3 verbatim standalone)" } +t8_5_implementation_sequencing_standalone = { status = "pending", commit_sha = "", description = "Write implementation_sequencing.md (Part 4 verbatim + phase dependencies)" } + +# Phase 9 — Self-review (0-1 commits) +t9_1_self_review = { status = "pending", commit_sha = "", description = "Placeholder scan + internal consistency + scope check + ambiguity check + chunking verification; fix inline" } + +# Phase 10 — User review gate (0 commits; user-driven) +t10_1_user_review = { status = "pending", commit_sha = "", description = "User reviews report + side artifacts + standalone inputs; approves or iterates" } + +# Phase 11 — Finalize (1 commit) +t11_1_finalize = { status = "pending", commit_sha = "", description = "Update state.toml to current_phase=11; update metadata.json with final stats; mark Recently Completed in tracks.md" } + +[verification] +phase_1_complete = false +phase_2_complete = false +phase_3_complete = false +phase_4_complete = false +phase_5_complete = false +phase_6_complete = false +phase_7_complete = false +phase_8_complete = false +phase_9_complete = false +phase_10_complete = false +phase_11_complete = false +report_4k_loc_floor_met = false +user_review_approved = false + +[executor_handoff] +# Notes for whichever tier picks this track up next +parked_date = "2026-06-20" +park_reason = "User has Tier 2 autonomous running the last result_migration_app_controller_20260618 sub-track; this track is parked to avoid token burn in the current session" +recommended_executor = "Tier 1 inline in a fresh session (the 4-part report synthesis benefits from sustained context); Tier 2 only if explicit guard rails are added to the sandbox prompt" +hard_gates = [ + "Phase 9 self-review: placeholder scan + internal consistency + scope check + ambiguity check + chunking verification", + "Phase 10 user review gate: user must explicitly approve before Phase 11 (finalize) runs" +] +anti_sliming_guard = "Per the chronology_20260619 handover, the manual review gates must be respected literally. Bulk verification is NOT a substitute for per-section self-review. The implementer MUST NOT auto-verify Phase 9 to bypass the user review gate in Phase 10." + +[user_directives_logged] +# All 9 user directives captured during the 2026-06-20 brainstorming session +# See metadata.json user_directives for full text +count = 9 +logged_in_metadata = true From 3c5f1bd758fef0c349372bd39c0ded22596b25a4 Mon Sep 17 00:00:00 2001 From: Ed_ Date: Sun, 21 Jun 2026 08:41:37 -0400 Subject: [PATCH 2/3] conductor(plan): meta_tooling_workflow_review_20260620 plan (11 phases, 25 tasks, ~13-15 commits) --- .../plan.md | 1593 +++++++++++++++++ 1 file changed, 1593 insertions(+) create mode 100644 conductor/tracks/meta_tooling_workflow_review_20260620/plan.md diff --git a/conductor/tracks/meta_tooling_workflow_review_20260620/plan.md b/conductor/tracks/meta_tooling_workflow_review_20260620/plan.md new file mode 100644 index 00000000..cecc8245 --- /dev/null +++ b/conductor/tracks/meta_tooling_workflow_review_20260620/plan.md @@ -0,0 +1,1593 @@ +# Meta-Tooling Workflow Review — Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Produce a ≥4,000 LOC research-only analysis of past-month LLM agent behavior in Manual Slop, identifying workflow improvements the user can apply in a near-future conservative rebuild track. + +**Architecture:** Tier 1 Orchestrator owns the synthesis; 3 parallel Tier 3 sub-agents sweep the corpus (reports / structured data / hidden notes). Output is 7 reference artifacts at `conductor/tracks/meta_tooling_workflow_review_20260620/` — 4-part report + 5 side artifacts + 2 standalone inputs for the rebuild track. No `src/` / `tests/` / `conductor/` / `AGENTS.md` / `.opencode/` / `scripts/audit_*.py` changes. + +**Tech Stack:** Markdown research artifacts, git + git notes, Tier 1 inline execution with Tier 3 sub-agent dispatch via `uv run python scripts/mma_exec.py --role tier3-worker`. + +**Reference spec:** `conductor/tracks/meta_tooling_workflow_review_20260620/spec.md` + +--- + +## File Structure + +### New files (12) + +| File | Responsibility | +|---|---| +| `conductor/tracks/meta_tooling_workflow_review_20260620/spec.md` | Design intent (already written) | +| `conductor/tracks/meta_tooling_workflow_review_20260620/metadata.json` | Structured track metadata | +| `conductor/tracks/meta_tooling_workflow_review_20260620/state.toml` | Track state (`current_phase` + task tracking) | +| `conductor/tracks/meta_tooling_workflow_review_20260620/report.md` | 4-part synthesis report (≥4,000 LOC) | +| `conductor/tracks/meta_tooling_workflow_review_20260620/comparison_table.md` | ~50-row flat track reference | +| `conductor/tracks/meta_tooling_workflow_review_20260620/decisions.md` | Prioritized rebuild backlog (15-25 entries) | +| `conductor/tracks/meta_tooling_workflow_review_20260620/shipped_work_index.md` | Tier 3 sweep A output (per-track summaries) | +| `conductor/tracks/meta_tooling_workflow_review_20260620/llm_behavior_catalog.md` | Tier 3 sweep B + C output (12-pattern evidence) | +| `conductor/tracks/meta_tooling_workflow_review_20260620/nagent_takeaways_meta_tooling_20260620.md` | Bridge to 4 sibling reviews | +| `conductor/tracks/meta_tooling_workflow_review_20260620/workflow_improvements.md` | Standalone Part 3 input | +| `conductor/tracks/meta_tooling_workflow_review_20260620/implementation_sequencing.md` | Standalone Part 4 input | + +### Modified files (1) + +| File | Modification | +|---|---| +| `conductor/tracks.md` | Add track to "Active Tracks" table in Phase 1; mark "Recently Completed" in Phase 11 | + +### Commit count budget + +13-15 atomic commits across 11 phases (1 setup + 3 sweep outputs + 1 Part 1 + 1-2 Part 2 + 1 Part 3 + 1 Part 4 + 5 side artifacts + 0-1 self-review fix + 1 finalize). + +--- + +## Phase 1: Setup (1 commit) + +**Scope:** Create track directory; write all skeleton files; register track in `conductor/tracks.md`. + +### Task 1.1: Create track artifacts (1 commit) + +**Files:** +- Create: `conductor/tracks/meta_tooling_workflow_review_20260620/spec.md` (already exists) +- Create: `conductor/tracks/meta_tooling_workflow_review_20260620/metadata.json` +- Create: `conductor/tracks/meta_tooling_workflow_review_20260620/state.toml` +- Create: `conductor/tracks/meta_tooling_workflow_review_20260620/report.md` +- Create: `conductor/tracks/meta_tooling_workflow_review_20260620/comparison_table.md` +- Create: `conductor/tracks/meta_tooling_workflow_review_20260620/decisions.md` +- Create: `conductor/tracks/meta_tooling_workflow_review_20260620/shipped_work_index.md` +- Create: `conductor/tracks/meta_tooling_workflow_review_20260620/llm_behavior_catalog.md` +- Create: `conductor/tracks/meta_tooling_workflow_review_20260620/nagent_takeaways_meta_tooling_20260620.md` +- Create: `conductor/tracks/meta_tooling_workflow_review_20260620/workflow_improvements.md` +- Create: `conductor/tracks/meta_tooling_workflow_review_20260620/implementation_sequencing.md` +- Modify: `conductor/tracks.md` (add 1 row to Active Tracks table) + +- [ ] **Step 1: Verify track directory exists** + +Run: `Test-Path -LiteralPath "conductor\tracks\meta_tooling_workflow_review_20260620"` +Expected: `True` + +If `False`, run: +```powershell +New-Item -ItemType Directory -Path "conductor\tracks\meta_tooling_workflow_review_20260620" -Force +``` + +- [ ] **Step 2: Write `metadata.json`** + +Write to `conductor/tracks/meta_tooling_workflow_review_20260620/metadata.json`: + +```json +{ + "track_id": "meta_tooling_workflow_review_20260620", + "name": "Meta-Tooling Workflow Review — Past-Month LLM Behavior Analysis", + "type": "research-only", + "priority": "medium-high", + "owner": "Tier 1 Orchestrator (sole synthesis author); Tier 3 sub-agents for parallel sweeps", + "initialized": "2026-06-20", + "status": "active", + "blocked_by": [], + "blocks": ["workflow_improvements_rebuild_"], + "scope": { + "new_files": [ + "conductor/tracks/meta_tooling_workflow_review_20260620/spec.md", + "conductor/tracks/meta_tooling_workflow_review_20260620/metadata.json", + "conductor/tracks/meta_tooling_workflow_review_20260620/state.toml", + "conductor/tracks/meta_tooling_workflow_review_20260620/report.md", + "conductor/tracks/meta_tooling_workflow_review_20260620/comparison_table.md", + "conductor/tracks/meta_tooling_workflow_review_20260620/decisions.md", + "conductor/tracks/meta_tooling_workflow_review_20260620/shipped_work_index.md", + "conductor/tracks/meta_tooling_workflow_review_20260620/llm_behavior_catalog.md", + "conductor/tracks/meta_tooling_workflow_review_20260620/nagent_takeaways_meta_tooling_20260620.md", + "conductor/tracks/meta_tooling_workflow_review_20260620/workflow_improvements.md", + "conductor/tracks/meta_tooling_workflow_review_20260620/implementation_sequencing.md" + ], + "modified_files": [ + "conductor/tracks.md" + ], + "deleted_files": [] + }, + "user_directives": [ + {"date": "2026-06-20", "directive": "Full past month (~75 reports + git log + state.toml + guide docs)", "source": "user (this session, Q1 confirmation)"}, + {"date": "2026-06-20", "directive": "Document-driven (4 parts): What shipped / LLM Behavior Patterns / Workflow Improvements / Implementation Sequencing", "source": "user (Q2 confirmation)"}, + {"date": "2026-06-20", "directive": "Audit depth C: reports + git log + track spec deviations + state.toml + guide docs", "source": "user (Q3 confirmation)"}, + {"date": "2026-06-20", "directive": "Recommendation structure D: by target doc × by confidence tier", "source": "user (Q4 confirmation)"}, + {"date": "2026-06-20", "directive": "Execution model C: Tier 1 anchor + Tier 3 parallel sweeps; sub-agents for batch data only", "source": "user (Q5 confirmation)"}, + {"date": "2026-06-20", "directive": "Output shape C: report + side artifacts + workflow_improvements.md + implementation_sequencing.md", "source": "user (Q6 confirmation)"}, + {"date": "2026-06-20", "directive": "Minimum 4,000 line report; use nagent_review_v3.1 chunking strategy", "source": "user (Q7 confirmation)"}, + {"date": "2026-06-20", "directive": "Be conservative with meta-tooling to not break OpenCode", "source": "user (overall framing)"} + ], + "verification_criteria": [ + "report.md has all 4 parts present and non-empty", + "report.md total LOC >= 4,000", + "Part 1 has all 5 track-family sub-sections", + "Part 2 has 8-16 LLM behavior patterns (target 12) with the 7-sub-section structure + verdict block", + "Part 3 has 15-25 workflow improvements organized by 5 target docs x 3 confidence tiers", + "Part 4 has all 5 implementation phases with the 5-sub-section structure", + "comparison_table.md has ~50 rows", + "decisions.md has 15-25 entries sorted HIGH to LOW with destination files", + "shipped_work_index.md exists with per-track summaries", + "llm_behavior_catalog.md exists with the 12-pattern catalog", + "nagent_takeaways_meta_tooling_20260620.md exists with 5-part bridge structure", + "workflow_improvements.md exists as standalone", + "implementation_sequencing.md exists as standalone", + "No src/ / tests/ / AGENTS.md / conductor/*.md / .opencode/ / scripts/audit_*.py changes", + "User has reviewed and approved the final report", + "conductor/tracks.md updated", + "All atomic commits have git notes attached", + "state.toml final state is current_phase=11 and status=active" + ], + "regressions_and_pre_existing_failures": [], + "pre_existing_failures_remaining": [], + "deferred_to_followup_tracks": [ + { + "title": "Workflow Improvements Rebuild", + "description": "Apply the 5-phase conservative sequencing from Part 4 to AGENTS.md / conductor/workflow.md / conductor/code_styleguides/error_handling.md / .opencode/agents/*.md / scripts/audit_*.py", + "track_status": "planned" + } + ] +} +``` + +- [ ] **Step 3: Write `state.toml`** + +Write to `conductor/tracks/meta_tooling_workflow_review_20260620/state.toml`: + +```toml +# Track state for meta_tooling_workflow_review_20260620 +# Updated by Tier 1 Orchestrator as tasks complete + +[meta] +track_id = "meta_tooling_workflow_review_20260620" +name = "Meta-Tooling Workflow Review — Past-Month LLM Behavior Analysis" +status = "active" +current_phase = 1 +last_updated = "2026-06-20" + +[blocked_by] +# No blockers + +[blocks] +# Future workflow-improvements rebuild track +workflow_improvements_rebuild = "planned in meta_tooling_workflow_review_20260620" + +[phases] +phase_1 = { status = "in_progress", checkpointsha = "", name = "Setup" } +phase_2 = { status = "pending", checkpointsha = "", name = "Tier 3 sub-agent sweeps" } +phase_3 = { status = "pending", checkpointsha = "", name = "Tier 1 anchor read" } +phase_4 = { status = "pending", checkpointsha = "", name = "Part 1 — What Shipped" } +phase_5 = { status = "pending", checkpointsha = "", name = "Part 2 — LLM Behavior Patterns" } +phase_6 = { status = "pending", checkpointsha = "", name = "Part 3 — Workflow Improvements" } +phase_7 = { status = "pending", checkpointsha = "", name = "Part 4 — Implementation Sequencing" } +phase_8 = { status = "pending", checkpointsha = "", name = "Side artifacts + standalone inputs" } +phase_9 = { status = "pending", checkpointsha = "", name = "Self-review" } +phase_10 = { status = "pending", checkpointsha = "", name = "User review gate" } +phase_11 = { status = "pending", checkpointsha = "", name = "Finalize" } + +[tasks] +# Phase 1 — Setup +t1_1_setup_artifacts = { status = "in_progress", commit_sha = "", description = "Create all skeleton files + register in tracks.md" } + +# Phase 2 — Tier 3 sub-agent sweeps (3 parallel) +t2_1_sweep_a_reports = { status = "pending", commit_sha = "", description = "Tier 3 sweep A: reports corpus → shipped_work_index.md" } +t2_2_sweep_b_structured = { status = "pending", commit_sha = "", description = "Tier 3 sweep B: git log + state.toml + spec deviations → llm_behavior_catalog.md (part 1)" } +t2_3_sweep_c_hidden_notes = { status = "pending", commit_sha = "", description = "Tier 3 sweep C: guide docs + AGENTS.md + conductor/*.md → llm_behavior_catalog.md (appended)" } + +# Phase 3 — Tier 1 anchor read (no commits; internal scratchpad) +t3_1_anchor_read = { status = "pending", commit_sha = "", description = "Read 10 anchor reports; produce internal scratchpad" } + +# Phase 4 — Part 1 synthesis +t4_1_part1_synthesis = { status = "pending", commit_sha = "", description = "Write Part 1 (5 sub-sections × 160-200 LOC each)" } + +# Phase 5 — Part 2 synthesis +t5_1_part2_synthesis = { status = "pending", commit_sha = "", description = "Write Part 2 (12 patterns × 125-170 LOC each, 7-sub-section structure)" } + +# Phase 6 — Part 3 synthesis +t6_1_part3_synthesis = { status = "pending", commit_sha = "", description = "Write Part 3 (15-25 improvements × 50-80 LOC each, by target doc × confidence tier)" } + +# Phase 7 — Part 4 synthesis +t7_1_part4_synthesis = { status = "pending", commit_sha = "", description = "Write Part 4 (5 phases × 60-100 LOC each, conservative sequencing)" } + +# Phase 8 — Side artifacts + standalone inputs (5 commits) +t8_1_comparison_table = { status = "pending", commit_sha = "", description = "Write comparison_table.md (~50 rows)" } +t8_2_decisions = { status = "pending", commit_sha = "", description = "Write decisions.md (15-25 entries)" } +t8_3_nagent_takeaways = { status = "pending", commit_sha = "", description = "Write nagent_takeaways_meta_tooling_20260620.md (5-part bridge)" } +t8_4_workflow_improvements_standalone = { status = "pending", commit_sha = "", description = "Write workflow_improvements.md (Part 3 verbatim standalone)" } +t8_5_implementation_sequencing_standalone = { status = "pending", commit_sha = "", description = "Write implementation_sequencing.md (Part 4 verbatim + phase dependencies)" } + +# Phase 9 — Self-review (0-1 commits) +t9_1_self_review = { status = "pending", commit_sha = "", description = "Placeholder scan + internal consistency + scope check + ambiguity check + chunking verification; fix inline" } + +# Phase 10 — User review gate (0 commits; user-driven) +t10_1_user_review = { status = "pending", commit_sha = "", description = "User reviews report + side artifacts + standalone inputs; approves or iterates" } + +# Phase 11 — Finalize +t11_1_finalize = { status = "pending", commit_sha = "", description = "Update state.toml to current_phase=11; update metadata.json with final stats; mark Recently Completed in tracks.md" } + +[verification] +phase_1_complete = false +phase_2_complete = false +phase_3_complete = false +phase_4_complete = false +phase_5_complete = false +phase_6_complete = false +phase_7_complete = false +phase_8_complete = false +phase_9_complete = false +phase_10_complete = false +phase_11_complete = false +report_4k_loc_floor_met = false +user_review_approved = false +``` + +- [ ] **Step 4: Write `report.md` skeleton** + +Write to `conductor/tracks/meta_tooling_workflow_review_20260620/report.md`: + +```markdown +# Meta-Tooling Workflow Review — Past-Month LLM Behavior Analysis + +**Track:** `meta_tooling_workflow_review_20260620` +**Shipped:** (pending) +**Owner:** Tier 1 Orchestrator (sole synthesis author) +**Date:** 2026-06-20 +**Scope:** Past month (2026-05-20 → 2026-06-20) +**Status:** Research-only. No production code changes. + +--- + +## Part 1 — What Shipped + +### §1.1 The Result Migration Campaign (5 sub-tracks + umbrella) + +[160-200 LOC; populated in Phase 4] + +### §1.2 Tier 2 Autonomous Sandbox Family + +[160-200 LOC; populated in Phase 4] + +### §1.3 Stability & Test-Infrastructure Tracks + +[160-200 LOC; populated in Phase 4] + +### §1.4 Meta-Analysis Corpus + +[160-200 LOC; populated in Phase 4] + +### §1.5 One-Off Fixes & Polishes + +[160-200 LOC; populated in Phase 4] + +--- + +## Part 2 — LLM Behavior Patterns + +### §2.1 Anti-Sliming (Heuristic Laundering) + +[125-170 LOC; populated in Phase 5] + +### §2.2 Hard-Gate Bypass (Manual Review → Bulk Verify) + +[125-170 LOC; populated in Phase 5] + +### §2.3 Regression-After-Refactor (Lost Context in Extraction) + +[125-170 LOC; populated in Phase 5] + +### §2.4 Heuristic Proliferation Mid-Track + +[125-170 LOC; populated in Phase 5] + +### §2.5 Tier 2 Escalation Drift (Ambiguous User Intent) + +[125-170 LOC; populated in Phase 5] + +### §2.6 Report-As-Substitute-For-Fix + +[125-170 LOC; populated in Phase 5] + +### §2.7 Decision-Deflection ("Not Going To Attempt Another Fix") + +[125-170 LOC; populated in Phase 5] + +### §2.8 Lost-Context Extraction + +[125-170 LOC; populated in Phase 5] + +### §2.9 Literal-vs-Inferred Instruction Interpretation + +[125-170 LOC; populated in Phase 5] + +### §2.10 Cross-Track Synthesis Gap + +[125-170 LOC; populated in Phase 5] + +### §2.11 The "I'm Done" Surrender Threshold + +[125-170 LOC; populated in Phase 5] + +### §2.12 Anti-Sliming Protocol Evolution (Phase 10→11→12→13) + +[125-170 LOC; populated in Phase 5] + +--- + +## Part 3 — Workflow Improvements + +### §AGENTS.md — Anti-Patterns + Hard Rules + +[populated in Phase 6] + +### §conductor/workflow.md — Process Gates + +[populated in Phase 6] + +### §conductor/code_styleguides/error_handling.md — Convention Tightening + +[populated in Phase 6] + +### §.opencode/agents/tier2-autonomous.md — Tier-Specific Directives + +[populated in Phase 6] + +### §scripts/audit_*.py — Enforcement Script Updates + +[populated in Phase 6] + +--- + +## Part 4 — Implementation Sequencing + +### §Phase 1: AGENTS.md Doc Edits (Zero Risk) + +[populated in Phase 7] + +### §Phase 2: conductor/workflow.md Process Gate Additions + +[populated in Phase 7] + +### §Phase 3: conductor/code_styleguides/error_handling.md Convention Tightening + +[populated in Phase 7] + +### §Phase 4: .opencode/ Tier-Specific Directive Updates + +[populated in Phase 7] + +### §Phase 5: scripts/audit_*.py + CI Gate Additions + +[populated in Phase 7] +``` + +- [ ] **Step 5: Write `comparison_table.md` skeleton** + +Write to `conductor/tracks/meta_tooling_workflow_review_20260620/comparison_table.md`: + +```markdown +# Comparison Table — Past-Month Tracks + +**Date:** 2026-06-20 +**Scope:** ~75 tracks active or shipped in the past month (2026-05-20 → 2026-06-20) + +| Track family | Track name | Status | Key reports | First LLM-behavior tag | +|---|---|---|---|---| +``` + +- [ ] **Step 6: Write `decisions.md` skeleton** + +Write to `conductor/tracks/meta_tooling_workflow_review_20260620/decisions.md`: + +```markdown +# Decisions — Workflow Improvements Backlog + +**Date:** 2026-06-20 +**Scope:** Conservative workflow improvements for the near-future rebuild track +**Priority order:** HIGH → MEDIUM → LOW + +| # | Priority | Workflow improvement | Change | Destination file | Evidence | Risk | Sequencing phase | +|---|---|---|---|---|---|---|---| +``` + +- [ ] **Step 7: Write `shipped_work_index.md` skeleton** + +Write to `conductor/tracks/meta_tooling_workflow_review_20260620/shipped_work_index.md`: + +```markdown +# Shipped Work Index — Past-Month Tracks + +**Date:** 2026-06-20 +**Generated by:** Tier 3 sub-agent sweep A +**Source:** `docs/reports/*.md` past month + `conductor/tracks.md` Active Tracks table + `git log --oneline` + +--- + +[populated by Tier 3 sweep A; per-track summaries ~10-20 LOC each × ~50 tracks = 300-500 LOC] +``` + +- [ ] **Step 8: Write `llm_behavior_catalog.md` skeleton** + +Write to `conductor/tracks/meta_tooling_workflow_review_20260620/llm_behavior_catalog.md`: + +```markdown +# LLM Behavior Catalog — Past-Month Evidence + +**Date:** 2026-06-20 +**Generated by:** Tier 3 sub-agent sweeps B + C +**Sources:** git log + git notes + state.toml user_directives + spec.md deviation sections + docs/guide_*.md + +--- + +[populated by Tier 3 sweep B; 12 patterns × 30-50 LOC each = 360-600 LOC] +[appended by Tier 3 sweep C; hidden-notes section ~200 LOC] +``` + +- [ ] **Step 9: Write `nagent_takeaways_meta_tooling_20260620.md` skeleton** + +Write to `conductor/tracks/meta_tooling_workflow_review_20260620/nagent_takeaways_meta_tooling_20260620.md`: + +```markdown +# Nagent Takeaways — Meta-Tooling Workflow Review Bridge + +**Date:** 2026-06-20 +**Bridges:** `meta_tooling_workflow_review_20260620` ↔ `nagent_review_20260608` ↔ `fable_review_20260617` ↔ `superpowers_review_20260619` ↔ `intent_dsl_survey_20260612` + +--- + +## TL;DR + +[1 paragraph; populated in Phase 8] + +## Cross-Reference Table + +[~10-15 rows; one row per LLM pattern that touches a verdict in the sibling reviews] + +## The N New Findings This Track Adds + +[populated in Phase 8] + +## The M Sibling-Review Findings This Track Contradicts or Extends + +[populated in Phase 8] + +## Pointer to fable_review + +[1 paragraph; populated in Phase 8] +``` + +- [ ] **Step 10: Write `workflow_improvements.md` skeleton** + +Write to `conductor/tracks/meta_tooling_workflow_review_20260620/workflow_improvements.md`: + +```markdown +# Workflow Improvements — Standalone Catalog + +**Date:** 2026-06-20 +**Purpose:** Standalone input for the near-future "workflow improvements rebuild" track. Read this WITHOUT re-reading the 4,000-LOC `report.md`. + +--- + +[verbatim copy of `report.md` Part 3, populated in Phase 8] +``` + +- [ ] **Step 11: Write `implementation_sequencing.md` skeleton** + +Write to `conductor/tracks/meta_tooling_workflow_review_20260620/implementation_sequencing.md`: + +```markdown +# Implementation Sequencing — Standalone Plan + +**Date:** 2026-06-20 +**Purpose:** Standalone input for the near-future "workflow improvements rebuild" track. Read this WITHOUT re-reading the 4,000-LOC `report.md`. + +--- + +[verbatim copy of `report.md` Part 4 + phase dependencies section, populated in Phase 8] +``` + +- [ ] **Step 12: Update `conductor/tracks.md`** + +Open `conductor/tracks.md` and add a row to the "Active Tracks" table (the table at the top of the file). Insert the following row after the existing rows (find the last A-priority row): + +``` +| 22b | A (meta-tooling) | [Meta-Tooling Workflow Review — Past-Month LLM Behavior Analysis](#track-meta-tooling-workflow-review-past-month-llm-behavior-analysis) | spec ✓, plan ✓, **active** | (none — independent; **NEW 2026-06-20**; sibling to nagent_review + fable_review + superpowers_review + intent_dsl_survey; produces workflow_improvements.md + implementation_sequencing.md as standalone inputs for a near-future rebuild track; research-only; no src/, tests/, AGENTS.md, conductor/*.md, .opencode/, or scripts/audit_*.py changes) | +``` + +Also append a section at the bottom of "Active Tracks": + +``` +### Track: Meta-Tooling Workflow Review — Past-Month LLM Behavior Analysis + +*Spec: [./tracks/meta_tooling_workflow_review_20260620/spec.md](./tracks/meta_tooling_workflow_review_20260620/spec.md), Plan: [./tracks/meta_tooling_workflow_review_20260620/plan.md](./tracks/meta_tooling_workflow_review_20260620/plan.md)* +*Goal: Analyze all past-month reports + git log + state.toml + guide docs; identify 8-16 LLM behavior patterns; catalog workflow improvements by target doc × confidence tier; sequence for a conservative rebuild. 4-part report ≥4,000 LOC. Research-only; recommendations go to workflow_improvements.md + implementation_sequencing.md for the future rebuild track.* +*Status: in progress (Phase 1 setup complete; Tier 3 sweeps pending).* +``` + +- [ ] **Step 13: Verify all 11 files exist** + +Run: `Get-ChildItem -LiteralPath "conductor\tracks\meta_tooling_workflow_review_20260620" -File | Select-Object Name, Length | Format-Table -AutoSize` +Expected: 11 files (spec.md + metadata.json + state.toml + report.md + comparison_table.md + decisions.md + shipped_work_index.md + llm_behavior_catalog.md + nagent_takeaways_meta_tooling_20260620.md + workflow_improvements.md + implementation_sequencing.md) + +- [ ] **Step 14: Stage and commit** + +```bash +git add conductor/tracks/meta_tooling_workflow_review_20260620/ conductor/tracks.md +git commit -m "conductor(track): meta_tooling_workflow_review_20260620 Phase 1 setup (11 skeleton files + tracks.md registration)" +``` + +- [ ] **Step 15: Attach git note** + +```bash +git notes add -m "Spec + metadata + state + 11 skeleton files for meta_tooling_workflow_review_20260620. Research-only track analyzing past-month LLM agent behavior (2026-05-20 to 2026-06-20). 4-part report (>=4,000 LOC target). Tier 1 anchor + 3 Tier 3 sub-agent sweeps. 7 reference artifacts (report + 5 side artifacts + 2 standalone inputs for future rebuild track). 11 phases. Sibling to nagent_review / fable_review / superpowers_review / intent_dsl_survey. No src/ / tests/ / conductor/*.md / AGENTS.md / .opencode/ / scripts/audit_*.py changes." $(git log -1 --format="%H") +``` + +- [ ] **Step 16: Update `state.toml`** + +Update `state.toml`: +- `current_phase = 2` +- `phase_1 = { status = "completed", checkpointsha = "", name = "Setup" }` +- `t1_1_setup_artifacts = { status = "completed", commit_sha = "", description = "..." }` +- Add `` to the `last_updated` field + +Then commit: +```bash +git add conductor/tracks/meta_tooling_workflow_review_20260620/state.toml +git commit -m "conductor(plan): mark Phase 1 (Setup) complete for meta_tooling_workflow_review_20260620" +``` + +Attach git note: +```bash +git notes add -m "Phase 1 complete: 11 skeleton files + tracks.md registration committed. 1 commit this phase. Phase 2 (Tier 3 sub-agent sweeps) is next." $(git log -1 --format="%H") +``` + +--- + +## Phase 2: Tier 3 Sub-Agent Sweeps (3 commits) + +**Scope:** Dispatch 3 Tier 3 sub-agents in parallel (Sweep A: reports corpus; Sweep B: structured data; Sweep C: hidden notes). Each sub-agent returns a structured markdown output that Tier 1 reviews before committing. + +### Task 2.1: Sweep A — Reports corpus → `shipped_work_index.md` (1 commit) + +**Files:** +- Create: `conductor/tracks/meta_tooling_workflow_review_20260620/shipped_work_index.md` (populated by sub-agent) + +- [ ] **Step 1: Verify Tier 3 dispatch mechanism** + +Run: `Test-Path -LiteralPath "scripts\mma_exec.py"` +Expected: `True` (the `mma_exec.py` script must exist for Tier 3 dispatch) + +If `False`, STOP and report to user — Tier 3 dispatch is not available, fall back to Tier 1 sequential read of all ~75 reports (will require plan restructuring). + +- [ ] **Step 2: Build the Sweep A prompt** + +Use the following prompt (verbatim, with file paths substituted): + +``` +You are a Tier 3 Worker sub-agent dispatched for a meta-tooling review track. + +TASK: Read all ~75 markdown reports in C:/projects/manual_slop/docs/reports/ dated 2026-05-20 through 2026-06-20 (use Get-ChildItem -LiteralPath "docs/reports" -File | Where-Object { $_.LastWriteTime -ge (Get-Date).AddDays(-35) } to enumerate). For each report, extract: +- Track folder (if the report references one) +- Shipped date (from filename or metadata) +- Key deliverable files referenced +- Track family (one of: migration campaign / tier-2 sandbox / stability-test-infra / meta-analysis / one-off polish) +- LLM-behavior touch-points (any mention of agent failure, regression, anti-pattern, deviation, or workaround) + +OUTPUT FORMAT: Markdown file at C:/projects/manual_slop/conductor/tracks/meta_tooling_workflow_review_20260620/shipped_work_index.md + +The file structure: +- H1 title +- Date + scope +- H2 section "By Track Family" with 5 sub-sections (one per family) +- Each sub-section: bullet list of reports, each with format: ` - [report filename](path) | [track folder if any] | [shipped date] | [key deliverables] | [LLM-behavior tags]` +- H2 section "LLM-Behavior Tag Index" (alphabetical): each tag (anti-sliming, hard-gate-bypass, etc.) → list of reports that mention it + +CONSTRAINTS: +- Total output: 300-500 LOC +- Use 1-space indentation for any nested content (per project conventions) +- No day estimates +- Cite specific file paths with line numbers where possible +- If a report does not reference a track, put it in the "uncategorized" section at the bottom + +Return the FULL contents of shipped_work_index.md in your final message (not a summary). +``` + +- [ ] **Step 3: Dispatch Sweep A** + +Run: +```bash +uv run python scripts/mma_exec.py --role tier3-worker "" +``` + +Expected: Sub-agent runs (may take several minutes); final message contains the full markdown content of `shipped_work_index.md`. + +- [ ] **Step 4: Verify output file** + +Run: `Get-ChildItem -LiteralPath "conductor\tracks\meta_tooling_workflow_review_20260620\shipped_work_index.md" | Select-Object Length` +Expected: file exists, length > 10,000 bytes (300-500 LOC × ~30 bytes/LOC). + +If length < 5,000 bytes, the sub-agent output is too thin — STOP and report to user. + +- [ ] **Step 5: Spot-check 5 reports** + +Open 5 random reports from the index. Verify: +- Track folder is correct (where applicable) +- LLM-behavior tags are accurate +- File paths exist + +If any spot-check fails, correct the index inline before committing. + +- [ ] **Step 6: Stage and commit** + +```bash +git add conductor/tracks/meta_tooling_workflow_review_20260620/shipped_work_index.md +git commit -m "conductor(track): meta_tooling_workflow_review_20260620 Sweep A (Tier 3 reports corpus -> shipped_work_index.md)" +``` + +- [ ] **Step 7: Attach git note** + +```bash +git notes add -m "Tier 3 Sweep A output: shipped_work_index.md (~300-500 LOC). Per-track summaries from all ~75 docs/reports/ past month. 5 track families + LLM-behavior tag index. Used as scaffolding for report.md Part 1." $(git log -1 --format="%H") +``` + +- [ ] **Step 8: Update `state.toml`** + +Update: +- `phase_2` → 1 of 3 complete (don't bump `current_phase` yet; wait for all 3 sweeps) +- `t2_1_sweep_a_reports = { status = "completed", commit_sha = "", description = "..." }` + +Commit: +```bash +git add conductor/tracks/meta_tooling_workflow_review_20260620/state.toml +git commit -m "conductor(plan): mark Phase 2 Sweep A complete" +``` + +### Task 2.2: Sweep B — Structured data → `llm_behavior_catalog.md` (Part 1) (1 commit) + +**Files:** +- Create: `conductor/tracks/meta_tooling_workflow_review_20260620/llm_behavior_catalog.md` (populated by sub-agent; Part 1) + +- [ ] **Step 1: Build the Sweep B prompt** + +``` +You are a Tier 3 Worker sub-agent dispatched for a meta-tooling review track. + +TASK: Mine 3 sources of structured data for LLM agent behavior evidence: + +1. GIT LOG: Run `git log --since="2026-05-20" --until="2026-06-20" --oneline` from C:/projects/manual_slop. Also run `git log --since="2026-05-20" --until="2026-06-20" --format="%H %s" --no-merges`. Identify 50-100 commits whose messages mention regression, fix, bug, anti-pattern, deviation, workaround, tier-2, autonomous, audit, heuristic, or test failure. + +2. GIT NOTES: Run `git log --since="2026-05-20" --until="2026-06-20" --format="%H" --no-merges | ForEach-Object { $notes = git notes show $_ 2>$null; if ($notes) { "$_`n$notes`n---" } }` from C:/projects/manual_slop. Capture all git notes attached to past-month commits. + +3. STATE.TOML USER_DIRECTIVES: For each conductor/tracks//state.toml in C:/projects/manual_slop/conductor/tracks/ where the track was active in the past month, read the [meta] section + user_directives_logged section. Capture all directives. + +4. SPEC.MD DEVIATIONS: For each conductor/tracks//spec.md in the past-month tracks, read the "Deviations from Spec/Plan" section. Capture all deviations. + +OUTPUT FORMAT: Markdown file at C:/projects/manual_slop/conductor/tracks/meta_tooling_workflow_review_20260620/llm_behavior_catalog.md + +File structure: +- H1 title +- Date + scope + sources +- 12 sections (one per hypothesized LLM behavior pattern): Anti-Sliming / Hard-Gate Bypass / Regression-After-Refactor / Heuristic Proliferation / Tier-2 Escalation Drift / Report-As-Substitute-For-Fix / Decision-Deflection / Lost-Context Extraction / Literal-vs-Inferred Instruction / Cross-Track Synthesis Gap / "I'm Done" Surrender Threshold / Anti-Sliming Protocol Evolution +- Each section: + - H3 subsection "Definition" (1-2 sentences) + - H3 subsection "Evidence Citations" (3-7 file:line refs with brief excerpts) + - H3 subsection "Status" (NEW / PARTIALLY-CODIFIED / FULLY-CODIFIED / SUBSUMED based on AGENTS.md coverage) + - Each subsection ~30-50 LOC + +CONSTRAINTS: +- Total output: 500-700 LOC +- Use 1-space indentation for nested content +- No day estimates +- Cite specific file:line for every piece of evidence +- If a pattern has <3 evidence citations, mark it as "INSUFFICIENT EVIDENCE — consider removal in synthesis" + +Return the FULL contents of llm_behavior_catalog.md in your final message. +``` + +- [ ] **Step 2: Dispatch Sweep B** + +Run: +```bash +uv run python scripts/mma_exec.py --role tier3-worker "" +``` + +- [ ] **Step 3: Verify output file** + +Run: `Get-ChildItem -LiteralPath "conductor\tracks\meta_tooling_workflow_review_20260620\llm_behavior_catalog.md" | Select-Object Length` +Expected: length > 15,000 bytes (500-700 LOC). + +If length < 10,000 bytes, the sub-agent output is too thin — STOP and report. + +- [ ] **Step 4: Spot-check 10 evidence citations** + +Open 10 random file:line citations from the catalog. Verify each exists at the cited location and the excerpt is accurate. + +If any spot-check fails, correct the catalog inline. + +- [ ] **Step 5: Stage and commit** + +```bash +git add conductor/tracks/meta_tooling_workflow_review_20260620/llm_behavior_catalog.md +git commit -m "conductor(track): meta_tooling_workflow_review_20260620 Sweep B (Tier 3 structured data -> llm_behavior_catalog.md Part 1)" +``` + +- [ ] **Step 6: Attach git note + commit state** + +```bash +git notes add -m "Tier 3 Sweep B output: llm_behavior_catalog.md Part 1 (~500-700 LOC). Evidence from git log + git notes + state.toml user_directives + spec.md deviations. 12 LLM behavior patterns with file:line citations." $(git log -1 --format="%H") + +git add conductor/tracks/meta_tooling_workflow_review_20260620/state.toml +git commit -m "conductor(plan): mark Phase 2 Sweep B complete" +``` + +### Task 2.3: Sweep C — Hidden notes → `llm_behavior_catalog.md` (Part 2 appended) (1 commit) + +**Files:** +- Modify: `conductor/tracks/meta_tooling_workflow_review_20260620/llm_behavior_catalog.md` (append hidden-notes section) + +- [ ] **Step 1: Build the Sweep C prompt** + +``` +You are a Tier 3 Worker sub-agent dispatched for a meta-tooling review track. + +TASK: Scan the following files for INLINE notes about LLM agent behavior, agent failure modes, anti-patterns, or workflow concerns that may not be captured in docs/reports/ or conductor/tracks//: + +1. C:/projects/manual_slop/AGENTS.md (root) — sections §Critical Anti-Patterns, §Session-Learned Anti-Patterns, §Process Anti-Patterns +2. C:/projects/manual_slop/conductor/*.md (all .md files in conductor/ EXCEPT tracks/ subdirectory and code_styleguides/ subdirectory) +3. C:/projects/manual_slop/docs/guide_*.md (36 files; the 14 deep-dive guides) +4. C:/projects/manual_slop/conductor/code_styleguides/*.md (11 styleguide files) + +For each file, grep for: agent, anti-pattern, behavior, behavior drift, hard gate, hard ban, regression, heuristic, audit, slip, slop, scope creep, fix forward, surrender, escalation, tier-1, tier-2. + +For each match, capture: file:line + the surrounding 3 lines of context. + +OUTPUT FORMAT: Append to C:/projects/manual_slop/conductor/tracks/meta_tooling_workflow_review_20260620/llm_behavior_catalog.md + +Append structure: +- H1 "Hidden Notes Appendix" +- H2 "By Source File" (sub-sections per file) +- Each file's section: bullet list of file:line + excerpt + which of the 12 LLM patterns it relates to (or "NEW finding not in the 12 patterns") + +CONSTRAINTS: +- Total appendix: 200-300 LOC +- Use 1-space indentation for nested content +- No day estimates +- If a source file has 0 matches, omit it + +Return the FULL APPENDIX content in your final message (not a summary). Do NOT rewrite the existing 12-pattern content; the Tier 1 orchestrator will append it. +``` + +- [ ] **Step 2: Dispatch Sweep C** + +Run: +```bash +uv run python scripts/mma_exec.py --role tier3-worker "" +``` + +- [ ] **Step 3: Append output to `llm_behavior_catalog.md`** + +Read the sub-agent's output. Use `Get-Content` to load the existing `llm_behavior_catalog.md`, append the sub-agent's content, and write back. + +```powershell +$existing = Get-Content -LiteralPath "conductor\tracks\meta_tooling_workflow_review_20260620\llm_behavior_catalog.md" -Raw +$appendix = "" +Set-Content -LiteralPath "conductor\tracks\meta_tooling_workflow_review_20260620\llm_behavior_catalog.md" -Value ($existing + "`n`n" + $appendix) -NoNewline +``` + +- [ ] **Step 4: Verify appended file** + +Run: `Get-ChildItem -LiteralPath "conductor\tracks\meta_tooling_workflow_review_20260620\llm_behavior_catalog.md" | Select-Object Length` +Expected: length > 22,000 bytes (700-1000 LOC total). + +- [ ] **Step 5: Spot-check 5 hidden-note citations** + +Open 5 random file:line citations from the appendix. Verify each exists. + +- [ ] **Step 6: Stage and commit + update state** + +```bash +git add conductor/tracks/meta_tooling_workflow_review_20260620/llm_behavior_catalog.md +git commit -m "conductor(track): meta_tooling_workflow_review_20260620 Sweep C (Tier 3 hidden notes appended to llm_behavior_catalog.md)" + +git notes add -m "Tier 3 Sweep C output: llm_behavior_catalog.md Part 2 (hidden notes appendix, ~200-300 LOC). Evidence from AGENTS.md + conductor/*.md + docs/guide_*.md + conductor/code_styleguides/*.md." $(git log -1 --format="%H") + +git add conductor/tracks/meta_tooling_workflow_review_20260620/state.toml +git commit -m "conductor(plan): mark Phase 2 (Sweeps A+B+C) complete" +``` + +- [ ] **Step 7: Bump `current_phase` to 3** + +Update `state.toml`: +- `current_phase = 3` +- `phase_2 = { status = "completed", checkpointsha = "", name = "Tier 3 sub-agent sweeps" }` + +Commit: +```bash +git add conductor/tracks/meta_tooling_workflow_review_20260620/state.toml +git commit -m "conductor(plan): mark Phase 2 complete (all 3 sweeps done); advance to Phase 3" +``` + +--- + +## Phase 3: Tier 1 Anchor Read (0 commits; internal scratchpad) + +**Scope:** Tier 1 reads 10 anchor reports to anchor the synthesis; produces an internal scratchpad (NOT committed; lives in conversation context). + +### Task 3.1: Read 10 anchor reports + write scratchpad (0 commits) + +- [ ] **Step 1: Read `docs/reports/CHRONOLOGY_TRACK_HANDOVER_20260620.md`** + +Read the full document. Capture in scratchpad: +- The 5 "Lessons Learned" bullets (manual review bypass, metadata snapshot, git log as audit log, ask when in doubt, user said "manual review" twice) +- The cleanup recommendations + +- [ ] **Step 2: Read `docs/reports/TRACK_COMPLETION_result_migration_review_pass_20260617.md`** + +Capture: the 43-site classification + 10 new heuristics + 3 audit-script bugs documented + +- [ ] **Step 3: Read `docs/reports/TRACK_COMPLETION_result_migration_small_files_20260617.md`** + +Capture: the Phase 10 REJECTION (5 laundering heuristics); Phase 11 redo; the 21 sites re-classified + +- [ ] **Step 4: Read `docs/reports/TRACK_COMPLETION_result_migration_app_controller_20260618.md`** + +Capture: the 6 phases + 45 sites + 22 stays + +- [ ] **Step 5: Read `docs/reports/TRACK_COMPLETION_result_migration_gui_2_20260619.md`** + +Capture: the 13 phases + 42 sites + anti-sliming protocol + +- [ ] **Step 6: Read `docs/reports/TRACK_COMPLETION_result_migration_baseline_cleanup_20260620.md`** + +Capture: the 3 last failures (broken `raise ErrorInfo from exc`, sentinel-None flagged UNCLEAR, lost `global` declarations) + +- [ ] **Step 7: Read `docs/reports/STATUS_REPORT_phase6_compact.md`** + +Capture: the unreachable code regression + the Lesson Learned ("NEVER extract a function with side effects and place the call AFTER a try/except that always returns") + +- [ ] **Step 8: Read `docs/reports/TIER1_REVIEW_phase9_dilemma_20260620.md`** + +Capture: the 4 options Tier 2 considered; the user's directive on Heuristic E; the meta-rule "Tier 2 cannot unilaterally add audit heuristics" + +- [ ] **Step 9: Read `docs/reports/EXCEPTION_HANDLING_AUDIT_20260616.md`** + +Capture: the 211 violations + 42 files + 5 doc gaps closed (the migration campaign's baseline) + +- [ ] **Step 10: Read `docs/reports/SESSION_REPORT_superpowers_review_init_20260619.md`** + +Capture: the user's framing "conservative changes incrementally to improve AI performance and quality standards of output"; the 16-section structure of that future report + +- [ ] **Step 11: Read `docs/reports/TRACK_COMPLETION_tier2_leak_prevention_20260620.md`** + +Capture: the 3-layer defense (permission + pre-commit hook + audit); the 6 design decisions + +- [ ] **Step 12: Write scratchpad summary** + +Produce a markdown summary in conversation context (NOT a file). The summary covers: +- The 12 LLM behavior patterns (per spec §3.1 Part 2) — for each, the 3-5 anchor citations already identified +- The 5-7 cross-cutting findings (what appears in multiple anchor reports) +- The 3-5 "what AGENTS.md doesn't yet cover" candidates (for Part 2 §2.10) + +--- + +## Phase 4: Part 1 Synthesis (1 commit) + +**Scope:** Write Part 1 of `report.md` (What Shipped; 5 sub-sections × 160-200 LOC each = 800-1000 LOC). + +### Task 4.1: Synthesize Part 1 from `shipped_work_index.md` (1 commit) + +**Files:** +- Modify: `conductor/tracks/meta_tooling_workflow_review_20260620/report.md` (populate §1.1 through §1.5) + +- [ ] **Step 1: Read `shipped_work_index.md`** + +Read the full file. This is the per-track scaffolding for Part 1. + +- [ ] **Step 2: Write §1.1 — The Result Migration Campaign** + +In `report.md`, replace `[160-200 LOC; populated in Phase 4]` under `### §1.1 The Result Migration Campaign (5 sub-tracks + umbrella)` with: + +``` +The Result Migration campaign (umbrella `result_migration_20260616` + 5 sub-tracks) shipped 2026-06-17 → 2026-06-20. All 5 sub-tracks landed with 100% `Result[T]` convention coverage across the modified files. + +[160-200 LOC covering: +- Umbrella scope (5 sub-tracks; 268 sites per audit) +- Sub-track 1: result_migration_review_pass_20260617 (43 sites classified; 10 heuristics added) +- Sub-track 2: result_migration_small_files_20260617 (Phase 10 REJECTED → Phase 11 redo → Phase 12 migration; 5 laundering heuristics reverted) +- Sub-track 3: result_migration_app_controller_20260618 (45 sites; 6 phases; 22 stays) +- Sub-track 4: result_migration_gui_2_20260619 (42 sites; 13 phases; anti-sliming protocol formalized) +- Sub-track 5: result_migration_baseline_cleanup_20260620 (88 sites across 3 files: mcp_client 46 + ai_client 33 + rag_engine 9) +- Key reports cited (file:line): RESULT_MIGRATION_CAMPAIGN_STATUS_20260619.md, the 5 TRACK_COMPLETION_*.md, PHASE12_TRIAGE_20260617.md, TIER1_REVIEW_phase9_dilemma_20260620.md +- LLM-behavior touch-points: §2.1 anti-sliming, §2.3 regression-after-refactor, §2.4 heuristic proliferation, §2.8 lost-context extraction] +``` + +- [ ] **Step 3: Write §1.2 — Tier 2 Autonomous Sandbox Family** + +Replace the §1.2 placeholder with [160-200 LOC covering: the 4 sandbox tracks; the 3-layer defense in leak prevention; the bypass-and-document-deviation pattern in chronology handover]. + +- [ ] **Step 4: Write §1.3 — Stability & Test-Infrastructure Tracks** + +Replace the §1.3 placeholder with [160-200 LOC covering: public_api_migration_and_ui_polish_20260615 + rag_test_failures_20260615 + live_gui_test_fixes_20260618 + test_sandbox_hardening_20260619 + exception_handling_audit_20260616]. + +- [ ] **Step 5: Write §1.4 — Meta-Analysis Corpus** + +Replace the §1.4 placeholder with [160-200 LOC covering: nagent_review v3.1 (2,214 lines) + superpowers_review_init (blocked by chronology) + fable_review + intent_dsl_survey + chronology_20260619 (Phase 10 pending sign-off)]. + +- [ ] **Step 6: Write §1.5 — One-Off Fixes & Polishes** + +Replace the §1.5 placeholder with [160-200 LOC covering: ai_loop_regressions_20260614 + doeh_test_thinking_cleanup_20260615 + send_result_to_send_20260616 + ai_client_docs_20260613 + ai_decoupling_revert]. + +- [ ] **Step 7: Self-review Part 1** + +Verify: +- Each sub-section is 160-200 LOC (count with `wc -l` or read line count) +- Each sub-section has file:line citations +- Total Part 1 LOC: 800-1000 + +If any sub-section is under 150 LOC, expand with more detail from the `shipped_work_index.md`. + +- [ ] **Step 8: Stage and commit** + +```bash +git add conductor/tracks/meta_tooling_workflow_review_20260620/report.md +git commit -m "conductor(track): meta_tooling_workflow_review_20260620 Phase 4 Part 1 (What Shipped, 5 sub-sections)" +``` + +- [ ] **Step 9: Attach git note + commit state** + +```bash +git notes add -m "Part 1 of report.md: What Shipped. 5 track-family sub-sections (~160-200 LOC each, 800-1000 LOC total). Uses shipped_work_index.md (Tier 3 sweep A) as scaffolding. File:line citations throughout." $(git log -1 --format="%H") + +git add conductor/tracks/meta_tooling_workflow_review_20260620/state.toml +git commit -m "conductor(plan): mark Phase 4 (Part 1) complete" +``` + +--- + +## Phase 5: Part 2 Synthesis (1-2 commits) + +**Scope:** Write Part 2 of `report.md` (LLM Behavior Patterns; 12 patterns × 125-170 LOC each = 1500-2000 LOC). Split into 2 commits if LOC > 1500 (commits at §2.6 and §2.12). + +### Task 5.1: Synthesize Part 2 from `llm_behavior_catalog.md` (1 commit, possibly +1) + +**Files:** +- Modify: `conductor/tracks/meta_tooling_workflow_review_20260620/report.md` (populate §2.1 through §2.12) + +- [ ] **Step 1: Read `llm_behavior_catalog.md`** + +Read the full file. This is the evidence scaffolding for Part 2. + +- [ ] **Step 2: Verify 12 patterns have sufficient evidence** + +For each pattern in the catalog, check that there are ≥3 evidence citations. If any pattern has <3, mark it as "INSUFFICIENT EVIDENCE" in the report (1 sentence + drop from Part 2 if necessary). + +If the actual count is <8, drop patterns until ≥8 remain. Update `state.toml` `t5_1_part2_synthesis.description` with the actual count. + +- [ ] **Step 3: Write §2.1 — Anti-Sliming** + +Replace the §2.1 placeholder with the 7-sub-section structure per spec §3.1 Part 2: +- §N.1 What N adds (1 sentence) +- §N.2 Driver/structure (what causes the pattern) +- §N.3 Invariants (what should always hold) +- §N.4 Per-commit detail (3-5 file:line citations with excerpts from llm_behavior_catalog.md) +- §N.5 Manual Slop implications (2-3 paragraphs) +- §N.6 Honest gaps (≥6 bullets) +- §N.7 Code-shape sketch (1 paragraph with `{ssdl}` tags) +- Verdict block (NEW / PARTIALLY-CODIFIED / FULLY-CODIFIED / SUBSUMED) + +- [ ] **Step 4: Write §2.2 — Hard-Gate Bypass** + +Same 7-sub-section structure. Citations from `CHRONOLOGY_TRACK_HANDOVER_20260620.md` §"Lessons learned" #1. + +- [ ] **Step 5: Write §2.3 — Regression-After-Refactor** + +Same structure. Citations from `STATUS_REPORT_phase6_compact.md` §2 + `TRACK_COMPLETION_result_migration_baseline_cleanup_20260620.md` §4 Failure 3 + AGENTS.md §"Indentation-Driven Class Method Visibility". + +- [ ] **Step 6: Write §2.4 — Heuristic Proliferation Mid-Track** + +Same structure. Citations from `TIER1_REVIEW_phase9_dilemma_20260620.md` + `TRACK_COMPLETION_result_migration_baseline_cleanup_20260620.md` Phase 9. + +- [ ] **Step 7: Write §2.5 — Tier 2 Escalation Drift** + +Same structure. Citations from `CHRONOLOGY_TRACK_HANDOVER_20260620.md` §"Lessons learned" #5. + +- [ ] **Step 8: Write §2.6 — Report-As-Substitute-For-Fix** + +Same structure. Citations from `CHRONOLOGY_TRACK_HANDOVER_20260620.md` (the entire document) + AGENTS.md §"Process Anti-Patterns" #2. + +- [ ] **Step 9: Write §2.7 — Decision-Deflection** + +Same structure. Citations from `docs/reports/*.md` "next steps" sections + AGENTS.md §"Process Anti-Patterns" #6. + +- [ ] **Step 10: Write §2.8 — Lost-Context Extraction** + +Same structure. Citations from §2.3 evidence + AGENTS.md §"Session-Learned Anti-Patterns" #2. + +- [ ] **Step 11: Write §2.9 — Literal-vs-Inferred Instruction Interpretation** + +Same structure. Citations from `CHRONOLOGY_TRACK_HANDOVER_20260620.md` §"Lessons learned" #5 + AGENTS.md §"Session-Learned Anti-Patterns" #4. + +- [ ] **Step 12: Write §2.10 — Cross-Track Synthesis Gap** + +Same structure. Citations from the synthesis itself (the patterns NOT yet in AGENTS.md). + +- [ ] **Step 13: Write §2.11 — "I'm Done" Surrender Threshold** + +Same structure. Citations from AGENTS.md §"Process Anti-Patterns" #6 + #8 + `STATUS_REPORT_phase6_compact.md`. + +- [ ] **Step 14: Write §2.12 — Anti-Sliming Protocol Evolution** + +Same structure. Citations from `TRACK_COMPLETION_result_migration_baseline_cleanup_20260620.md` Phase 10-13 + `TIER1_REVIEW_phase9_dilemma_20260620.md`. + +- [ ] **Step 15: Self-review Part 2** + +Verify per spec §3.9 chunking strategy: +- Each pattern: ≥4 sub-sections (target: 7), ≥3 file:line citations, ≥2 honest gaps, ≥1 Manual Slop implication paragraph, verdict block present +- Total Part 2 LOC: 1500-2000 + +If any pattern is under 100 LOC, expand. If total < 1500, add more detail to existing patterns. + +- [ ] **Step 16: Stage and commit** + +```bash +git add conductor/tracks/meta_tooling_workflow_review_20260620/report.md +git commit -m "conductor(track): meta_tooling_workflow_review_20260620 Phase 5 Part 2 (LLM Behavior Patterns, 12 patterns)" +``` + +If Part 2 LOC > 1500, commit at §2.6 and again at §2.12 (2 commits instead of 1). + +- [ ] **Step 17: Attach git note + commit state** + +```bash +git notes add -m "Part 2 of report.md: LLM Behavior Patterns. 12 patterns (~125-170 LOC each, 1500-2000 LOC total) with the 7-sub-section structure + verdict block. Uses llm_behavior_catalog.md (Tier 3 sweeps B+C) as evidence scaffolding." $(git log -1 --format="%H") + +git add conductor/tracks/meta_tooling_workflow_review_20260620/state.toml +git commit -m "conductor(plan): mark Phase 5 (Part 2) complete" +``` + +--- + +## Phase 6: Part 3 Synthesis (1 commit) + +**Scope:** Write Part 3 of `report.md` (Workflow Improvements; 15-25 improvements × 50-80 LOC each = 1000-1200 LOC). + +### Task 6.1: Synthesize Part 3 from Part 2 patterns + Tier 1 judgment (1 commit) + +**Files:** +- Modify: `conductor/tracks/meta_tooling_workflow_review_20260620/report.md` (populate §AGENTS.md through §scripts/audit_*.py) + +- [ ] **Step 1: Derive the improvement list from Part 2** + +For each of the 12 patterns in Part 2, identify: +- Is there a workflow improvement that addresses it? (Most patterns → 1 improvement; some → 2) +- Which target doc does it go to? (AGENTS.md / conductor/workflow.md / conductor/code_styleguides/error_handling.md / .opencode/agents/tier2-autonomous.md / scripts/audit_*.py) +- Which confidence tier? (Tier 1 = apply now / Tier 2 = defer 1 cycle / Tier 3 = open question) + +Produce an internal scratchpad (NOT committed) of the 15-25 improvements. + +- [ ] **Step 2: Write §AGENTS.md section** + +Replace the placeholder with: +- H3 sub-section per improvement targeting AGENTS.md (likely 3-5 entries) +- Each improvement: H4 title + the 6-sub-section structure per spec §3.1 Part 3: + - What (1-sentence change) + - Why (evidence from Part 2 with file:line citations) + - Where (file:line destination) + - Risk (what could break) + - Verification (how the user checks) + - Rollback (how to revert) + +- [ ] **Step 3: Write §conductor/workflow.md section** + +Replace with H3 sub-sections for each workflow.md improvement (likely 2-3 Tier 1 + 1-2 Tier 2 + 0-1 Tier 3). + +- [ ] **Step 4: Write §conductor/code_styleguides/error_handling.md section** + +Replace with H3 sub-sections for each styleguide improvement (likely 1-2 Tier 1 + 1 Tier 2). + +- [ ] **Step 5: Write §.opencode/agents/tier2-autonomous.md section** + +Replace with H3 sub-sections for each .opencode improvement (likely 1-2 Tier 1 + 0-1 Tier 2 + 1 Tier 3). + +- [ ] **Step 6: Write §scripts/audit_*.py section** + +Replace with H3 sub-sections for each audit-script improvement (likely 0-1 Tier 1 + 2-3 Tier 2 + 1 Tier 3). + +- [ ] **Step 7: Self-review Part 3** + +Verify: +- Total improvements: 15-25 +- Each improvement has all 6 sub-sections (What / Why / Where / Risk / Verification / Rollback) +- Each improvement has a destination file path +- Each improvement has a risk tier +- Total Part 3 LOC: 1000-1200 + +- [ ] **Step 8: Stage and commit** + +```bash +git add conductor/tracks/meta_tooling_workflow_review_20260620/report.md +git commit -m "conductor(track): meta_tooling_workflow_review_20260620 Phase 6 Part 3 (Workflow Improvements, 15-25 entries by target doc × confidence tier)" +``` + +- [ ] **Step 9: Attach git note + commit state** + +```bash +git notes add -m "Part 3 of report.md: Workflow Improvements. 15-25 improvements (1000-1200 LOC) by 5 target docs x 3 confidence tiers. Each with the 6-sub-section structure (What/Why/Where/Risk/Verification/Rollback). Ready for the rebuild track to read." $(git log -1 --format="%H") + +git add conductor/tracks/meta_tooling_workflow_review_20260620/state.toml +git commit -m "conductor(plan): mark Phase 6 (Part 3) complete" +``` + +--- + +## Phase 7: Part 4 Synthesis (1 commit) + +**Scope:** Write Part 4 of `report.md` (Implementation Sequencing; 5 phases × 60-100 LOC each = 300-500 LOC). + +### Task 7.1: Synthesize Part 4 from Part 3 improvements (1 commit) + +**Files:** +- Modify: `conductor/tracks/meta_tooling_workflow_review_20260620/report.md` (populate §Phase 1 through §Phase 5) + +- [ ] **Step 1: Group Part 3 improvements by sequencing phase** + +For each Part 3 improvement, assign a phase (1-5) per spec §3.1 Part 4: +- Phase 1: AGENTS.md doc edits +- Phase 2: conductor/workflow.md process gates +- Phase 3: conductor/code_styleguides/error_handling.md convention tightening +- Phase 4: .opencode/ tier-specific directives +- Phase 5: scripts/audit_*.py + CI gate additions + +Each improvement should appear in exactly one phase. Phase ordering is conservative: zero-risk first, audit scripts last. + +- [ ] **Step 2: Write §Phase 1 — AGENTS.md Doc Edits** + +Replace the placeholder with: +- §N.1 Scope (which improvements from Part 3 §AGENTS.md) +- §N.2 Risk assessment (zero; doc-only) +- §N.3 Verification (the user reads the diff) +- §N.4 Rollback path (`git revert `) +- §N.5 Open questions (e.g., "Does the user want to bump the anti-pattern number, or add a new section?") + +- [ ] **Step 3: Write §Phase 2 — conductor/workflow.md Process Gates** + +Same 5-sub-section structure. + +- [ ] **Step 4: Write §Phase 3 — conductor/code_styleguides/error_handling.md** + +Same structure. + +- [ ] **Step 5: Write §Phase 4 — .opencode/ Tier-Specific Directives** + +Same structure. Risk assessment: Medium (changes how Tier 2 interprets instructions). + +- [ ] **Step 6: Write §Phase 5 — scripts/audit_*.py + CI Gate Additions** + +Same structure. Risk assessment: Medium-High (audit script is enforcement). + +- [ ] **Step 7: Self-review Part 4** + +Verify: +- Each phase has all 5 sub-sections +- Each phase has at least 1 rollback command +- Total Part 4 LOC: 300-500 +- Phase ordering is conservative (low-risk first) + +- [ ] **Step 8: Verify total report.md LOC** + +Run: `Get-ChildItem -LiteralPath "conductor\tracks\meta_tooling_workflow_review_20260620\report.md" | Select-Object Length` +Divide by ~30 bytes/LOC for markdown. +Expected: ≥4,000 LOC. + +If < 4,000, expand the thinnest sub-section in Part 1, 2, or 3. + +- [ ] **Step 9: Stage and commit** + +```bash +git add conductor/tracks/meta_tooling_workflow_review_20260620/report.md +git commit -m "conductor(track): meta_tooling_workflow_review_20260620 Phase 7 Part 4 (Implementation Sequencing, 5 phases)" +``` + +- [ ] **Step 10: Attach git note + commit state** + +```bash +git notes add -m "Part 4 of report.md: Implementation Sequencing. 5 conservative phases (60-100 LOC each, 300-500 LOC total). Each with Scope/Risk/Verification/Rollback/OpenQuestions. Total report.md >= 4,000 LOC target met." $(git log -1 --format="%H") + +git add conductor/tracks/meta_tooling_workflow_review_20260620/state.toml +git commit -m "conductor(plan): mark Phase 7 (Part 4) complete; report.md 4K floor met" +``` + +--- + +## Phase 8: Side Artifacts + Standalone Inputs (5 commits) + +**Scope:** Write the 5 side artifacts that complement `report.md` and the 2 standalone inputs for the rebuild track. + +### Task 8.1: Write `comparison_table.md` (1 commit) + +**Files:** +- Modify: `conductor/tracks/meta_tooling_workflow_review_20260620/comparison_table.md` + +- [ ] **Step 1: Extract ~50 tracks from `shipped_work_index.md`** + +Read `shipped_work_index.md`. For each track family, list ~10 tracks (5 families × 10 = 50). + +- [ ] **Step 2: Populate the table** + +Replace the placeholder with the populated table per spec §3.2: +| Track family | Track name | Status | Key reports | First LLM-behavior tag | + +For each row: +- Track family: one of migration campaign / tier-2 sandbox / stability-test-infra / meta-analysis / one-off polish +- Track name: the track folder name +- Status: Shipped / In flight / Pending sign-off / Abandoned / Superseded +- Key reports: 1-3 file names +- First LLM-behavior tag: Part 2 §N.N number (e.g., "2.3" for Phase 6 unreachable-code regression) + +- [ ] **Step 3: Commit** + +```bash +git add conductor/tracks/meta_tooling_workflow_review_20260620/comparison_table.md +git commit -m "conductor(track): meta_tooling_workflow_review_20260620 comparison_table.md (~50 rows)" +git notes add -m "~50-row flat reference of past-month tracks. Columns: track family / track name / status / key reports / first LLM-behavior tag." $(git log -1 --format="%H") +``` + +### Task 8.2: Write `decisions.md` (1 commit) + +**Files:** +- Modify: `conductor/tracks/meta_tooling_workflow_review_20260620/decisions.md` + +- [ ] **Step 1: Extract improvements from Part 3** + +For each of the 15-25 improvements in Part 3, create a decisions.md row per spec §3.3. + +- [ ] **Step 2: Sort by priority** + +Sort: HIGH (Phase 1-2 improvements) → MEDIUM (Phase 3) → LOW (Phase 4-5). + +- [ ] **Step 3: Populate the table** + +Replace the placeholder with the populated table. Include all 8 fields per spec §3.3. + +- [ ] **Step 4: Commit** + +```bash +git add conductor/tracks/meta_tooling_workflow_review_20260620/decisions.md +git commit -m "conductor(track): meta_tooling_workflow_review_20260620 decisions.md (15-25 entries)" +git notes add -m "Prioritized rebuild backlog (15-25 entries). Sorted HIGH to LOW. Each entry has destination file + evidence + risk + sequencing phase." $(git log -1 --format="%H") +``` + +### Task 8.3: Write `nagent_takeaways_meta_tooling_20260620.md` (1 commit) + +**Files:** +- Modify: `conductor/tracks/meta_tooling_workflow_review_20260620/nagent_takeaways_meta_tooling_20260620.md` + +- [ ] **Step 1: Read sibling review bridge artifacts** + +Read: +- `conductor/tracks/nagent_review_20260608/nagent_takeaways_20260608.md` +- `conductor/tracks/superpowers_review_20260619/nagent_takeaways_superpowers_20260619.md` (if exists) +- `conductor/tracks/nagent_review_20260608/nagent_takeaways_v3_20260619.md` +- `conductor/tracks/nagent_review_20260608/nagent_takeaways_v3_1_20260620.md` + +- [ ] **Step 2: Write TL;DR (1 paragraph)** + +In `nagent_takeaways_meta_tooling_20260620.md`, replace the TL;DR placeholder with a paragraph summarizing how this track's 12 LLM patterns relate to the 4 sibling reviews' verdicts. + +- [ ] **Step 3: Write Cross-Reference Table** + +Replace the table placeholder with ~10-15 rows: one per LLM pattern that touches a verdict in a sibling review. + +- [ ] **Step 4: Write The N New Findings section** + +Replace with paragraphs for each new finding (likely 4-6: anti-sliming protocol, Phase 9 dilemma, chronology handover pattern, regression-after-refactor, the audit-script-as-convention issue). + +- [ ] **Step 5: Write Contradictions/Extensions section** + +Replace with paragraphs for any sibling-review findings this track contradicts or extends. + +- [ ] **Step 6: Write Pointer to fable_review** + +Replace with 1 paragraph. + +- [ ] **Step 7: Commit** + +```bash +git add conductor/tracks/meta_tooling_workflow_review_20260620/nagent_takeaways_meta_tooling_20260620.md +git commit -m "conductor(track): meta_tooling_workflow_review_20260620 nagent_takeaways bridge (5-part structure)" +git notes add -m "Bridge to the 4 sibling meta-analysis reviews. 5-part structure: TL;DR + cross-reference table + new findings + contradictions + fable pointer. ~200 LOC." $(git log -1 --format="%H") +``` + +### Task 8.4: Write `workflow_improvements.md` (standalone) (1 commit) + +**Files:** +- Modify: `conductor/tracks/meta_tooling_workflow_review_20260620/workflow_improvements.md` + +- [ ] **Step 1: Copy Part 3 content** + +Copy the entire Part 3 (§AGENTS.md through §scripts/audit_*.py) from `report.md`. + +- [ ] **Step 2: Strip cross-references to Part 1/2** + +For each improvement, remove the "(see Part 2 §2.X)" cross-references; replace with direct file:line citations to the source reports. + +- [ ] **Step 3: Add a "How to use this document" header** + +Add a 1-paragraph preamble: "This is the standalone catalog of workflow improvements the rebuild track should read. The full 4,000-LOC report.md contains the supporting evidence; this document assumes you've decided to apply the changes." + +- [ ] **Step 4: Commit** + +```bash +git add conductor/tracks/meta_tooling_workflow_review_20260620/workflow_improvements.md +git commit -m "conductor(track): meta_tooling_workflow_review_20260620 workflow_improvements.md (standalone Part 3)" +git notes add -m "Standalone Part 3 input for the rebuild track. Read this WITHOUT re-reading the 4,000-LOC report.md. Verbatim copy of Part 3 with cross-references stripped." $(git log -1 --format="%H") +``` + +### Task 8.5: Write `implementation_sequencing.md` (standalone) (1 commit) + +**Files:** +- Modify: `conductor/tracks/meta_tooling_workflow_review_20260620/implementation_sequencing.md` + +- [ ] **Step 1: Copy Part 4 content** + +Copy the entire Part 4 (§Phase 1 through §Phase 5) from `report.md`. + +- [ ] **Step 2: Add Phase Dependencies section** + +Append a new H1 section "Phase Dependencies" with a table: +| Phase | Depends on | Blocks | +|---|---|---| +| 1 | (nothing) | 2, 3, 4, 5 | +| 2 | 1 | 3, 4 | +| 3 | 2 | 4 | +| 4 | 3 | 5 | +| 5 | 4 | (nothing) | + +- [ ] **Step 3: Add a "How to use this document" header** + +Add a 1-paragraph preamble. + +- [ ] **Step 4: Commit** + +```bash +git add conductor/tracks/meta_tooling_workflow_review_20260620/implementation_sequencing.md +git commit -m "conductor(track): meta_tooling_workflow_review_20260620 implementation_sequencing.md (standalone Part 4 + phase dependencies)" +git notes add -m "Standalone Part 4 input for the rebuild track. Verbatim copy of Part 4 + phase dependencies table. The 5-phase conservative ordering enforces zero-risk first, audit scripts last." $(git log -1 --format="%H") +``` + +--- + +## Phase 9: Self-Review (0-1 commits) + +**Scope:** Per the brainstorming skill: placeholder scan, internal consistency, scope check, ambiguity check, chunking verification. + +### Task 9.1: Self-review pass (0-1 commits) + +- [ ] **Step 1: Placeholder scan** + +Search all 11 files for "TBD", "TODO", "fill in", "populated in Phase", "FIXME", "XXX". Fix inline. + +- [ ] **Step 2: Internal consistency check** + +Verify: +- `comparison_table.md` rows match `report.md` Part 1 §N.N sub-sections +- `decisions.md` entries match `report.md` Part 3 §X.Y.Z sub-sections +- `workflow_improvements.md` is verbatim Part 3 (minus cross-refs) +- `implementation_sequencing.md` is verbatim Part 4 (plus phase dependencies) +- `nagent_takeaways_*.md` cross-references match the actual sibling review section numbers + +- [ ] **Step 3: Scope check** + +Verify: +- No `src/` / `tests/` / `conductor/*.md` / `AGENTS.md` / `.opencode/` / `scripts/audit_*.py` changes +- All recommendations go to `workflow_improvements.md` (not to source files) + +- [ ] **Step 4: Ambiguity check** + +Verify: +- Every Part 3 improvement has a destination file path +- Every Part 4 phase has a rollback command +- Every Part 2 pattern has a verdict block + +- [ ] **Step 5: Chunking verification (per nagent_review_v3.1)** + +Verify per spec §3.9: +- Each Part 2 pattern: ≥4 sub-sections (target 7), ≥3 file:line citations, ≥2 honest gaps, ≥1 Manual Slop implication, verdict block +- Each Part 3 improvement: ≥4 sub-sections (target 6), ≥1 evidence citation, ≥1 verification step +- Each Part 4 phase: ≥3 sub-sections (target 5), ≥1 rollback command + +If any check fails, fix inline. + +- [ ] **Step 6: Total report.md LOC check** + +Verify: total report.md LOC ≥ 4,000. If < 4,000, thicken the thinnest section. + +- [ ] **Step 7: Commit fixes (if any)** + +If any fixes were needed in Steps 1-6: +```bash +git add conductor/tracks/meta_tooling_workflow_review_20260620/ +git commit -m "conductor(track): meta_tooling_workflow_review_20260620 Phase 9 self-review fixes" +git notes add -m "Self-review pass: placeholder scan + internal consistency + scope check + ambiguity check + chunking verification. fixes applied." $(git log -1 --format="%H") +``` + +If no fixes needed, no commit. Update `state.toml` `t9_1_self_review.status = "completed"` with note "no fixes needed". + +--- + +## Phase 10: User Review Gate (0 commits; user-driven) + +**Scope:** User reviews the final report + side artifacts + standalone inputs; approves or iterates. + +### Task 10.1: User review (0 commits) + +- [ ] **Step 1: Inform the user** + +Send a message to the user: +> "Track artifacts complete. Please review: +> - `conductor/tracks/meta_tooling_workflow_review_20260620/report.md` (≥4,000 LOC; 4 parts) +> - `conductor/tracks/meta_tooling_workflow_review_20260620/comparison_table.md` (~50 rows) +> - `conductor/tracks/meta_tooling_workflow_review_20260620/decisions.md` (15-25 entries) +> - `conductor/tracks/meta_tooling_workflow_review_20260620/shipped_work_index.md` +> - `conductor/tracks/meta_tooling_workflow_review_20260620/llm_behavior_catalog.md` +> - `conductor/tracks/meta_tooling_workflow_review_20260620/nagent_takeaways_meta_tooling_20260620.md` +> - `conductor/tracks/meta_tooling_workflow_review_20260620/workflow_improvements.md` (standalone for rebuild track) +> - `conductor/tracks/meta_tooling_workflow_review_20260620/implementation_sequencing.md` (standalone for rebuild track) +> +> Let me know if you want changes before I run Phase 11 (Finalize)." + +- [ ] **Step 2: Wait for user response** + +Do not proceed without explicit user approval. + +- [ ] **Step 3: Apply any user-requested changes** + +If the user requests changes, apply them as additional commits. Update `state.toml` per change. + +- [ ] **Step 4: Update `state.toml` to mark user review complete** + +After user approval: +```bash +git add conductor/tracks/meta_tooling_workflow_review_20260620/state.toml +git commit -m "conductor(plan): mark Phase 10 (user review gate) complete (approved YYYY-MM-DD)" +``` + +--- + +## Phase 11: Finalize (1 commit) + +**Scope:** Update `state.toml`, `metadata.json`, and `conductor/tracks.md` to reflect completion. + +### Task 11.1: Finalize (1 commit) + +- [ ] **Step 1: Update `state.toml`** + +Update: +- `status = "active"` (per spec — track stays active until archived per the chronology archive convention) +- `current_phase = 11` +- All `phase_N.status = "completed"` +- All `phase_N.checkpointsha = ""` (use the final commit of each phase) +- All `tasks.status = "completed"` +- `last_updated = "2026-06-20"` (or current date if different) +- All `verification.* = true` + +- [ ] **Step 2: Update `metadata.json`** + +Update: +- `status: "active"` +- Add a new top-level field `"final_statistics"` with the actual counts: +```json +{ + "report_total_loc": , + "pattern_count": , + "improvement_count": , + "phase_count": , + "commit_count": , + "sweep_a_loc": , + "sweep_b_loc": , + "sweep_c_loc": , + "comparison_table_rows": , + "decisions_entries": , + "shipped_work_index_tracks": +} +``` + +- [ ] **Step 3: Update `conductor/tracks.md`** + +Move the track entry from "Active Tracks" to "Recently Completed" section (or whichever section the project's convention uses — check the existing format). Add a one-line summary: + +``` +- [x] **Track: Meta-Tooling Workflow Review — Past-Month LLM Behavior Analysis** `[checkpoint: ]` + *Link: [./tracks/meta_tooling_workflow_review_20260620/](./tracks/meta_tooling_workflow_review_20260620/), Spec + Plan: see track folder. + *Goal: Analyze all past-month reports + git log + state.toml + guide docs; identify 8-16 LLM behavior patterns; catalog workflow improvements by target doc × confidence tier; sequence for a conservative rebuild. 4-part report ≥4,000 LOC. Research-only.* +``` + +- [ ] **Step 4: Stage and commit** + +```bash +git add conductor/tracks/meta_tooling_workflow_review_20260620/state.toml conductor/tracks/meta_tooling_workflow_review_20260620/metadata.json conductor/tracks.md +git commit -m "conductor(track): meta_tooling_workflow_review_20260620 Phase 11 finalize" +``` + +- [ ] **Step 5: Attach git note** + +```bash +git notes add -m "Track complete. state.toml advanced to current_phase=11. metadata.json updated with final statistics (report LOC: , patterns: , improvements: ). conductor/tracks.md updated. Ready for the near-future workflow-improvements rebuild track to consume workflow_improvements.md + implementation_sequencing.md as standalone inputs." $(git log -1 --format="%H") +``` + +- [ ] **Step 6: Mark Phase 11 task complete** + +Update `state.toml`: +- `t11_1_finalize.status = "completed"` +- `t11_1_finalize.commit_sha = ""` + +Commit: +```bash +git add conductor/tracks/meta_tooling_workflow_review_20260620/state.toml +git commit -m "conductor(plan): mark Phase 11 (Finalize) complete; meta_tooling_workflow_review_20260620 SHIPPED" +``` + +--- + +## Plan Self-Review + +**1. Spec coverage:** +- §3.1 Part 1 (5 sub-sections) → Phase 4 ✓ +- §3.1 Part 2 (12 patterns) → Phase 5 ✓ +- §3.1 Part 3 (15-25 improvements) → Phase 6 ✓ +- §3.1 Part 4 (5 phases) → Phase 7 ✓ +- §3.2 comparison_table.md → Task 8.1 ✓ +- §3.3 decisions.md → Task 8.2 ✓ +- §3.4 shipped_work_index.md → Task 2.1 ✓ +- §3.5 llm_behavior_catalog.md → Tasks 2.2 + 2.3 ✓ +- §3.6 nagent_takeaways_meta_tooling_20260620.md → Task 8.3 ✓ +- §3.7 workflow_improvements.md (standalone) → Task 8.4 ✓ +- §3.8 implementation_sequencing.md (standalone) → Task 8.5 ✓ +- §3.9 chunking strategy (≥4K LOC) → Phase 9 self-review verifies ✓ + +**2. Placeholder scan:** No "TBD" / "TODO" / "fill in details" in the plan. Each task has explicit file paths + content + commands. + +**3. Type consistency:** N/A (research-only track; no code changes; no type definitions). + +**4. Ambiguity check:** +- Each Tier 3 sub-agent prompt has explicit file paths + output format + LOC target ✓ +- Each Phase 4-7 task has explicit LOC range per sub-section ✓ +- The ≥4,000 LOC report floor is checked in Phase 9 ✓ + +**Minor adjustments applied during self-review:** +- Phase 6 (Part 3) added explicit "internal scratchpad" instruction in Step 1 (Tier 1 judgment call; not committed) +- Phase 7 Step 8 added explicit LOC check (the ≥4,000 floor verification) +- Phase 8 Tasks 8.4 + 8.5 added explicit "strip cross-references" / "add preamble" instructions for the standalone docs + +Plan ready for execution. + +--- + +## Execution Handoff + +**Plan complete and saved to `conductor/tracks/meta_tooling_workflow_review_20260620/plan.md`. Two execution options:** + +**1. Subagent-Driven (recommended)** — Tier 2 Tech Lead dispatches a fresh Tier 3 sub-agent per task (well-suited for the Tier 3 sweep dispatches in Phase 2; less ideal for the Tier 1 synthesis phases 3-7 which require sustained context). The Tier 2 review between tasks catches the anti-sliming risk in the synthesis phases. + +**2. Inline Execution** — Tier 1 Orchestrator executes tasks in this session using executing-plans. The Tier 3 sweeps (Phase 2) use `scripts/mma_exec.py --role tier3-worker`; the Tier 1 synthesis phases (3-7) run inline with full context. Total session token burn: substantial (the 4,000-LOC report is generated in-conversation). + +**My recommendation: Option 2 (inline Tier 1 execution).** The 4-part report synthesis requires sustained Tier 1 context; splitting it across sub-agents risks losing the cross-cutting synthesis voice. Phase 2 (Tier 3 sweeps) is the only sub-agent dispatch. Phases 8-11 are mechanical and can run inline. + +**Which approach do you want?** \ No newline at end of file From 92c83ee3428e3ac0d92abae14836449087664d67 Mon Sep 17 00:00:00 2001 From: Ed_ Date: Sun, 21 Jun 2026 08:41:38 -0400 Subject: [PATCH 3/3] conductor(tracks): register meta_tooling_workflow_review_20260620 in Active Tracks (parked 2026-06-20) --- conductor/tracks.md | 1 + 1 file changed, 1 insertion(+) diff --git a/conductor/tracks.md b/conductor/tracks.md index 544484f3..b6ba0022 100644 --- a/conductor/tracks.md +++ b/conductor/tracks.md @@ -61,6 +61,7 @@ Tracks that are unblocked and ready to start. Ordered by **dependency** (blocked | ~~22~~ | — | ~~[Test Batching Post-Refactor Polish](#track-test-batching-post-refactor-polish)~~ | ~~SUPERSEDED by track 1 (FR1 + FR2)~~ | — | | 20 | — | [Prior Session Test Harden (20260605)](#track-prior-session-test-harden-20260605-superseded) | superseded; no action needed | — | | 21 | A | [Conductor Chronology (chronology.md canonical index)](#track-conductor-chronology) | spec ✓, plan ✓, 10/10 phases implemented; Phase 10 (user sign-off) pending; end-of-track report at `docs/reports/TRACK_COMPLETION_chronology_20260619.md` | (none — independent; **NEW 2026-06-19**; canonical-track infrastructure; the `superpowers_review_20260619` track is `blocked_by` this one) | +| 22b | A (meta-tooling) | [Meta-Tooling Workflow Review — Past-Month LLM Behavior Analysis](#track-meta-tooling-workflow-review-past-month-llm-behavior-analysis) | spec ✓, plan ✓, metadata ✓, state ✓, **parked 2026-06-20** (current_phase=0); 11-phase plan; ≥4,000-LOC 4-part report; 13-15 atomic commits; Tier 1 anchor + 3 Tier 3 parallel sweeps | (none — independent; **NEW 2026-06-20**; sibling to nagent_review + fable_review + superpowers_review + intent_dsl_survey; produces workflow_improvements.md + implementation_sequencing.md as standalone inputs for a near-future "workflow improvements rebuild" track; research-only; no src/, tests/, AGENTS.md, conductor/*.md, .opencode/, or scripts/audit_*.py changes; **anti-sliming guard**: Phase 9 self-review + Phase 10 user review gate are literal hard gates per the chronology_20260619 handover) | **Note on numbering:** the legacy file used `0a`, `0b`, `0c`... and `0d`, `0e`, `0f`, `0g` for tracks created 2026-06-06+. This is the **git-blame sort order**, not a logical execution order. The new structure re-orders by dependency.