conductor(deob_warmup): Initialize warmup track (precursor)

Research-style track. Produces 2 deliverables from the user's past de-obfuscation samples:
- report.md: design philosophy + curated lexicon + 3 noise-dedup maps + sample transformations
- prompt_template.md: LLM-direct operational spec; can be invoked as-is with a new Pass 1 report

Phase 0: USER action item (gather 3-10 samples into samples/, gitignored)
Phase 1: Tier 3 worker surveys (term frequency, structural patterns, form projection heuristics)
Phase 2: Write report.md
Phase 3: Write prompt_template.md
Phase 4: User review + approval

blocked_by: user samples
blocks: lexicon, pilot, apply (3 phase children)
2026-06-23 00:08:22 -04:00
parent 59ba8ff2ba
commit f830798822
4 changed files with 580 additions and 0 deletions
@@ -0,0 +1,130 @@
+{
+  "track_id": "video_analysis_deob_warmup_20260621",
+  "name": "Video Analysis De-obfuscation Warmup (Pass 2 precursor)",
+  "created": "2026-06-21",
+  "status": "spec_approved",
+  "blocked_by": ["user_samples_provided"],
+  "blocks": [
+    "video_analysis_deob_lexicon_20260621",
+    "video_analysis_deob_pilot_20260621",
+    "video_analysis_deob_apply_20260621"
+  ],
+  "priority": "A",
+  "rationale": "User-blocking precursor to Pass 2 of the 3-pass research campaign. Produces the initial de-obfuscation lexicon (evidence-based from the user's past samples) and the LLM prompt template. Research-only; no src/ changes. The 3 phase children (lexicon/pilot/apply) all depend on this warmup's output.",
+  "type": "research-only (warmup track; produces report.md + prompt_template.md)",
+  "domain": "meta-tooling (research deliverable + LLM operational spec; no manual_slop src/ changes)",
+  "scope": {
+    "new_folders": [
+      "conductor/tracks/video_analysis_deob_warmup_20260621/"
+    ],
+    "new_files": [
+      "spec.md",
+      "plan.md",
+      "metadata.json",
+      "state.toml",
+      "samples/<user-provided-files> (gitignored)",
+      "report.md (the design doc; ~1000-3000 LOC)",
+      "prompt_template.md (the LLM operational spec; ~200-500 LOC)"
+    ],
+    "modified_files": [],
+    "deleted_files": [],
+    "gitignored_patterns": [
+      "samples/** (user's past de-obfuscation notes are local-only)"
+    ]
+  },
+  "estimated_effort": {
+    "method": "scope (per conductor/workflow.md Tier 1 Track Initialization Rules). NO day estimates.",
+    "phase_0": "1 USER action item: gather 3-10 samples into samples/",
+    "phase_1": "1 task: Tier 3 worker surveys the samples (term frequency, structural patterns, form projection heuristics, noise-dedup maps, etymology style, example transformations)",
+    "phase_2": "2 tasks: write report.md + commit with git note",
+    "phase_3": "2 tasks: write prompt_template.md + commit with git note",
+    "phase_4": "2 tasks: user review + state.toml update",
+    "summary": "5 phases (0-4), 8 tasks (1 of them the Phase 0 user action item), 2 deliverables (~1200-3500 LOC combined). No day estimates per project convention."
+  },
+  "verification_criteria": [
+    "report.md exists, follows §FR4 structure, has provenance for every claim",
+    "prompt_template.md exists, follows §FR5 structure, can be invoked as-is with a new Pass 1 report",
+    "At least 1 sample has been processed (user has provided ≥1 sample)",
+    "The 4 verification criteria (lossless, bounded, constructively typed, etymology-cited) are documented in the template",
+    "The 3 noise-dedup maps (at minimum) are documented",
+    "Sample transformations (3-5 canonical before/after pairs) are included",
+    "User has reviewed and approved both deliverables",
+    "Both deliverables are committed atomically (one commit per deliverable)",
+    "Git notes attached to both commits",
+    "state.toml updated to status = 'completed'"
+  ],
+  "risk_register": [
+    {
+      "id": "R1",
+      "title": "User cannot provide samples in time",
+      "likelihood": "medium",
+      "scope_impact": "Warmup blocked",
+      "mitigation": "User can provide partial samples (1-2 examples); warmup can use them as a starter; Phase 1 (lexicon) extends with constructive type theory defaults"
+    },
+    {
+      "id": "R2",
+      "title": "User's samples don't have enough de-obfuscation patterns",
+      "likelihood": "medium",
+      "scope_impact": "Thin lexicon",
+      "mitigation": "Phase 1 (lexicon) extends with constructive type theory defaults"
+    },
+    {
+      "id": "R3",
+      "title": "Report grows too large (>3000 LOC)",
+      "likelihood": "low",
+      "scope_impact": "Hard to reference",
+      "mitigation": "Cap at 3000 LOC; defer deep examples to Appendix"
+    },
+    {
+      "id": "R4",
+      "title": "Template is too abstract for LLM to follow",
+      "likelihood": "medium",
+      "scope_impact": "Apply phase produces poor de-obfuscations",
+      "mitigation": "Pilot (Phase 2) catches this; pilot_report.md captures refinements"
+    },
+    {
+      "id": "R5",
+      "title": "User's style is too idiosyncratic to formalize",
+      "likelihood": "low",
+      "scope_impact": "Lexicon is 'in your head' only",
+      "mitigation": "Capture 1-2 example transformations explicitly; the rest can be implicit in the prompt template"
+    }
+  ],
+  "architecture_reference": {
+    "primary_documents": [
+      "conductor/workflow.md (track convention, per-task commits, git notes)",
+      "conductor/tracks/video_analysis_deob_20260621/spec.md (umbrella design)",
+      "conductor/tracks/intent_dsl_survey_20260612/report_v1.2.md (report structure precedent)"
+    ],
+    "related_tracks": [
+      "conductor/tracks/video_analysis_deob_lexicon_20260621/ (downstream consumer of this warmup)",
+      "conductor/tracks/video_analysis_deob_pilot_20260621/ (downstream consumer of this warmup)",
+      "conductor/tracks/video_analysis_deob_apply_20260621/ (downstream consumer of this warmup)"
+    ]
+  },
+  "deferred_to_followup_tracks": [
+    {
+      "title": "Lexicon refinement (Phase 1 child)",
+      "description": "Refines the warmup's draft into a codified operational spec.",
+      "track_status": "blocked_by this warmup"
+    },
+    {
+      "title": "Pilot application (Phase 2 child)",
+      "description": "Applies the refined lexicon to 2 Pass 1 reports (cs229_building_llms, entropy_epiplexity).",
+      "track_status": "blocked_by the lexicon child"
+    },
+    {
+      "title": "Apply phase (Phase 3 child)",
+      "description": "Applies the refined lexicon to 10 remaining Pass 1 reports + 1 cross-cutting synthesis.",
+      "track_status": "blocked_by the pilot child"
+    }
+  ],
+  "regressions_and_pre_existing_failures": [],
+  "pre_existing_failures_remaining": [],
+  "user_directives": [
+    "De-obfuscation philosophy anchors (2026-06-21) - see umbrella spec §10",
+    "Warmup is the precursor; lexicon is evidence-based from user's past samples (2026-06-21)",
+    "Report + prompt template as the warmup output (2026-06-21)",
+    "No day estimates per conductor/workflow.md Tier 1 Track Initialization Rules (added 2026-06-16). Scope measured in files/sites only."
+  ]
+}
@@ -0,0 +1,63 @@
+# Plan: Video Analysis De-obfuscation Warmup
+
+This is the warmup-level plan for the precursor to Pass 2 of the 3-pass research campaign. Per the Tier 1 Track Initialization Rules, scope is measured in files/sites — no day estimates.
+
+## Phase 0: User samples provided (USER action item)
+
+- [ ] **Task 0.1:** User gathers 3-10 samples of past de-obfuscation notes and places them in `conductor/tracks/video_analysis_deob_warmup_20260621/samples/`. Format: any text the user has (markdown, txt, mixed). Samples are gitignored.
+
+## Phase 1: Survey the samples
+
+- [ ] **Task 1.1:** Tier 2 Tech Lead delegates the survey to a Tier 3 worker, who reads each sample in `samples/` and identifies:
+  - Term frequency (which math/DSL terms appear most often)
+  - Structural patterns (how the user organizes de-obfuscations — section structure, table format, etc.)
+  - "Form projection" heuristics (how the user bounds the indefinite)
+  - "Noise dedup" maps (synonyms that collapse)
+  - Etymology-citation style (how the user documents word origins)
+  - Example transformations (3-5 canonical "before/after" pairs from the samples, as required by report.md §7)
+
+## Phase 2: Write `report.md` (the design doc)
+
+- [ ] **Task 2.1:** Write `report.md` (~1000-3000 LOC). Follow the structure from spec §FR4:
+  - §1 De-obfuscation philosophy (anchored to user directives)
+  - §2 Prior art (the user's influences)
+  - §3 Lexicon (terms + re-encodings, ~4 tiers)
+  - §4 3 noise-dedup maps (or however many emerge)
+  - §5 Form-anchor rule
+  - §6 Etymology rule
+  - §7 Sample transformations (3-5 canonical before/after pairs)
+  - §8 Connection to the 3 phase children
+  - Appendix A. Provenance
+- [ ] **Task 2.2:** Commit `report.md` with git note summarizing the lexicon + dedup maps discovered.
+
+## Phase 3: Write `prompt_template.md` (the LLM operational spec)
+
+- [ ] **Task 3.1:** Write `prompt_template.md` (~200-500 LOC). Follow the structure from spec §FR5:
+  - Role, input, output (3-layer format)
+  - Lexicon (concise version; reference report.md for full)
+  - The 4 rules (boundedness, form anchor, etymology, lossless)
+  - The 3 noise-dedup maps (apply automatically)
+  - The 3-layer format spec
+  - Verification checklist
+  - Example transformations (drawn from samples)
+- [ ] **Task 3.2:** Commit `prompt_template.md` with git note summarizing the template's operational scope.
+
+## Phase 4: User review + approval
+
+- [ ] **Task 4.1:** User reviews both deliverables. Approves the lexicon + template, or iterates (loop back to Phase 2 or 3).
+- [ ] **Task 4.2:** Update `state.toml` to `status = "completed"`.
+
+## Verification (gate per workflow.md)
+
+- [ ] `report.md` follows the §FR4 structure with provenance per claim
+- [ ] `prompt_template.md` follows the §FR5 structure, can be invoked as-is with a new report
+- [ ] At least 1 sample has been processed
+- [ ] The 4 verification criteria are documented in the template
+- [ ] The 3 noise-dedup maps are documented
+- [ ] Sample transformations (3-5 canonical before/after pairs) are included
+- [ ] User has reviewed and approved
+- [ ] Both deliverables are committed atomically
+- [ ] Git notes attached to both commits
+- [ ] `state.toml` updated to `status = "completed"`
+
+The warmup is "Pass 2 warmup complete" when both deliverables are committed + user-approved. The 3 phase children can then start.
@@ -0,0 +1,328 @@
+# Track Specification: Video Analysis De-obfuscation Warmup (2026-06-21)
+
+**Status:** Active (spec approved 2026-06-21)
+**Initialized:** 2026-06-21
+**Owner:** Tier 1 Orchestrator (spec); Tier 2 Tech Lead (execution)
+**Priority:** A (user-blocking; Pass 2 of the 3-pass research campaign)
+**Type:** Research-only track (precursor to Pass 2; produces 2 deliverables: `report.md` + `prompt_template.md`)
+**Domain:** Meta-tooling (research deliverable + LLM operational spec; no `src/` changes)
+
+> **Parent:** This is a precursor to the [video_analysis_deob_20260621](../video_analysis_deob_20260621/) umbrella (Pass 2 of the 3-pass research campaign). See [umbrella spec.md](../video_analysis_deob_20260621/spec.md) for the full design and the user's de-obfuscation philosophy.
+
+> **Purpose.** This track is the warmup that produces the initial de-obfuscation lexicon + LLM prompt template from the user's own past de-obfuscation notes. The lexicon is **evidence-based**, not invented. The user provides samples; Tier 2/3 surveys them; the deliverables are the design doc + the LLM operational spec that the 3 phase children consume.
+
+> **Multi-pass context.** The de-obfuscation is the second of 3 passes in the research campaign (Pass 1 = information extraction; Pass 2 = de-obfuscation; Pass 3 = projection to applied domain). Pass 2 takes Pass 1's 12 deep-dive reports + 1 cross-cutting synthesis and produces a 3-layer de-obfuscated deliverable per video.
+
+---
+
+## 1. Overview
+
+This track is a research-style track that produces 2 deliverables:
+
+1. **`report.md`** (the design doc) — the curated de-obfuscation lexicon + the 3 noise-dedup maps + sample transformations drawn from the user's past notes. Follows the `intent_dsl_survey_20260612/report_v1.2.md` precedent (research-style, multi-section, with provenance for every claim).
+
+2. **`prompt_template.md`** (the LLM operational spec) — a self-contained prompt template that an LLM can be invoked with to perform the de-obfuscation on a new Pass 1 report. The template references the report (for grounding) and includes example transformations drawn from the user's samples.
+
+The two deliverables together form the **lexicon contract** that the 3 phase children consume.
+
+### 1.1 The user's de-obfuscation philosophy (anchors for the report)
+
+The report documents the philosophy the user described in the 2026-06-21 brainstorming session:
+
+| Anchor | Take |
+|---|---|
+| **Form requires bounds** | "To be known is to project a form." Boundedness is required for direct knowledge. |
+| **Indefinite is not directly knowable** | What is unbounded is indefinite; what is indefinite is indiscernible, unobserved, unsubject, unknowable. |
+| **Cycles/iteration/repetition are allowed** | Indefinite *operations* on bounded *forms* are expressible. `Stream A = nat -> A` is fine; `∞_val` is not. |
+| **Constructive type theory** | Proofs = programs (Curry-Howard); every value is a bounded form; operations are transformations. |
+| **Lexicon is etymology-aware** | Each term's word origin + definitional history is documented. |
+| **PL inspiration** | Concatenative (Forth/KYRA/CoSy), data-oriented imperative (Lottes), immediate-mode DAG-building DSLs (O'Donnell's IMGUI). |
+
+### 1.2 The 3 noise-dedup maps (to be discovered from samples)
+
+The report includes 3 noise-dedup maps (or however many emerge from the samples). The expected categories, per the user's "Curry-Howard collapse" framing:
+
+1. **Proofs = Programs = Computations** (Curry-Howard: types=propositions, programs=proofs)
+2. **Sets = Kinds = Types** (constructive type theory: no unbounded "set of all sets")
+3. **Functions = Procedures = Words** (concatenative: each function is a "word" with implicit args)
+
+The report may discover additional collapse maps from the samples.
+
+---
+
+## 2. Current State Audit (as of 2026-06-21)
+
+### 2.1 Already Available (DO NOT re-derive)
+
+| Asset | Location | Use in this track |
+|---|---|---|
+| Pass 1 reports | `conductor/tracks/video_analysis_<slug>_20260621/report.md` | The input to be de-obfuscated; the report should anticipate this input's patterns |
+| The user's directives | (this brainstorming session, 2026-06-21) | The anchors for the philosophy sections |
+| `intent_dsl_survey_20260612/report_v1.2.md` | `conductor/tracks/intent_dsl_survey_20260612/` | Sibling DSL (tool-verb DSL for AI agents); shares philosophy but is for tool verbs, not math re-encoding |
+| Research-track precedent | `conductor/tracks/intent_dsl_survey_20260612/` | Same structure: multi-section report with provenance per claim |
+
+### 2.2 Gaps to Fill (this track's scope)
+
+| # | Gap | Resolution |
+|---|---|---|
+| G1 | No codified de-obfuscation DSL | Survey the user's samples; produce `report.md` |
+| G2 | No LLM-direct operational spec | Produce `prompt_template.md` based on the survey |
+| G3 | No "form anchor" pattern documented | Document it in the report; require it in the prompt template |
+| G4 | No "etymology rule" applied to math terms | Discover from samples; document in the report |
+| G5 | No noise-dedup maps documented (3 expected) | Discover from samples; document in the report |
+
+---
+
+## 3. Goals
+
+1. **Evidence-based lexicon.** Every term in the lexicon is grounded in a pattern from the user's past samples (with a citation).
+2. **LLM-direct prompt template.** The template can be invoked as-is with a new report to produce a de-obfuscation. The template + a Pass 1 report → a 3-layer deliverable.
+3. **Self-referential.** The template references the report (for grounding) and the samples (for example transformations).
+4. **Philosopher-of-record.** The report captures the user's de-obfuscation philosophy (per §1.1 anchors) so future agents can pick it up.
+5. **Pass 3 readiness.** The lexicon + template is designed so Pass 3 (projection to applied domain) can consume the de-obfuscated outputs as input.
+
+---
+
+## 4. Functional Requirements
+
+### FR1. Warmup folder structure
+
+**WHERE:** `conductor/tracks/video_analysis_deob_warmup_20260621/`
+
+**WHAT:** This folder contains the warmup design (this spec) + 3 sibling files (`plan.md`, `metadata.json`, `state.toml`) + the user's samples in `samples/` (gitignored) + the 2 deliverables (`report.md` + `prompt_template.md`).
+
+### FR2. User samples (Phase 0)
+
+**WHERE:** `conductor/tracks/video_analysis_deob_warmup_20260621/samples/`
+
+**WHAT:** The user gathers 3-10 samples of their past de-obfuscation notes and places them here. Format: any text the user has (markdown, txt, mixed). The samples are **gitignored** (per AGENTS.md — local-only artifacts).
+
+If the user has fewer than 3 samples, the warmup can proceed with 1-2 as a starter; the lexicon will be thin, and the Phase 1 child track (lexicon, `video_analysis_deob_lexicon_20260621/`) will extend it with constructive type theory defaults.
+
+### FR3. Survey process (Tier 2 + Tier 3)
+
+**Process:** Tier 2 delegates the survey to a Tier 3 worker. The worker:
+- Reads each sample in `samples/`
+- Identifies recurring terms (with frequency counts)
+- Identifies structural patterns (how the user organizes de-obfuscations)
+- Identifies "form projection" heuristics (how the user bounds the indefinite)
+- Identifies "noise dedup" maps (synonyms that collapse)
+- Identifies the user's typical etymology-citation style
+- Identifies example transformations (3-5 canonical "before/after" pairs from the samples, as required by `report.md` §7)
+
+**Output:** A survey report (in `state.toml`'s working memory or a draft `report.md`).
+
+### FR4. Deliverable 1: `report.md`
+
+**Structure (per `intent_dsl_survey_20260612/report_v1.2.md` precedent):**
+
+```markdown
+# De-obfuscation Lexicon (2026-06-21)
+
+**Track:** video_analysis_deob_warmup_20260621
+**Date:** 2026-06-21
+**Status:** v1 (warmup deliverable)
+**User directives:** See §1 (anchors from 2026-06-21 brainstorming)
+
+## 1. The De-obfuscation Philosophy
+- 1.1 Form requires bounds
+- 1.2 Indefinite is not directly knowable
+- 1.3 Cycles/iteration are allowed
+- 1.4 Constructive type theory as foundation
+- 1.5 Etymology-aware lexicon
+- 1.6 PL inspiration: concatenative + data-oriented + immediate-mode
+
+## 2. Prior Art (the user's influences)
+- 2.1 Norman Wildberger (rational trigonometry, algebraic finitism)
+- 2.2 Constructive type theory (Curry-Howard)
+- 2.3 Concatenative PLs (Forth, ColorForth, KYRA/VAMP, CoSy)
+- 2.4 Data-oriented imperative (Lottes)
+- 2.5 Immediate-mode DSL DAGs (O'Donnell)
+
+## 3. The Lexicon (terms + re-encodings)
+- 3.1 Tier 1: Core concepts (~10 terms, e.g., Set→Kind, ∀→forall, ∃→exists)
+- 3.2 Tier 2: Data-oriented pipeline terms (~12 terms)
+- 3.3 Tier 3: Type-theoretic primitives (~10 terms)
+- 3.4 Tier 4: AI-fuzzing tolerance terms (~8 terms)
+- 3.5 Boundedness rules (∞_val banned, ∞_proc allowed, ∞_card banned)
+
+## 4. The 3 Noise-Dedup Maps
+- 4.1 Proofs = Programs = Computations
+- 4.2 Sets = Kinds = Types
+- 4.3 Functions = Procedures = Words
+- 4.4 (additional maps discovered from samples)
+
+## 5. The Form-Anchor Rule
+- Every re-encoding must have a form anchor: "what bounded form does this project from the indefinite?"
+- Examples from the user's samples
+
+## 6. The Etymology Rule
+- Every new term has a 1-line origin + 1-line definition history
+- Examples from the user's samples
+
+## 7. Sample Transformations
+- 3-5 canonical "before/after" pairs drawn from the user's samples
+- Each pair: original notation → re-encoded form, with the lexicon terms used
+
+## 8. Connection to the 3 Phase Children
+- 8.1 Phase 1 (lexicon): consumes this report, refines into a codified spec
+- 8.2 Phase 2 (pilot): consumes the refined lexicon, applies to 2 videos
+- 8.3 Phase 3 (apply): consumes the pilot's refined lexicon, applies to 10 + synthesis
+
+## Appendix A. Provenance
+- Every claim cites: the user's directive, the sample file, or the prior-art reference
+```
+
+**Target:** ~1000-3000 LOC (hard cap 3000, per risk R3). Expected to land near the low end of that range — comparable to `intent_dsl_survey_20260612/report_v1.2.md` (1343 lines) — because the prior-art survey is shorter here (the philosophy is the user's, not derived from external sources).
+
+### FR5. Deliverable 2: `prompt_template.md`
+
+**Structure (LLM-direct, self-contained):**
+
+```markdown
+# De-obfuscation Prompt Template (v1, 2026-06-21)
+
+> Use this template to de-obfuscate a Pass 1 video report.
+> Reference: report.md (the design doc) for the full lexicon.
+> Reference: samples/ (gitignored, the user's past de-obfuscations) for examples.
+
+## Your role
+You are a de-obfuscator. Your task: take a Pass 1 report (full of standard math notation + verbose verbiage) and produce a 3-layer de-obfuscated deliverable.
+
+## Input
+- <path/to/pass1-report.md> (e.g., conductor/tracks/video_analysis_<slug>_20260621/report.md)
+- <path/to/summary.md> (optional, for cross-referencing)
+
+## Output (3 files in <output-dir>/<slug>/)
+- <slug>_translation.md (side-by-side table)
+- <slug>_deobfuscated.md (replacement; re-encoded form replaces the original)
+- <slug>_decoder.md (per-term decoder with form anchor + etymology + definition history)
+
+## Lexicon (reference report.md §3 for full)
+[concise version: Tier 1-4 term tables]
+
+## The 4 Rules
+1. Boundedness: every value is a finite form. `∞_val` and `∞_card` are banned; `∞_proc` is allowed (as `Stream A = nat -> A`).
+2. Form anchor: every re-encoding must have a form anchor, i.e. an answer to "what bounded form does this project from the indefinite?"
+3. Etymology: every new term gets a 1-line origin + 1-line definition history in the decoder.
+4. Lossless: every Pass 1 concept is represented. If a concept can't be bounded, mark it "indefinite — see original" rather than forcing a translation.
+
+## The 3 Noise-Dedup Maps (apply automatically)
+- Proofs = Programs = Computations (use the chosen term per the sample's style)
+- Sets = Kinds = Types
+- Functions = Procedures = Words
+
+## The 3-Layer Format
+### Layer 1: Translation (side-by-side table)
+| # | Original Section | Original Expression | Re-encoded Form | Form Anchor |
+| ... |
+
+### Layer 2: De-obfuscated (replacement)
+[Same 8-section structure as Pass 1, but with re-encoded math]
+
+### Layer 3: Decoder (per-term)
+## Term: <name>
+- Original notation: ...
+- Re-encoded: ...
+- Form anchor: ...
+- Etymology: ...
+- Definition history: ...
+- Source sections in original: ...
+
+## Verification
+After producing the 3 files, verify:
+- [ ] Lossless (no Pass 1 concept dropped)
+- [ ] Bounded (no ∞_val or ∞_card)
+- [ ] Constructively typed (every expression has a type)
+- [ ] Etymology-cited (every new term has the 1-line origin + 1-line definition history)
+
+## Example transformations (drawn from samples)
+[3-5 canonical before/after pairs]
+```
+
+**Target:** ~200-500 LOC. The template is dense (every word is operational).
+
+### FR6. Dependency
+
+- **`blocked_by`:** the user providing samples (USER action item, Phase 0).
+- **`blocks`:** the 3 phase children (lexicon, pilot, apply) all depend on this warmup's `report.md` + `prompt_template.md`.
+
+---
+
+## 5. Non-Functional Requirements
+
+- **TDD not applicable** — research-only track; no automated tests.
+- **Per-task atomic commits** — each deliverable (`report.md` + `prompt_template.md`) is committed in its own commit.
+- **Git notes** — each commit gets a note summarizing what was produced.
+- **No day estimates** — scope measured in files/sites per `conductor/workflow.md`.
+- **No `src/` changes** — research-only.
+
+---
+
+## 6. Out of Scope (Explicit)
+
+- **Lexicon refinement** — handled by the lexicon child track (`video_analysis_deob_lexicon_20260621/`).
+- **Application to Pass 1 reports** — handled by the pilot + apply child tracks.
+- **Pass 3 (projection to applied domain)** — future, user-led.
+- **Interpreter for the de-obfuscation DSL** — out of scope. The LLM is the executor.
+- **Modifying `src/*.py` files in manual_slop** — research-only.
+- **Adding `pyproject.toml` dependencies** — all work is research (markdown files).
+
+---
+
+## 7. Architecture Reference
+
+This track does not modify the manual_slop application architecture. The architecture refs that DO apply:
+
+- **Research-track precedent:** `conductor/tracks/intent_dsl_survey_20260612/` (multi-section report + provenance per claim)
+- **Track convention:** `conductor/workflow.md` "Standard Task Workflow" + per-task commit discipline
+- **Multi-pass framing:** `conductor/tracks/video_analysis_campaign_20260621/spec.md` §0, §11
+- **De-obfuscation design:** `conductor/tracks/video_analysis_deob_20260621/spec.md`
+
+---
+
+## 8. Verification Criteria
+
+The warmup is "done" when all of the following are true:
+
+- [ ] `report.md` exists, is non-empty, follows the §FR4 structure, has provenance for every claim
+- [ ] `prompt_template.md` exists, is non-empty, follows the §FR5 structure, can be invoked as-is with a new report
+- [ ] At least 1 sample has been processed (the user has provided ≥1 sample)
+- [ ] The 4 verification criteria (lossless, bounded, constructively typed, etymology-cited) are documented in the template
+- [ ] The 3 noise-dedup maps (at minimum) are documented
+- [ ] Sample transformations (3-5 canonical before/after pairs) are included
+- [ ] User has reviewed and approved the deliverables
+- [ ] Both deliverables are committed (one commit per deliverable, per the per-task commit discipline)
+- [ ] Git notes attached to both commits
+- [ ] `state.toml` updated to `status = "completed"`
+
+---
+
+## 9. Risk Register
+
+| ID | Title | Likelihood | Scope impact | Mitigation |
+|---|---|---|---|---|
+| R1 | User cannot provide samples in time | Medium | Warmup blocked | User can provide partial samples (1-2 examples); warmup can use them as a starter |
+| R2 | User's samples don't have enough de-obfuscation patterns | Medium | Thin lexicon | Phase 1 (lexicon) extends with constructive type theory defaults |
+| R3 | Report grows too large | Low | Hard to reference | Cap at 3000 LOC; defer deep examples to Appendix |
+| R4 | Template is too abstract (LLM doesn't follow it) | Medium | Apply phase produces poor de-obfuscations | Pilot (Phase 2) catches this; pilot_report.md captures refinements |
+| R5 | User's style is too idiosyncratic to formalize | Low | Lexicon is "in your head" only | Capture 1-2 example transformations explicitly; the rest can be implicit in the prompt template |
+
+---
+
+## 10. User Directives (recorded for next agent / future-self)
+
+- **2026-06-21:** The user's de-obfuscation philosophy anchors (see umbrella spec §10 / §1.1 here).
+- **2026-06-21:** "I can provide samples of notes I've done but it will take time and might be best to leave to a 'warmup' track to gather and survey those, to then codify how this de-obfuscation via an llm following that within a track's plan would do."
+- **2026-06-21:** "Report + prompt template" (the warmup's output shape).
+
+---
+
+## 11. See Also
+
+- [Umbrella spec.md](../video_analysis_deob_20260621/spec.md) — full design
+- [Umbrella README.md](../video_analysis_deob_20260621/README.md) — child index
+- [Pass 1 campaign spec §0, §11](../video_analysis_campaign_20260621/spec.md) — multi-pass framing + Pass 2 handoff (now superseded by the umbrella above)
+- [intent_dsl_survey_20260612/report_v1.2.md](../intent_dsl_survey_20260612/report_v1.2.md) — sibling DSL; same report structure
+- `conductor/workflow.md` "Standard Task Workflow" + "Tier 1 Track Initialization Rules"
+- `conductor/code_styleguides/agent_memory_dimensions.md` — Pass 2 produces a "knowledge" memory artifact
@@ -0,0 +1,59 @@
+# Track state for video_analysis_deob_warmup_20260621
+# Updated by Tier 2 Tech Lead during execution
+
+[meta]
+track_id = "video_analysis_deob_warmup_20260621"
+name = "Video Analysis De-obfuscation Warmup (Pass 2 precursor)"
+status = "active"
+current_phase = 0  # Phase 0 = waiting for user samples
+last_updated = "2026-06-21"
+
+[blocked_by]
+# User action item: user must provide 3-10 samples of past de-obfuscation notes in samples/
+
+[blocks]
+video_analysis_deob_lexicon_20260621 = "blocked (consumes report.md + prompt_template.md)"
+video_analysis_deob_pilot_20260621 = "blocked (consumes report.md + prompt_template.md)"
+video_analysis_deob_apply_20260621 = "blocked (consumes report.md + prompt_template.md)"
+
+[phases]
+phase_0 = { status = "in_progress", checkpointsha = "", name = "User samples provided (USER action item)" }
+phase_1 = { status = "pending", checkpointsha = "", name = "Survey the samples (Tier 3 worker)" }
+phase_2 = { status = "pending", checkpointsha = "", name = "Write report.md (the design doc)" }
+phase_3 = { status = "pending", checkpointsha = "", name = "Write prompt_template.md (the LLM operational spec)" }
+phase_4 = { status = "pending", checkpointsha = "", name = "User review + approval" }
+
+[tasks]
+# Phase 0 (USER action)
+t0_1 = { status = "pending", commit_sha = "", description = "User gathers 3-10 samples of past de-obfuscation notes and places them in samples/. Format: any text (markdown, txt, mixed). Gitignored." }
+
+# Phase 1 (survey)
+t1_1 = { status = "pending", commit_sha = "", description = "Tier 3 worker surveys the samples: term frequency, structural patterns, form projection heuristics, noise-dedup maps, etymology style, example transformations" }
+
+# Phase 2 (report.md)
+t2_1 = { status = "pending", commit_sha = "", description = "Write report.md (~1000-3000 LOC) following §FR4 structure: philosophy + prior art + lexicon (4 tiers) + 3 dedup maps + form-anchor rule + etymology rule + sample transformations + connection to phase children + provenance appendix" }
+t2_2 = { status = "pending", commit_sha = "", description = "Commit report.md with git note summarizing the lexicon + dedup maps discovered" }
+
+# Phase 3 (prompt_template.md)
+t3_1 = { status = "pending", commit_sha = "", description = "Write prompt_template.md (~200-500 LOC) following §FR5 structure: role + input + output (3-layer) + lexicon + 4 rules + 3 dedup maps + 3-layer format + verification + example transformations" }
+t3_2 = { status = "pending", commit_sha = "", description = "Commit prompt_template.md with git note summarizing the template's operational scope" }
+
+# Phase 4 (user review)
+t4_1 = { status = "pending", commit_sha = "", description = "User reviews both deliverables. Approves or iterates (loop back to Phase 2 or 3)" }
+t4_2 = { status = "pending", commit_sha = "", description = "Update state.toml to status = 'completed'" }
+
+[verification]
+samples_provided = false
+report_md_committed = false
+prompt_template_md_committed = false
+user_approved = false
+state_toml_completed = false
+
+[user_directives_logged]
+unorthodox_curation = "Per user 2026-06-21: 'I have a very unorthodox take for how I curate knowledge, especially formal knowledge in the math and sciences.'"
+constructive_type_theory = "Per user 2026-06-21: 'I like Norman Wildberger's work. And I like the constructivist current progress on type theories as a foundational system.'"
+bounded_for_knowledge = "Per user 2026-06-21: 'No observer or mechanism or construct can be infinite in resolution or quantification. To have distinction must have a bounds.'"
+cycles_iteration_allowed = "Per user 2026-06-21: 'Infinite is okay well handled CORRECTLY... What can be indefinite is that can be subjected upon is that of cycles, that of iteration, that of repetition.'"
+warmup_evidence_based = "Per user 2026-06-21: 'I can provide samples of notes I've done but it will take time and might be best to leave to a warmup track to gather and survey those, to then codify how this de-obfuscation via an llm following that within a track's plan would do.'"
+report_plus_template = "Per user 2026-06-21: warmup output is report.md + prompt_template.md"
+no_day_estimates = "Per conductor/workflow.md Tier 1 Track Initialization Rules (added 2026-06-16). Scope measured in files/sites only."