diff --git a/conductor/tracks/video_analysis_deob_warmup_20260621/metadata.json b/conductor/tracks/video_analysis_deob_warmup_20260621/metadata.json new file mode 100644 index 00000000..da7f8943 --- /dev/null +++ b/conductor/tracks/video_analysis_deob_warmup_20260621/metadata.json @@ -0,0 +1,130 @@ +{ + "track_id": "video_analysis_deob_warmup_20260621", + "name": "Video Analysis De-obfuscation Warmup (Pass 2 precursor)", + "created": "2026-06-21", + "status": "spec_approved", + "blocked_by": ["user_samples_provided"], + "blocks": [ + "video_analysis_deob_lexicon_20260621", + "video_analysis_deob_pilot_20260621", + "video_analysis_deob_apply_20260621" + ], + "priority": "A", + "rationale": "User-blocking precursor to Pass 2 of the 3-pass research campaign. Produces the initial de-obfuscation lexicon (evidence-based from the user's past samples) and the LLM prompt template. Research-only; no src/ changes. The 3 phase children (lexicon/pilot/apply) all depend on this warmup's output.", + "type": "research-only (warmup track; produces report.md + prompt_template.md)", + "domain": "meta-tooling (research deliverable + LLM operational spec; no manual_slop src/ changes)", + "scope": { + "new_folders": [ + "conductor/tracks/video_analysis_deob_warmup_20260621/" + ], + "new_files": [ + "spec.md", + "plan.md", + "metadata.json", + "state.toml", + "samples/ (gitignored)", + "report.md (the design doc; ~1000-3000 LOC)", + "prompt_template.md (the LLM operational spec; ~200-500 LOC)" + ], + "modified_files": [], + "deleted_files": [], + "gitignored_patterns": [ + "samples/** (user's past de-obfuscation notes are local-only)" + ] + }, + "estimated_effort": { + "method": "scope (per conductor/workflow.md Tier 1 Track Initialization Rules). 
NO day estimates.", + "phase_0": "1 USER action item: gather 3-10 samples into samples/", + "phase_1": "1 task: Tier 3 worker surveys the samples (term frequency, structural patterns, form projection heuristics, noise-dedup maps, etymology style, example transformations)", + "phase_2": "2 tasks: write report.md + commit with git note", + "phase_3": "2 tasks: write prompt_template.md + commit with git note", + "phase_4": "2 tasks: user review + state.toml update", + "summary": "4 phases (Phases 1-4), 7 tasks, 2 deliverables (~1200-3500 LOC combined), 1 user action item (Phase 0). No day estimates per project convention." + }, + "verification_criteria": [ + "report.md exists, follows §FR4 structure, has provenance for every claim", + "prompt_template.md exists, follows §FR5 structure, can be invoked as-is with a new Pass 1 report", + "At least 1 sample has been processed (user has provided ≥1 sample)", + "The 4 verification criteria (lossless, bounded, constructively typed, etymology-cited) are documented in the template", + "The 3 noise-dedup maps (at minimum) are documented", + "Sample transformations (3-5 canonical before/after pairs) are included", + "User has reviewed and approved both deliverables", + "Both deliverables are committed atomically (one commit per deliverable)", + "Git notes attached to both commits", + "state.toml updated to status = 'completed'" + ], + "risk_register": [ + { + "id": "R1", + "title": "User cannot provide samples in time", + "likelihood": "medium", + "scope_impact": "Warmup blocked", + "mitigation": "User can provide partial samples (1-2 examples); warmup can use them as a starter; Phase 1 (lexicon) extends with constructive type theory defaults" + }, + { + "id": "R2", + "title": "User's samples don't have enough de-obfuscation patterns", + "likelihood": "medium", + "scope_impact": "Thin lexicon", + "mitigation": "Phase 1 (lexicon) extends with constructive type theory defaults" + }, + { + "id": "R3", + "title": "Report grows too large (>3000 LOC)", + 
"likelihood": "low", + "scope_impact": "Hard to reference", + "mitigation": "Cap at 3000 LOC; defer deep examples to Appendix" + }, + { + "id": "R4", + "title": "Template is too abstract for LLM to follow", + "likelihood": "medium", + "scope_impact": "Apply phase produces poor de-obfuscations", + "mitigation": "Pilot (Phase 2) catches this; pilot_report.md captures refinements" + }, + { + "id": "R5", + "title": "User's style is too idiosyncratic to formalize", + "likelihood": "low", + "scope_impact": "Lexicon is 'in your head' only", + "mitigation": "Capture 1-2 example transformations explicitly; the rest can be implicit in the prompt template" + } + ], + "architecture_reference": { + "primary_documents": [ + "conductor/workflow.md (track convention, per-task commits, git notes)", + "conductor/tracks/video_analysis_deob_20260621/spec.md (umbrella design)", + "conductor/tracks/intent_dsl_survey_20260612/report_v1.2.md (report structure precedent)" + ], + "related_tracks": [ + "conductor/tracks/video_analysis_deob_lexicon_20260621/ (downstream consumer of this warmup)", + "conductor/tracks/video_analysis_deob_pilot_20260621/ (downstream consumer of this warmup)", + "conductor/tracks/video_analysis_deob_apply_20260621/ (downstream consumer of this warmup)" + ] + }, + "deferred_to_followup_tracks": [ + { + "title": "Lexicon refinement (Phase 1 child)", + "description": "Refines the warmup's draft into a codified operational spec.", + "track_status": "blocked_by this warmup" + }, + { + "title": "Pilot application (Phase 2 child)", + "description": "Applies the refined lexicon to 2 Pass 1 reports (cs229_building_llms, entropy_epiplexity).", + "track_status": "blocked_by the lexicon child" + }, + { + "title": "Apply phase (Phase 3 child)", + "description": "Applies the refined lexicon to 10 remaining Pass 1 reports + 1 cross-cutting synthesis.", + "track_status": "blocked_by the pilot child" + } + ], + "regressions_and_pre_existing_failures": [], + 
"pre_existing_failures_remaining": [], + "user_directives": [ + "De-obfuscation philosophy anchors (2026-06-21) - see umbrella spec §10", + "Warmup is the precursor; lexicon is evidence-based from user's past samples (2026-06-21)", + "Report + prompt template as the warmup output (2026-06-21)", + "No day estimates per conductor/workflow.md Tier 1 Track Initialization Rules (added 2026-06-16). Scope measured in files/sites only." + ] +} diff --git a/conductor/tracks/video_analysis_deob_warmup_20260621/plan.md b/conductor/tracks/video_analysis_deob_warmup_20260621/plan.md new file mode 100644 index 00000000..9bf3006b --- /dev/null +++ b/conductor/tracks/video_analysis_deob_warmup_20260621/plan.md @@ -0,0 +1,63 @@ +# Plan: Video Analysis De-obfuscation Warmup + +This is the warmup-level plan for the precursor to Pass 2 of the 3-pass research campaign. Per the Tier 1 Track Initialization Rules, scope is measured in files/sites — no day estimates. + +## Phase 0: User samples provided (USER action item) + +- [ ] **Task 0.1:** User gathers 3-10 samples of past de-obfuscation notes and places them in `conductor/tracks/video_analysis_deob_warmup_20260621/samples/`. Format: any text the user has (markdown, txt, mixed). Samples are gitignored. + +## Phase 1: Survey the samples + +- [ ] **Task 1.1:** Tier 2 worker delegates to a Tier 3 worker: survey each sample in `samples/`. Identify: + - Term frequency (which math/DSL terms appear most often) + - Structural patterns (how the user organizes de-obfuscations — section structure, table format, etc.) + - "Form projection" heuristics (how the user bounds the indefinite) + - "Noise dedup" maps (synonyms that collapse) + - Etymology-citation style (how the user documents word origins) + - Example transformations (1-2 canonical "before/after" pairs from the samples) + +## Phase 2: Write `report.md` (the design doc) + +- [ ] **Task 2.1:** Write `report.md` (~1000-3000 LOC). 
Follow the structure from spec §FR4: + - §1 De-obfuscation philosophy (anchored to user directives) + - §2 Prior art (the user's influences) + - §3 Lexicon (terms + re-encodings, ~4 tiers) + - §4 3 noise-dedup maps (or however many emerge) + - §5 Form-anchor rule + - §6 Etymology rule + - §7 Sample transformations (3-5 canonical before/after pairs) + - §8 Connection to the 3 phase children + - Appendix A. Provenance +- [ ] **Task 2.2:** Commit `report.md` with git note summarizing the lexicon + dedup maps discovered. + +## Phase 3: Write `prompt_template.md` (the LLM operational spec) + +- [ ] **Task 3.1:** Write `prompt_template.md` (~200-500 LOC). Follow the structure from spec §FR5: + - Role, input, output (3-layer format) + - Lexicon (concise version; reference report.md for full) + - The 4 rules (boundedness, form anchor, etymology, lossless) + - The 3 noise-dedup maps (apply automatically) + - The 3-layer format spec + - Verification checklist + - Example transformations (drawn from samples) +- [ ] **Task 3.2:** Commit `prompt_template.md` with git note summarizing the template's operational scope. + +## Phase 4: User review + approval + +- [ ] **Task 4.1:** User reviews both deliverables. Approves the lexicon + template, or iterates (loop back to Phase 2 or 3). +- [ ] **Task 4.2:** Update `state.toml` to `status = "completed"`. 
+ +## Verification (gate per workflow.md) + +- [ ] `report.md` follows the §FR4 structure with provenance per claim +- [ ] `prompt_template.md` follows the §FR5 structure, can be invoked as-is with a new report +- [ ] At least 1 sample has been processed +- [ ] The 4 verification criteria are documented in the template +- [ ] The 3 noise-dedup maps are documented +- [ ] Sample transformations (3-5 canonical before/after pairs) are included +- [ ] User has reviewed and approved +- [ ] Both deliverables are committed atomically +- [ ] Git notes attached to both commits +- [ ] `state.toml` updated to `status = "completed"` + +The warmup is "Pass 2 warmup complete" when both deliverables are committed + user-approved. The 3 phase children can then start. diff --git a/conductor/tracks/video_analysis_deob_warmup_20260621/spec.md b/conductor/tracks/video_analysis_deob_warmup_20260621/spec.md new file mode 100644 index 00000000..a951e596 --- /dev/null +++ b/conductor/tracks/video_analysis_deob_warmup_20260621/spec.md @@ -0,0 +1,328 @@ +# Track Specification: Video Analysis De-obfuscation Warmup (2026-06-21) + +**Status:** Active (spec approved 2026-06-21) +**Initialized:** 2026-06-21 +**Owner:** Tier 1 Orchestrator (spec); Tier 2 Tech Lead (execution) +**Priority:** A (user-blocking; Pass 2 of the 3-pass research campaign) +**Type:** Research-only track (precursor to Pass 2; produces 2 deliverables: `report.md` + `prompt_template.md`) +**Domain:** Meta-tooling (research deliverable + LLM operational spec; no `src/` changes) + +> **Parent:** This is a precursor to the [video_analysis_deob_20260621](../video_analysis_deob_20260621/) umbrella (Pass 2 of the 3-pass research campaign). See [umbrella spec.md](../video_analysis_deob_20260621/spec.md) for the full design and the user's de-obfuscation philosophy. + +> **Purpose.** This track is the warmup that produces the initial de-obfuscation lexicon + LLM prompt template from the user's own past de-obfuscation notes. 
The lexicon is **evidence-based**, not invented. The user provides samples; Tier 2/3 surveys them; the deliverables are the design doc + the LLM operational spec that the 3 phase children consume. + +> **Multi-pass context.** The de-obfuscation is the second of 3 passes in the research campaign (Pass 1 = information extraction; Pass 2 = de-obfuscation; Pass 3 = projection to applied domain). Pass 2 takes Pass 1's 12 deep-dive reports + 1 cross-cutting synthesis and produces a 3-layer de-obfuscated deliverable per video. + +--- + +## 1. Overview + +This track is a research-style track that produces 2 deliverables: + +1. **`report.md`** (the design doc) — the curated de-obfuscation lexicon + the 3 noise-dedup maps + sample transformations drawn from the user's past notes. Follows the `intent_dsl_survey_20260612/report_v1.2.md` precedent (research-style, multi-section, with provenance for every claim). + +2. **`prompt_template.md`** (the LLM operational spec) — a self-contained prompt template that an LLM can be invoked with to perform the de-obfuscation on a new Pass 1 report. The template references the report (for grounding) and includes example transformations drawn from the user's samples. + +The two deliverables together form the **lexicon contract** that the 3 phase children consume. + +### 1.1 The user's de-obfuscation philosophy (anchors for the report) + +The report documents the philosophy the user described in 2026-06-21 brainstorming: + +| Anchor | Take | +|---|---| +| **Form requires bounds** | "To be known is to project a form." Boundedness is required for direct knowledge. | +| **Indefinite is not directly knowable** | What is unbounded is indefinite; what is indefinite is indiscernible, unobserved, unsubject, unknowable. | +| **Cycles/iteration/repetition are allowed** | Indefinite *operations* on bounded *forms* are expressible. `Stream A = nat -> A` is fine; `∞_val` is not. 
| +| **Constructive type theory** | Proofs = programs (Curry-Howard); every value is a bounded form; operations are transformations. | +| **Lexicon is etymology-aware** | Each term's word origin + definitional history is documented. | +| **PL inspiration** | Concatenative (Forth/KYRA/CoSy), data-oriented imperative (Lottes), immediate-mode DAG-building DSLs (O'Donnell's IMGUI). | + +### 1.2 The 3 noise-dedup maps (to be discovered from samples) + +The report includes 3 noise-dedup maps (or however many emerge from the samples). The expected categories, per the user's "Curry-Howard collapse" framing: + +1. **Proofs = Programs = Computations** (Curry-Howard: types=propositions, programs=proofs) +2. **Sets = Kinds = Types** (constructive type theory: no unbounded "set of all sets") +3. **Functions = Procedures = Words** (concatenative: each function is a "word" with implicit args) + +The report may discover additional collapse maps from the samples. + +--- + +## 2. Current State Audit (as of 2026-06-21) + +### 2.1 Already Available (DO NOT re-derive) + +| Asset | Location | Use in this track | +|---|---|---| +| Pass 1 reports | `conductor/tracks/video_analysis_<video_slug>_20260621/report.md` (one per video) | The input to be de-obfuscated; the report should anticipate this input's patterns | +| The user's directives | (this brainstorming session, 2026-06-21) | The anchors for the philosophy sections | +| `intent_dsl_survey_20260612/report_v1.2.md` | `conductor/tracks/intent_dsl_survey_20260612/` | Sibling DSL (tool-verb DSL for AI agents); shares philosophy but is for tool verbs, not math re-encoding | +| Research-track precedent | `conductor/tracks/intent_dsl_survey_20260612/` | Same structure: multi-section report with provenance per claim | + +### 2.2 Gaps to Fill (this track's scope) + +| # | Gap | Resolution | +|---|---|---| +| G1 | No codified de-obfuscation DSL | Survey the user's samples; produce `report.md` | +| G2 | No LLM-direct operational spec | Produce `prompt_template.md` based 
on the survey | +| G3 | No "form anchor" pattern documented | Document it in the report; require it in the prompt template | +| G4 | No "etymology rule" applied to math terms | Discover from samples; document in the report | +| G5 | No 3 noise-dedup maps | Discover from samples; document in the report | + +--- + +## 3. Goals + +1. **Evidence-based lexicon.** Every term in the lexicon is grounded in a pattern from the user's past samples (with a citation). +2. **LLM-direct prompt template.** The template can be invoked as-is with a new report to produce a de-obfuscation. The template + a Pass 1 report → a 3-layer deliverable. +3. **Self-referential.** The template references the report (for grounding) and the samples (for example transformations). +4. **Philosopher-of-record.** The report captures the user's de-obfuscation philosophy (per §1.1 anchors) so future agents can pick it up. +5. **Pass 3 readiness.** The lexicon + template is designed so Pass 3 (projection to applied domain) can consume the de-obfuscated outputs as input. + +--- + +## 4. Functional Requirements + +### FR1. Warmup folder structure + +**WHERE:** `conductor/tracks/video_analysis_deob_warmup_20260621/` + +**WHAT:** This folder contains the warmup design (this spec) + 3 sibling files (`plan.md`, `metadata.json`, `state.toml`) + the user's samples in `samples/` (gitignored) + the 2 deliverables (`report.md` + `prompt_template.md`). + +### FR2. User samples (Phase 0) + +**WHERE:** `conductor/tracks/video_analysis_deob_warmup_20260621/samples/` + +**WHAT:** The user gathers 3-10 samples of their past de-obfuscation notes and places them here. Format: any text the user has (markdown, txt, mixed). The samples are **gitignored** (per AGENTS.md — local-only artifacts). + +If the user has fewer than 3 samples, the warmup can proceed with 1-2 as a starter; the resulting lexicon will be thin, and the lexicon child track (`video_analysis_deob_lexicon_20260621`, the umbrella's Phase 1 — not this track's Phase 1 survey) will extend it with constructive type theory defaults. + +### FR3. 
Survey process (Tier 2 + Tier 3) + +**Process:** Tier 2 delegates the survey to a Tier 3 worker. The worker: +- Reads each sample in `samples/` +- Identifies recurring terms (with frequency counts) +- Identifies structural patterns (how the user organizes de-obfuscations) +- Identifies "form projection" heuristics (how the user bounds the indefinite) +- Identifies "noise dedup" maps (synonyms that collapse) +- Identifies the user's typical etymology-citation style +- Identifies example transformations (1-2 canonical "before/after" pairs from the samples) + +**Output:** A survey report (in `state.toml`'s working memory or a draft `report.md`). + +### FR4. Deliverable 1: `report.md` + +**Structure (per `intent_dsl_survey_20260612/report_v1.2.md` precedent):** + +```markdown +# De-obfuscation Lexicon (2026-06-21) + +**Track:** video_analysis_deob_warmup_20260621 +**Date:** 2026-06-21 +**Status:** v1 (warmup deliverable) +**User directives:** See §1 (anchors from 2026-06-21 brainstorming) + +## 1. The De-obfuscation Philosophy +- 1.1 Form requires bounds +- 1.2 Indefinite is not directly knowable +- 1.3 Cycles/iteration are allowed +- 1.4 Constructive type theory as foundation +- 1.5 Etymology-aware lexicon +- 1.6 PL inspiration: concatenative + data-oriented + immediate-mode + +## 2. Prior Art (the user's influences) +- 2.1 Norman Wildberger (rational trigonometry, algebraic finitism) +- 2.2 Constructive type theory (Curry-Howard) +- 2.3 Concatenative PLs (Forth, ColorForth, KYRA/VAMP, CoSy) +- 2.4 Data-oriented imperative (Lottes) +- 2.5 Immediate-mode DSL DAGs (O'Donnell) + +## 3. The Lexicon (terms + re-encodings) +- 3.1 Tier 1: Core concepts (~10 terms, e.g., Set→Kind, ∀→forall, ∃→exists) +- 3.2 Tier 2: Data-oriented pipeline terms (~12 terms) +- 3.3 Tier 3: Type-theoretic primitives (~10 terms) +- 3.4 Tier 4: AI-fuzzing tolerance terms (~8 terms) +- 3.5 Boundedness rules (∞_val banned, ∞_proc allowed, ∞_card banned) + +## 4. 
The 3 Noise-Dedup Maps +- 4.1 Proofs = Programs = Computations +- 4.2 Sets = Kinds = Types +- 4.3 Functions = Procedures = Words +- 4.4 (additional maps discovered from samples) + +## 5. The Form-Anchor Rule +- Every re-encoding must have a form anchor: "what bounded form does this project from the indefinite?" +- Examples from the user's samples + +## 6. The Etymology Rule +- Every new term has a 1-line origin + 1-line definition history +- Examples from the user's samples + +## 7. Sample Transformations +- 3-5 canonical "before/after" pairs drawn from the user's samples +- Each pair: original notation → re-encoded form, with the lexicon terms used + +## 8. Connection to the 3 Phase Children +- 8.1 Phase 1 (lexicon): consumes this report, refines into a codified spec +- 8.2 Phase 2 (pilot): consumes the refined lexicon, applies to 2 videos +- 8.3 Phase 3 (apply): consumes the pilot's refined lexicon, applies to 10 + synthesis + +## Appendix A. Provenance +- Every claim cites: the user's directive, the sample file, or the prior-art reference +``` + +**Target:** ~1000-3000 LOC (hard cap 3000, per risk R3). Expected to land near or below the size of `intent_dsl_survey_20260612/report_v1.2.md` (1343 lines), because the prior-art survey is shorter here (the philosophy is the user's, not derived from external sources). + +### FR5. Deliverable 2: `prompt_template.md` + +**Structure (LLM-direct, self-contained):** + +```markdown +# De-obfuscation Prompt Template (v1, 2026-06-21) + +> Use this template to de-obfuscate a Pass 1 video report. +> Reference: report.md (the design doc) for the full lexicon. +> Reference: samples/ (gitignored, the user's past de-obfuscations) for examples. + +## Your role +You are a de-obfuscator. Your task: take a Pass 1 report (full of standard math notation + verbose verbiage) and produce a 3-layer de-obfuscated deliverable. 
+ +## Input +- <pass1_report_path> (e.g., conductor/tracks/video_analysis_<video_slug>_20260621/report.md) +- <cross_reference_path> (optional, for cross-referencing) + +## Output (3 files in <output_dir>/<video_slug>/) +- <video_slug>_translation.md (side-by-side table) +- <video_slug>_deobfuscated.md (replacement; re-encoded form replaces the original) +- <video_slug>_decoder.md (per-term decoder with form anchor + etymology + definition history) + +## Lexicon (reference report.md §3 for full) +[concise version: Tier 1-4 term tables] + +## The 4 Rules +1. Boundedness: every value is a finite form. `∞_val` is banned; `∞_proc` is allowed (as `Stream A = nat -> A`). +2. Form anchor: every re-encoding must have a "what bounded form does this project from the indefinite?" +3. Etymology: every new term gets a 1-line origin + 1-line definition history in the decoder. +4. Lossless: every Pass 1 concept is represented. If a concept can't be bounded, mark it "indefinite — see original" rather than forcing a translation. + +## The 3 Noise-Dedup Maps (apply automatically) +- Proofs = Programs = Computations (use the chosen term per the sample's style) +- Sets = Kinds = Types +- Functions = Procedures = Words + +## The 3-Layer Format +### Layer 1: Translation (side-by-side table) +| # | Original Section | Original Expression | Re-encoded Form | Form Anchor | +| ... | + +### Layer 2: De-obfuscated (replacement) +[Same 8-section structure as Pass 1, but with re-encoded math] + +### Layer 3: Decoder (per-term) +## Term: <term_name> +- Original notation: ... +- Re-encoded: ... +- Form anchor: ... +- Etymology: ... +- Definition history: ... +- Source sections in original: ... + +## Verification +After producing the 3 files, verify: +- [ ] Lossless (no Pass 1 concept dropped) +- [ ] Bounded (no ∞_val or ∞_card) +- [ ] Constructively typed (every expression has a type) +- [ ] Etymology-cited (every new term has the 1-line origin + 1-line definition history) + +## Example transformations (drawn from samples) +[3-5 canonical before/after pairs] +``` + +**Target:** ~200-500 LOC. 
The template is dense (every word is operational). + +### FR6. Dependency + +- **`blocked_by`:** the user providing samples (USER action item, Phase 0). +- **`blocks`:** the 3 phase children (lexicon, pilot, apply) all depend on this warmup's `report.md` + `prompt_template.md`. + +--- + +## 5. Non-Functional Requirements + +- **TDD not applicable** — research-only track; no automated tests. +- **Per-task atomic commits** — each deliverable (`report.md` + `prompt_template.md`) is committed in its own commit. +- **Git notes** — each commit gets a note summarizing what was produced. +- **No day estimates** — scope measured in files/sites per `conductor/workflow.md`. +- **No `src/` changes** — research-only. + +--- + +## 6. Out of Scope (Explicit) + +- **Lexicon refinement** — handled by the lexicon child track (`video_analysis_deob_lexicon_20260621/`). +- **Application to Pass 1 reports** — handled by the pilot + apply child tracks. +- **Pass 3 (projection to applied domain)** — future, user-led. +- **Interpreter for the de-obfuscation DSL** — out of scope. The LLM is the executor. +- **Modifying `src/*.py` files in manual_slop** — research-only. +- **Adding `pyproject.toml` dependencies** — all work is research (markdown files). + +--- + +## 7. Architecture Reference + +This track does not modify the manual_slop application architecture. The architecture refs that DO apply: + +- **Research-track precedent:** `conductor/tracks/intent_dsl_survey_20260612/` (multi-section report + provenance per claim) +- **Track convention:** `conductor/workflow.md` "Standard Task Workflow" + per-task commit discipline +- **Multi-pass framing:** `conductor/tracks/video_analysis_campaign_20260621/spec.md` §0, §11 +- **De-obfuscation design:** `conductor/tracks/video_analysis_deob_20260621/spec.md` + +--- + +## 8. 
Verification Criteria + +The warmup is "done" when all of the following are true: + +- [ ] `report.md` exists, is non-empty, follows the §FR4 structure, has provenance for every claim +- [ ] `prompt_template.md` exists, is non-empty, follows the §FR5 structure, can be invoked as-is with a new report +- [ ] At least 1 sample has been processed (the user has provided ≥1 sample) +- [ ] The 4 verification criteria (lossless, bounded, constructively typed, etymology-cited) are documented in the template +- [ ] The 3 noise-dedup maps (at minimum) are documented +- [ ] Sample transformations (3-5 canonical before/after pairs) are included +- [ ] User has reviewed and approved the deliverables +- [ ] Both deliverables are committed (one commit per deliverable, per the per-task commit discipline) +- [ ] Git notes attached to both commits +- [ ] `state.toml` updated to `status = "completed"` + +--- + +## 9. Risk Register + +| ID | Title | Likelihood | Scope impact | Mitigation | +|---|---|---|---|---| +| R1 | User cannot provide samples in time | Medium | Warmup blocked | User can provide partial samples (1-2 examples); warmup can use them as a starter | +| R2 | User's samples don't have enough de-obfuscation patterns | Medium | Thin lexicon | Phase 1 (lexicon) extends with constructive type theory defaults | +| R3 | Report grows too large | Low | Hard to reference | Cap at 3000 LOC; defer deep examples to Appendix | +| R4 | Template is too abstract (LLM doesn't follow it) | Medium | Apply phase produces poor de-obfuscations | Pilot (Phase 2) catches this; pilot_report.md captures refinements | +| R5 | User's style is too idiosyncratic to formalize | Low | Lexicon is "in your head" only | Capture 1-2 example transformations explicitly; the rest can be implicit in the prompt template | + +--- + +## 10. User Directives (recorded for next agent / future-self) + +- **2026-06-21:** The user's de-obfuscation philosophy anchors (see umbrella spec §10 / §1.1 here). 
+- **2026-06-21:** "I can provide samples of notes I've done but it will take time and might be best to leave to a 'warmup' track to gather and survey those, to then codify how this de-obfuscation via an llm following that within a track's plan would do." +- **2026-06-21:** "Report + prompt template" (the warmup's output shape). + +--- + +## 11. See Also + +- [Umbrella spec.md](../video_analysis_deob_20260621/spec.md) — full design +- [Umbrella README.md](../video_analysis_deob_20260621/README.md) — child index +- [Pass 1 campaign spec §0, §11](../video_analysis_campaign_20260621/spec.md) — multi-pass framing + Pass 2 handoff (now superseded by the umbrella above) +- [intent_dsl_survey_20260612/report_v1.2.md](../intent_dsl_survey_20260612/report_v1.2.md) — sibling DSL; same report structure +- `conductor/workflow.md` "Standard Task Workflow" + "Tier 1 Track Initialization Rules" +- `conductor/code_styleguides/agent_memory_dimensions.md` — Pass 2 produces a "knowledge" memory artifact diff --git a/conductor/tracks/video_analysis_deob_warmup_20260621/state.toml b/conductor/tracks/video_analysis_deob_warmup_20260621/state.toml new file mode 100644 index 00000000..61b79ff5 --- /dev/null +++ b/conductor/tracks/video_analysis_deob_warmup_20260621/state.toml @@ -0,0 +1,59 @@ +# Track state for video_analysis_deob_warmup_20260621 +# Updated by Tier 2 Tech Lead during execution + +[meta] +track_id = "video_analysis_deob_warmup_20260621" +name = "Video Analysis De-obfuscation Warmup (Pass 2 precursor)" +status = "active" +current_phase = 0 # Phase 0 = waiting for user samples +last_updated = "2026-06-21" + +[blocked_by] +# User action item: user must provide 3-10 samples of past de-obfuscation notes in samples/ + +[blocks] +video_analysis_deob_lexicon_20260621 = "blocked (consumes report.md + prompt_template.md)" +video_analysis_deob_pilot_20260621 = "blocked (consumes report.md + prompt_template.md)" +video_analysis_deob_apply_20260621 = "blocked (consumes report.md + 
prompt_template.md)" + +[phases] +phase_0 = { status = "in_progress", checkpointsha = "", name = "User samples provided (USER action item)" } +phase_1 = { status = "pending", checkpointsha = "", name = "Survey the samples (Tier 3 worker)" } +phase_2 = { status = "pending", checkpointsha = "", name = "Write report.md (the design doc)" } +phase_3 = { status = "pending", checkpointsha = "", name = "Write prompt_template.md (the LLM operational spec)" } +phase_4 = { status = "pending", checkpointsha = "", name = "User review + approval" } + +[tasks] +# Phase 0 (USER action) +t0_1 = { status = "pending", commit_sha = "", description = "User gathers 3-10 samples of past de-obfuscation notes and places them in samples/. Format: any text (markdown, txt, mixed). Gitignored." } + +# Phase 1 (survey) +t1_1 = { status = "pending", commit_sha = "", description = "Tier 3 worker surveys the samples: term frequency, structural patterns, form projection heuristics, noise-dedup maps, etymology style, example transformations" } + +# Phase 2 (report.md) +t2_1 = { status = "pending", commit_sha = "", description = "Write report.md (~1000-3000 LOC) following §FR4 structure: philosophy + prior art + lexicon (4 tiers) + 3 dedup maps + form-anchor rule + etymology rule + sample transformations + connection to phase children + provenance appendix" } +t2_2 = { status = "pending", commit_sha = "", description = "Commit report.md with git note summarizing the lexicon + dedup maps discovered" } + +# Phase 3 (prompt_template.md) +t3_1 = { status = "pending", commit_sha = "", description = "Write prompt_template.md (~200-500 LOC) following §FR5 structure: role + input + output (3-layer) + lexicon + 4 rules + 3 dedup maps + 3-layer format + verification + example transformations" } +t3_2 = { status = "pending", commit_sha = "", description = "Commit prompt_template.md with git note summarizing the template's operational scope" } + +# Phase 4 (user review) +t4_1 = { status = "pending", commit_sha = 
"", description = "User reviews both deliverables. Approves or iterates (loop back to Phase 2 or 3)" } +t4_2 = { status = "pending", commit_sha = "", description = "Update state.toml to status = 'completed'" } + +[verification] +samples_provided = false +report_md_committed = false +prompt_template_md_committed = false +user_approved = false +state_toml_completed = false + +[user_directives_logged] +unorthodox_curation = "Per user 2026-06-21: 'I have a very unorthodox take for how I curate knowledge, especially formal knowledge in the math and sciences.'" +constructive_type_theory = "Per user 2026-06-21: 'I like Norman Wildberger's work. And I like the constructivist current progress on type theories as a foundational system.'" +bounded_for_knowledge = "Per user 2026-06-21: 'No observer or mechanism or construct can be infinite in resolution or quantification. To have distinction must have a bounds.'" +cycles_iteration_allowed = "Per user 2026-06-21: 'Infinite is okay well handled CORRECTLY... What can be indefinite is that can be subjected upon is that of cycles, that of iteration, that of repetition.'" +warmup_evidence_based = "Per user 2026-06-21: 'I can provide samples of notes I've done but it will take time and might be best to leave to a warmup track to gather and survey those, to then codify how this de-obfuscation via an llm following that within a track's plan would do.'" +report_plus_template = "Per user 2026-06-21: warmup output is report.md + prompt_template.md" +no_day_estimates = "Per conductor/workflow.md Tier 1 Track Initialization Rules (added 2026-06-16). Scope measured in files/sites only."