conductor(deob_warmup): Initialize warmup track (precursor)
Research-style track. Produces 2 deliverables from the user's past de-obfuscation samples: - report.md: design philosophy + curated lexicon + 3 noise-dedup maps + sample transformations - prompt_template.md: LLM-direct operational spec; can be invoked as-is with a new Pass 1 report Phase 0: USER action item (gather 3-10 samples into samples/, gitignored) Phase 1: Tier 3 worker surveys (term frequency, structural patterns, form projection heuristics) Phase 2: Write report.md Phase 3: Write prompt_template.md Phase 4: User review + approval blocked_by: user samples blocks: lexicon, pilot, apply (3 phase children)
This commit is contained in:
@@ -0,0 +1,130 @@
|
||||
{
|
||||
"track_id": "video_analysis_deob_warmup_20260621",
|
||||
"name": "Video Analysis De-obfuscation Warmup (Pass 2 precursor)",
|
||||
"created": "2026-06-21",
|
||||
"status": "spec_approved",
|
||||
"blocked_by": ["user_samples_provided"],
|
||||
"blocks": [
|
||||
"video_analysis_deob_lexicon_20260621",
|
||||
"video_analysis_deob_pilot_20260621",
|
||||
"video_analysis_deob_apply_20260621"
|
||||
],
|
||||
"priority": "A",
|
||||
"rationale": "User-blocking precursor to Pass 2 of the 3-pass research campaign. Produces the initial de-obfuscation lexicon (evidence-based from the user's past samples) and the LLM prompt template. Research-only; no src/ changes. The 3 phase children (lexicon/pilot/apply) all depend on this warmup's output.",
|
||||
"type": "research-only (warmup track; produces report.md + prompt_template.md)",
|
||||
"domain": "meta-tooling (research deliverable + LLM operational spec; no manual_slop src/ changes)",
|
||||
"scope": {
|
||||
"new_folders": [
|
||||
"conductor/tracks/video_analysis_deob_warmup_20260621/"
|
||||
],
|
||||
"new_files": [
|
||||
"spec.md",
|
||||
"plan.md",
|
||||
"metadata.json",
|
||||
"state.toml",
|
||||
"samples/<user-provided-files> (gitignored)",
|
||||
"report.md (the design doc; ~1000-3000 LOC)",
|
||||
"prompt_template.md (the LLM operational spec; ~200-500 LOC)"
|
||||
],
|
||||
"modified_files": [],
|
||||
"deleted_files": [],
|
||||
"gitignored_patterns": [
|
||||
"samples/** (user's past de-obfuscation notes are local-only)"
|
||||
]
|
||||
},
|
||||
"estimated_effort": {
|
||||
"method": "scope (per conductor/workflow.md Tier 1 Track Initialization Rules). NO day estimates.",
|
||||
"phase_0": "1 USER action item: gather 3-10 samples into samples/",
|
||||
"phase_1": "1 task: Tier 3 worker surveys the samples (term frequency, structural patterns, form projection heuristics, noise-dedup maps, etymology style, example transformations)",
|
||||
"phase_2": "2 tasks: write report.md + commit with git note",
|
||||
"phase_3": "2 tasks: write prompt_template.md + commit with git note",
|
||||
"phase_4": "2 tasks: user review + state.toml update",
|
||||
"summary": "4 phases, 6 tasks, 2 deliverables (~1500-3500 LOC combined), 1 user action item. No day estimates per project convention."
|
||||
},
|
||||
"verification_criteria": [
|
||||
"report.md exists, follows §FR4 structure, has provenance for every claim",
|
||||
"prompt_template.md exists, follows §FR5 structure, can be invoked as-is with a new Pass 1 report",
|
||||
"At least 1 sample has been processed (user has provided ≥1 sample)",
|
||||
"The 4 verification criteria (lossless, bounded, constructively typed, etymology-cited) are documented in the template",
|
||||
"The 3 noise-dedup maps (at minimum) are documented",
|
||||
"Sample transformations (3-5 canonical before/after pairs) are included",
|
||||
"User has reviewed and approved both deliverables",
|
||||
"Both deliverables are committed atomically (one commit per deliverable)",
|
||||
"Git notes attached to both commits",
|
||||
"state.toml updated to status = 'completed'"
|
||||
],
|
||||
"risk_register": [
|
||||
{
|
||||
"id": "R1",
|
||||
"title": "User cannot provide samples in time",
|
||||
"likelihood": "medium",
|
||||
"scope_impact": "Warmup blocked",
|
||||
"mitigation": "User can provide partial samples (1-2 examples); warmup can use them as a starter; Phase 1 (lexicon) extends with constructive type theory defaults"
|
||||
},
|
||||
{
|
||||
"id": "R2",
|
||||
"title": "User's samples don't have enough de-obfuscation patterns",
|
||||
"likelihood": "medium",
|
||||
"scope_impact": "Thin lexicon",
|
||||
"mitigation": "Phase 1 (lexicon) extends with constructive type theory defaults"
|
||||
},
|
||||
{
|
||||
"id": "R3",
|
||||
"title": "Report grows too large (>3000 LOC)",
|
||||
"likelihood": "low",
|
||||
"scope_impact": "Hard to reference",
|
||||
"mitigation": "Cap at 3000 LOC; defer deep examples to Appendix"
|
||||
},
|
||||
{
|
||||
"id": "R4",
|
||||
"title": "Template is too abstract for LLM to follow",
|
||||
"likelihood": "medium",
|
||||
"scope_impact": "Apply phase produces poor de-obfuscations",
|
||||
"mitigation": "Pilot (Phase 2) catches this; pilot_report.md captures refinements"
|
||||
},
|
||||
{
|
||||
"id": "R5",
|
||||
"title": "User's style is too idiosyncratic to formalize",
|
||||
"likelihood": "low",
|
||||
"scope_impact": "Lexicon is 'in your head' only",
|
||||
"mitigation": "Capture 1-2 example transformations explicitly; the rest can be implicit in the prompt template"
|
||||
}
|
||||
],
|
||||
"architecture_reference": {
|
||||
"primary_documents": [
|
||||
"conductor/workflow.md (track convention, per-task commits, git notes)",
|
||||
"conductor/tracks/video_analysis_deob_20260621/spec.md (umbrella design)",
|
||||
"conductor/tracks/intent_dsl_survey_20260612/report_v1.2.md (report structure precedent)"
|
||||
],
|
||||
"related_tracks": [
|
||||
"conductor/tracks/video_analysis_deob_lexicon_20260621/ (downstream consumer of this warmup)",
|
||||
"conductor/tracks/video_analysis_deob_pilot_20260621/ (downstream consumer of this warmup)",
|
||||
"conductor/tracks/video_analysis_deob_apply_20260621/ (downstream consumer of this warmup)"
|
||||
]
|
||||
},
|
||||
"deferred_to_followup_tracks": [
|
||||
{
|
||||
"title": "Lexicon refinement (Phase 1 child)",
|
||||
"description": "Refines the warmup's draft into a codified operational spec.",
|
||||
"track_status": "blocked_by this warmup"
|
||||
},
|
||||
{
|
||||
"title": "Pilot application (Phase 2 child)",
|
||||
"description": "Applies the refined lexicon to 2 Pass 1 reports (cs229_building_llms, entropy_epiplexity).",
|
||||
"track_status": "blocked_by the lexicon child"
|
||||
},
|
||||
{
|
||||
"title": "Apply phase (Phase 3 child)",
|
||||
"description": "Applies the refined lexicon to 10 remaining Pass 1 reports + 1 cross-cutting synthesis.",
|
||||
"track_status": "blocked_by the pilot child"
|
||||
}
|
||||
],
|
||||
"regressions_and_pre_existing_failures": [],
|
||||
"pre_existing_failures_remaining": [],
|
||||
"user_directives": [
|
||||
"De-obfuscation philosophy anchors (2026-06-21) - see umbrella spec §10",
|
||||
"Warmup is the precursor; lexicon is evidence-based from user's past samples (2026-06-21)",
|
||||
"Report + prompt template as the warmup output (2026-06-21)",
|
||||
"No day estimates per conductor/workflow.md Tier 1 Track Initialization Rules (added 2026-06-16). Scope measured in files/sites only."
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,63 @@
|
||||
# Plan: Video Analysis De-obfuscation Warmup
|
||||
|
||||
This is the warmup-level plan for the precursor to Pass 2 of the 3-pass research campaign. Per the Tier 1 Track Initialization Rules, scope is measured in files/sites — no day estimates.
|
||||
|
||||
## Phase 0: User samples provided (USER action item)
|
||||
|
||||
- [ ] **Task 0.1:** User gathers 3-10 samples of past de-obfuscation notes and places them in `conductor/tracks/video_analysis_deob_warmup_20260621/samples/`. Format: any text the user has (markdown, txt, mixed). Samples are gitignored.
|
||||
|
||||
## Phase 1: Survey the samples
|
||||
|
||||
- [ ] **Task 1.1:** Tier 2 Tech Lead delegates to a Tier 3 worker: survey each sample in `samples/`. Identify:
|
||||
- Term frequency (which math/DSL terms appear most often)
|
||||
- Structural patterns (how the user organizes de-obfuscations — section structure, table format, etc.)
|
||||
- "Form projection" heuristics (how the user bounds the indefinite)
|
||||
- "Noise dedup" maps (synonyms that collapse)
|
||||
- Etymology-citation style (how the user documents word origins)
|
||||
- Example transformations (candidate "before/after" pairs from the samples; aim for the 3-5 canonical pairs that `report.md` §7 requires)
|
||||
|
||||
## Phase 2: Write `report.md` (the design doc)
|
||||
|
||||
- [ ] **Task 2.1:** Write `report.md` (~1000-3000 LOC). Follow the structure from spec §FR4:
|
||||
- §1 De-obfuscation philosophy (anchored to user directives)
|
||||
- §2 Prior art (the user's influences)
|
||||
- §3 Lexicon (terms + re-encodings, ~4 tiers)
|
||||
- §4 3 noise-dedup maps (or however many emerge)
|
||||
- §5 Form-anchor rule
|
||||
- §6 Etymology rule
|
||||
- §7 Sample transformations (3-5 canonical before/after pairs)
|
||||
- §8 Connection to the 3 phase children
|
||||
- Appendix A. Provenance
|
||||
- [ ] **Task 2.2:** Commit `report.md` with git note summarizing the lexicon + dedup maps discovered.
|
||||
|
||||
## Phase 3: Write `prompt_template.md` (the LLM operational spec)
|
||||
|
||||
- [ ] **Task 3.1:** Write `prompt_template.md` (~200-500 LOC). Follow the structure from spec §FR5:
|
||||
- Role, input, output (3-layer format)
|
||||
- Lexicon (concise version; reference report.md for full)
|
||||
- The 4 rules (boundedness, form anchor, etymology, lossless)
|
||||
- The 3 noise-dedup maps (apply automatically)
|
||||
- The 3-layer format spec
|
||||
- Verification checklist
|
||||
- Example transformations (drawn from samples)
|
||||
- [ ] **Task 3.2:** Commit `prompt_template.md` with git note summarizing the template's operational scope.
|
||||
|
||||
## Phase 4: User review + approval
|
||||
|
||||
- [ ] **Task 4.1:** User reviews both deliverables. Approves the lexicon + template, or iterates (loop back to Phase 2 or 3).
|
||||
- [ ] **Task 4.2:** Update `state.toml` to `status = "completed"`.
|
||||
|
||||
## Verification (gate per workflow.md)
|
||||
|
||||
- [ ] `report.md` follows the §FR4 structure with provenance per claim
|
||||
- [ ] `prompt_template.md` follows the §FR5 structure, can be invoked as-is with a new report
|
||||
- [ ] At least 1 sample has been processed
|
||||
- [ ] The 4 verification criteria are documented in the template
|
||||
- [ ] The 3 noise-dedup maps are documented
|
||||
- [ ] Sample transformations (3-5 canonical before/after pairs) are included
|
||||
- [ ] User has reviewed and approved
|
||||
- [ ] Both deliverables are committed atomically
|
||||
- [ ] Git notes attached to both commits
|
||||
- [ ] `state.toml` updated to `status = "completed"`
|
||||
|
||||
The warmup is "Pass 2 warmup complete" when both deliverables are committed + user-approved. The 3 phase children can then start.
|
||||
@@ -0,0 +1,328 @@
|
||||
# Track Specification: Video Analysis De-obfuscation Warmup (2026-06-21)
|
||||
|
||||
**Status:** Active (spec approved 2026-06-21)
|
||||
**Initialized:** 2026-06-21
|
||||
**Owner:** Tier 1 Orchestrator (spec); Tier 2 Tech Lead (execution)
|
||||
**Priority:** A (user-blocking; Pass 2 of the 3-pass research campaign)
|
||||
**Type:** Research-only track (precursor to Pass 2; produces 2 deliverables: `report.md` + `prompt_template.md`)
|
||||
**Domain:** Meta-tooling (research deliverable + LLM operational spec; no `src/` changes)
|
||||
|
||||
> **Parent:** This is a precursor to the [video_analysis_deob_20260621](../video_analysis_deob_20260621/) umbrella (Pass 2 of the 3-pass research campaign). See [umbrella spec.md](../video_analysis_deob_20260621/spec.md) for the full design and the user's de-obfuscation philosophy.
|
||||
|
||||
> **Purpose.** This track is the warmup that produces the initial de-obfuscation lexicon + LLM prompt template from the user's own past de-obfuscation notes. The lexicon is **evidence-based**, not invented. The user provides samples; Tier 2/3 surveys them; the deliverables are the design doc + the LLM operational spec that the 3 phase children consume.
|
||||
|
||||
> **Multi-pass context.** The de-obfuscation is the second of 3 passes in the research campaign (Pass 1 = information extraction; Pass 2 = de-obfuscation; Pass 3 = projection to applied domain). Pass 2 takes Pass 1's 12 deep-dive reports + 1 cross-cutting synthesis and produces a 3-layer de-obfuscated deliverable per video.
|
||||
|
||||
---
|
||||
|
||||
## 1. Overview
|
||||
|
||||
This track is a research-style track that produces 2 deliverables:
|
||||
|
||||
1. **`report.md`** (the design doc) — the curated de-obfuscation lexicon + the 3 noise-dedup maps + sample transformations drawn from the user's past notes. Follows the `intent_dsl_survey_20260612/report_v1.2.md` precedent (research-style, multi-section, with provenance for every claim).
|
||||
|
||||
2. **`prompt_template.md`** (the LLM operational spec) — a self-contained prompt template that an LLM can be invoked with to perform the de-obfuscation on a new Pass 1 report. The template references the report (for grounding) and includes example transformations drawn from the user's samples.
|
||||
|
||||
The two deliverables together form the **lexicon contract** that the 3 phase children consume.
|
||||
|
||||
### 1.1 The user's de-obfuscation philosophy (anchors for the report)
|
||||
|
||||
The report documents the philosophy the user described in the 2026-06-21 brainstorming session:
|
||||
|
||||
| Anchor | Take |
|
||||
|---|---|
|
||||
| **Form requires bounds** | "To be known is to project a form." Boundedness is required for direct knowledge. |
|
||||
| **Indefinite is not directly knowable** | What is unbounded is indefinite; what is indefinite is indiscernible, unobserved, unsubject, unknowable. |
|
||||
| **Cycles/iteration/repetition are allowed** | Indefinite *operations* on bounded *forms* are expressible. `Stream A = nat -> A` is fine; `∞_val` is not. |
|
||||
| **Constructive type theory** | Proofs = programs (Curry-Howard); every value is a bounded form; operations are transformations. |
|
||||
| **Lexicon is etymology-aware** | Each term's word origin + definitional history is documented. |
|
||||
| **PL inspiration** | Concatenative (Forth/KYRA/CoSy), data-oriented imperative (Lottes), immediate-mode DAG-building DSLs (O'Donnell's IMGUI). |
|
||||
|
||||
### 1.2 The 3 noise-dedup maps (to be discovered from samples)
|
||||
|
||||
The report includes 3 noise-dedup maps (or however many emerge from the samples). The expected categories, per the user's "Curry-Howard collapse" framing:
|
||||
|
||||
1. **Proofs = Programs = Computations** (Curry-Howard: types=propositions, programs=proofs)
|
||||
2. **Sets = Kinds = Types** (constructive type theory: no unbounded "set of all sets")
|
||||
3. **Functions = Procedures = Words** (concatenative: each function is a "word" with implicit args)
|
||||
|
||||
The report may discover additional collapse maps from the samples.
|
||||
|
||||
---
|
||||
|
||||
## 2. Current State Audit (as of 2026-06-21)
|
||||
|
||||
### 2.1 Already Available (DO NOT re-derive)
|
||||
|
||||
| Asset | Location | Use in this track |
|
||||
|---|---|---|
|
||||
| Pass 1 reports | `conductor/tracks/video_analysis_<slug>_20260621/report.md` | The input to be de-obfuscated; the report should anticipate this input's patterns |
|
||||
| The user's directives | (this brainstorming session, 2026-06-21) | The anchors for the philosophy sections |
|
||||
| `intent_dsl_survey_20260612/report_v1.2.md` | `conductor/tracks/intent_dsl_survey_20260612/` | Sibling DSL (tool-verb DSL for AI agents); shares philosophy but is for tool verbs, not math re-encoding |
|
||||
| Research-track precedent | `conductor/tracks/intent_dsl_survey_20260612/` | Same structure: multi-section report with provenance per claim |
|
||||
|
||||
### 2.2 Gaps to Fill (this track's scope)
|
||||
|
||||
| # | Gap | Resolution |
|
||||
|---|---|---|
|
||||
| G1 | No codified de-obfuscation DSL | Survey the user's samples; produce `report.md` |
|
||||
| G2 | No LLM-direct operational spec | Produce `prompt_template.md` based on the survey |
|
||||
| G3 | No "form anchor" pattern documented | Document it in the report; require it in the prompt template |
|
||||
| G4 | No "etymology rule" applied to math terms | Discover from samples; document in the report |
|
||||
| G5 | No 3 noise-dedup maps | Discover from samples; document in the report |
|
||||
|
||||
---
|
||||
|
||||
## 3. Goals
|
||||
|
||||
1. **Evidence-based lexicon.** Every term in the lexicon is grounded in a pattern from the user's past samples (with a citation).
|
||||
2. **LLM-direct prompt template.** The template can be invoked as-is with a new report to produce a de-obfuscation. The template + a Pass 1 report → a 3-layer deliverable.
|
||||
3. **Self-referential.** The template references the report (for grounding) and the samples (for example transformations).
|
||||
4. **Philosopher-of-record.** The report captures the user's de-obfuscation philosophy (per §1.1 anchors) so future agents can pick it up.
|
||||
5. **Pass 3 readiness.** The lexicon + template is designed so Pass 3 (projection to applied domain) can consume the de-obfuscated outputs as input.
|
||||
|
||||
---
|
||||
|
||||
## 4. Functional Requirements
|
||||
|
||||
### FR1. Warmup folder structure
|
||||
|
||||
**WHERE:** `conductor/tracks/video_analysis_deob_warmup_20260621/`
|
||||
|
||||
**WHAT:** This folder contains the warmup design (this spec) + 3 sibling files (`plan.md`, `metadata.json`, `state.toml`) + the user's samples in `samples/` (gitignored) + the 2 deliverables (`report.md` + `prompt_template.md`).
|
||||
|
||||
### FR2. User samples (Phase 0)
|
||||
|
||||
**WHERE:** `conductor/tracks/video_analysis_deob_warmup_20260621/samples/`
|
||||
|
||||
**WHAT:** The user gathers 3-10 samples of their past de-obfuscation notes and places them here. Format: any text the user has (markdown, txt, mixed). The samples are **gitignored** (per AGENTS.md — local-only artifacts).
|
||||
|
||||
If the user provides fewer than 3 samples, the warmup can proceed with 1-2 as a starter; the lexicon will be thin, and the lexicon child track (the "Phase 1" child of the umbrella, not this track's Phase 1 survey) will extend it with constructive type theory defaults.
|
||||
|
||||
### FR3. Survey process (Tier 2 + Tier 3)
|
||||
|
||||
**Process:** Tier 2 delegates the survey to a Tier 3 worker. The worker:
|
||||
- Reads each sample in `samples/`
|
||||
- Identifies recurring terms (with frequency counts)
|
||||
- Identifies structural patterns (how the user organizes de-obfuscations)
|
||||
- Identifies "form projection" heuristics (how the user bounds the indefinite)
|
||||
- Identifies "noise dedup" maps (synonyms that collapse)
|
||||
- Identifies the user's typical etymology-citation style
|
||||
- Identifies example transformations (candidate "before/after" pairs from the samples; aim for the 3-5 canonical pairs that FR4 §7 requires)
|
||||
|
||||
**Output:** A survey report (in `state.toml`'s working memory or a draft `report.md`).
|
||||
|
||||
### FR4. Deliverable 1: `report.md`
|
||||
|
||||
**Structure (per `intent_dsl_survey_20260612/report_v1.2.md` precedent):**
|
||||
|
||||
```markdown
|
||||
# De-obfuscation Lexicon (2026-06-21)
|
||||
|
||||
**Track:** video_analysis_deob_warmup_20260621
|
||||
**Date:** 2026-06-21
|
||||
**Status:** v1 (warmup deliverable)
|
||||
**User directives:** See §1 (anchors from 2026-06-21 brainstorming)
|
||||
|
||||
## 1. The De-obfuscation Philosophy
|
||||
- 1.1 Form requires bounds
|
||||
- 1.2 Indefinite is not directly knowable
|
||||
- 1.3 Cycles/iteration are allowed
|
||||
- 1.4 Constructive type theory as foundation
|
||||
- 1.5 Etymology-aware lexicon
|
||||
- 1.6 PL inspiration: concatenative + data-oriented + immediate-mode
|
||||
|
||||
## 2. Prior Art (the user's influences)
|
||||
- 2.1 Norman Wildberger (rational trigonometry, algebraic finitism)
|
||||
- 2.2 Constructive type theory (Curry-Howard)
|
||||
- 2.3 Concatenative PLs (Forth, ColorForth, KYRA/VAMP, CoSy)
|
||||
- 2.4 Data-oriented imperative (Lottes)
|
||||
- 2.5 Immediate-mode DSL DAGs (O'Donnell)
|
||||
|
||||
## 3. The Lexicon (terms + re-encodings)
|
||||
- 3.1 Tier 1: Core concepts (~10 terms, e.g., Set→Kind, ∀→forall, ∃→exists)
|
||||
- 3.2 Tier 2: Data-oriented pipeline terms (~12 terms)
|
||||
- 3.3 Tier 3: Type-theoretic primitives (~10 terms)
|
||||
- 3.4 Tier 4: AI-fuzzing tolerance terms (~8 terms)
|
||||
- 3.5 Boundedness rules (∞_val banned, ∞_proc allowed, ∞_card banned)
|
||||
|
||||
## 4. The 3 Noise-Dedup Maps
|
||||
- 4.1 Proofs = Programs = Computations
|
||||
- 4.2 Sets = Kinds = Types
|
||||
- 4.3 Functions = Procedures = Words
|
||||
- 4.4 (additional maps discovered from samples)
|
||||
|
||||
## 5. The Form-Anchor Rule
|
||||
- Every re-encoding must have a form anchor: "what bounded form does this project from the indefinite?"
|
||||
- Examples from the user's samples
|
||||
|
||||
## 6. The Etymology Rule
|
||||
- Every new term has a 1-line origin + 1-line definition history
|
||||
- Examples from the user's samples
|
||||
|
||||
## 7. Sample Transformations
|
||||
- 3-5 canonical "before/after" pairs drawn from the user's samples
|
||||
- Each pair: original notation → re-encoded form, with the lexicon terms used
|
||||
|
||||
## 8. Connection to the 3 Phase Children
|
||||
- 8.1 Phase 1 (lexicon): consumes this report, refines into a codified spec
|
||||
- 8.2 Phase 2 (pilot): consumes the refined lexicon, applies to 2 videos
|
||||
- 8.3 Phase 3 (apply): consumes the pilot's refined lexicon, applies to 10 + synthesis
|
||||
|
||||
## Appendix A. Provenance
|
||||
- Every claim cites: the user's directive, the sample file, or the prior-art reference
|
||||
```
|
||||
|
||||
**Target:** ~1000-3000 LOC. Expected to land near the lower end of that range — comparable to `intent_dsl_survey_20260612/report_v1.2.md` (1343 lines) or smaller — because the prior-art survey is shorter here (the philosophy is the user's, not derived from external sources).
|
||||
|
||||
### FR5. Deliverable 2: `prompt_template.md`
|
||||
|
||||
**Structure (LLM-direct, self-contained):**
|
||||
|
||||
```markdown
|
||||
# De-obfuscation Prompt Template (v1, 2026-06-21)
|
||||
|
||||
> Use this template to de-obfuscate a Pass 1 video report.
|
||||
> Reference: report.md (the design doc) for the full lexicon.
|
||||
> Reference: samples/ (gitignored, the user's past de-obfuscations) for examples.
|
||||
|
||||
## Your role
|
||||
You are a de-obfuscator. Your task: take a Pass 1 report (full of standard math notation + verbose verbiage) and produce a 3-layer de-obfuscated deliverable.
|
||||
|
||||
## Input
|
||||
- <path/to/pass1-report.md> (e.g., conductor/tracks/video_analysis_<slug>_20260621/report.md)
|
||||
- <path/to/summary.md> (optional, for cross-referencing)
|
||||
|
||||
## Output (3 files in <output-dir>/<slug>/)
|
||||
- <slug>_translation.md (side-by-side table)
|
||||
- <slug>_deobfuscated.md (replacement; re-encoded form replaces the original)
|
||||
- <slug>_decoder.md (per-term decoder with form anchor + etymology + definition history)
|
||||
|
||||
## Lexicon (reference report.md §3 for full)
|
||||
[concise version: Tier 1-4 term tables]
|
||||
|
||||
## The 4 Rules
|
||||
1. Boundedness: every value is a finite form. `∞_val` and `∞_card` are banned; `∞_proc` is allowed (as `Stream A = nat -> A`).
|
||||
2. Form anchor: every re-encoding must have a "what bounded form does this project from the indefinite?"
|
||||
3. Etymology: every new term gets a 1-line origin + 1-line definition history in the decoder.
|
||||
4. Lossless: every Pass 1 concept is represented. If a concept can't be bounded, mark it "indefinite — see original" rather than forcing a translation.
|
||||
|
||||
## The 3 Noise-Dedup Maps (apply automatically)
|
||||
- Proofs = Programs = Computations (use the chosen term per the sample's style)
|
||||
- Sets = Kinds = Types
|
||||
- Functions = Procedures = Words
|
||||
|
||||
## The 3-Layer Format
|
||||
### Layer 1: Translation (side-by-side table)
|
||||
| # | Original Section | Original Expression | Re-encoded Form | Form Anchor |
|
||||
| ... |
|
||||
|
||||
### Layer 2: De-obfuscated (replacement)
|
||||
[Same 8-section structure as Pass 1, but with re-encoded math]
|
||||
|
||||
### Layer 3: Decoder (per-term)
|
||||
## Term: <name>
|
||||
- Original notation: ...
|
||||
- Re-encoded: ...
|
||||
- Form anchor: ...
|
||||
- Etymology: ...
|
||||
- Definition history: ...
|
||||
- Source sections in original: ...
|
||||
|
||||
## Verification
|
||||
After producing the 3 files, verify:
|
||||
- [ ] Lossless (no Pass 1 concept dropped)
|
||||
- [ ] Bounded (no ∞_val or ∞_card)
|
||||
- [ ] Constructively typed (every expression has a type)
|
||||
- [ ] Etymology-cited (every new term has the 1-line origin + 1-line definition history)
|
||||
|
||||
## Example transformations (drawn from samples)
|
||||
[3-5 canonical before/after pairs]
|
||||
```
|
||||
|
||||
**Target:** ~200-500 LOC. The template is dense (every word is operational).
|
||||
|
||||
### FR6. Dependency
|
||||
|
||||
- **`blocked_by`:** the user providing samples (USER action item, Phase 0).
|
||||
- **`blocks`:** the 3 phase children (lexicon, pilot, apply) all depend on this warmup's `report.md` + `prompt_template.md`.
|
||||
|
||||
---
|
||||
|
||||
## 5. Non-Functional Requirements
|
||||
|
||||
- **TDD not applicable** — research-only track; no automated tests.
|
||||
- **Per-task atomic commits** — each deliverable (`report.md` + `prompt_template.md`) is committed in its own commit.
|
||||
- **Git notes** — each commit gets a note summarizing what was produced.
|
||||
- **No day estimates** — scope measured in files/sites per `conductor/workflow.md`.
|
||||
- **No `src/` changes** — research-only.
|
||||
|
||||
---
|
||||
|
||||
## 6. Out of Scope (Explicit)
|
||||
|
||||
- **Lexicon refinement** — handled by the lexicon child track (`video_analysis_deob_lexicon_20260621/`).
|
||||
- **Application to Pass 1 reports** — handled by the pilot + apply child tracks.
|
||||
- **Pass 3 (projection to applied domain)** — future, user-led.
|
||||
- **Interpreter for the de-obfuscation DSL** — out of scope. The LLM is the executor.
|
||||
- **Modifying `src/*.py` files in manual_slop** — research-only.
|
||||
- **Adding `pyproject.toml` dependencies** — all work is research (markdown files).
|
||||
|
||||
---
|
||||
|
||||
## 7. Architecture Reference
|
||||
|
||||
This track does not modify the manual_slop application architecture. The architecture refs that DO apply:
|
||||
|
||||
- **Research-track precedent:** `conductor/tracks/intent_dsl_survey_20260612/` (multi-section report + provenance per claim)
|
||||
- **Track convention:** `conductor/workflow.md` "Standard Task Workflow" + per-task commit discipline
|
||||
- **Multi-pass framing:** `conductor/tracks/video_analysis_campaign_20260621/spec.md` §0, §11
|
||||
- **De-obfuscation design:** `conductor/tracks/video_analysis_deob_20260621/spec.md`
|
||||
|
||||
---
|
||||
|
||||
## 8. Verification Criteria
|
||||
|
||||
The warmup is "done" when all of the following are true:
|
||||
|
||||
- [ ] `report.md` exists, is non-empty, follows the §FR4 structure, has provenance for every claim
|
||||
- [ ] `prompt_template.md` exists, is non-empty, follows the §FR5 structure, can be invoked as-is with a new report
|
||||
- [ ] At least 1 sample has been processed (the user has provided ≥1 sample)
|
||||
- [ ] The 4 verification criteria (lossless, bounded, constructively typed, etymology-cited) are documented in the template
|
||||
- [ ] The 3 noise-dedup maps (at minimum) are documented
|
||||
- [ ] Sample transformations (3-5 canonical before/after pairs) are included
|
||||
- [ ] User has reviewed and approved the deliverables
|
||||
- [ ] Both deliverables are committed (one commit per deliverable, per the per-task commit discipline)
|
||||
- [ ] Git notes attached to both commits
|
||||
- [ ] `state.toml` updated to `status = "completed"`
|
||||
|
||||
---
|
||||
|
||||
## 9. Risk Register
|
||||
|
||||
| ID | Title | Likelihood | Scope impact | Mitigation |
|
||||
|---|---|---|---|---|
|
||||
| R1 | User cannot provide samples in time | Medium | Warmup blocked | User can provide partial samples (1-2 examples); warmup can use them as a starter |
|
||||
| R2 | User's samples don't have enough de-obfuscation patterns | Medium | Thin lexicon | The lexicon child track (Phase 1 child) extends with constructive type theory defaults |
|
||||
| R3 | Report grows too large | Low | Hard to reference | Cap at 3000 LOC; defer deep examples to Appendix |
|
||||
| R4 | Template is too abstract (LLM doesn't follow it) | Medium | Apply phase produces poor de-obfuscations | The pilot child track (Phase 2 child) catches this; pilot_report.md captures refinements |
|
||||
| R5 | User's style is too idiosyncratic to formalize | Low | Lexicon is "in your head" only | Capture 1-2 example transformations explicitly; the rest can be implicit in the prompt template |
|
||||
|
||||
---
|
||||
|
||||
## 10. User Directives (recorded for next agent / future-self)
|
||||
|
||||
- **2026-06-21:** The user's de-obfuscation philosophy anchors (see umbrella spec §10 / §1.1 here).
|
||||
- **2026-06-21:** "I can provide samples of notes I've done but it will take time and might be best to leave to a 'warmup' track to gather and survey those, to then codify how this de-obfuscation via an llm following that within a track's plan would do."
|
||||
- **2026-06-21:** "Report + prompt template" (the warmup's output shape).
|
||||
|
||||
---
|
||||
|
||||
## 11. See Also
|
||||
|
||||
- [Umbrella spec.md](../video_analysis_deob_20260621/spec.md) — full design
|
||||
- [Umbrella README.md](../video_analysis_deob_20260621/README.md) — child index
|
||||
- [Pass 1 campaign spec §0, §11](../video_analysis_campaign_20260621/spec.md) — multi-pass framing + Pass 2 handoff (now superseded by the umbrella above)
|
||||
- [intent_dsl_survey_20260612/report_v1.2.md](../intent_dsl_survey_20260612/report_v1.2.md) — sibling DSL; same report structure
|
||||
- `conductor/workflow.md` "Standard Task Workflow" + "Tier 1 Track Initialization Rules"
|
||||
- `conductor/code_styleguides/agent_memory_dimensions.md` — Pass 2 produces a "knowledge" memory artifact
|
||||
@@ -0,0 +1,59 @@
|
||||
# Track state for video_analysis_deob_warmup_20260621
|
||||
# Updated by Tier 2 Tech Lead during execution
|
||||
|
||||
[meta]
|
||||
track_id = "video_analysis_deob_warmup_20260621"
|
||||
name = "Video Analysis De-obfuscation Warmup (Pass 2 precursor)"
|
||||
status = "active"
|
||||
current_phase = 0 # Phase 0 = waiting for user samples
|
||||
last_updated = "2026-06-21"
|
||||
|
||||
[blocked_by]
|
||||
# User action item: user must provide 3-10 samples of past de-obfuscation notes in samples/
|
||||
|
||||
[blocks]
|
||||
video_analysis_deob_lexicon_20260621 = "blocked (consumes report.md + prompt_template.md)"
|
||||
video_analysis_deob_pilot_20260621 = "blocked (consumes report.md + prompt_template.md)"
|
||||
video_analysis_deob_apply_20260621 = "blocked (consumes report.md + prompt_template.md)"
|
||||
|
||||
[phases]
|
||||
phase_0 = { status = "in_progress", checkpointsha = "", name = "User samples provided (USER action item)" }
|
||||
phase_1 = { status = "pending", checkpointsha = "", name = "Survey the samples (Tier 3 worker)" }
|
||||
phase_2 = { status = "pending", checkpointsha = "", name = "Write report.md (the design doc)" }
|
||||
phase_3 = { status = "pending", checkpointsha = "", name = "Write prompt_template.md (the LLM operational spec)" }
|
||||
phase_4 = { status = "pending", checkpointsha = "", name = "User review + approval" }
|
||||
|
||||
[tasks]
|
||||
# Phase 0 (USER action)
|
||||
t0_1 = { status = "pending", commit_sha = "", description = "User gathers 3-10 samples of past de-obfuscation notes and places them in samples/. Format: any text (markdown, txt, mixed). Gitignored." }
|
||||
|
||||
# Phase 1 (survey)
|
||||
t1_1 = { status = "pending", commit_sha = "", description = "Tier 3 worker surveys the samples: term frequency, structural patterns, form projection heuristics, noise-dedup maps, etymology style, example transformations" }
|
||||
|
||||
# Phase 2 (report.md)
|
||||
t2_1 = { status = "pending", commit_sha = "", description = "Write report.md (~1000-3000 LOC) following §FR4 structure: philosophy + prior art + lexicon (4 tiers) + 3 dedup maps + form-anchor rule + etymology rule + sample transformations + connection to phase children + provenance appendix" }
|
||||
t2_2 = { status = "pending", commit_sha = "", description = "Commit report.md with git note summarizing the lexicon + dedup maps discovered" }
|
||||
|
||||
# Phase 3 (prompt_template.md)
|
||||
t3_1 = { status = "pending", commit_sha = "", description = "Write prompt_template.md (~200-500 LOC) following §FR5 structure: role + input + output (3-layer) + lexicon + 4 rules + 3 dedup maps + 3-layer format + verification + example transformations" }
|
||||
t3_2 = { status = "pending", commit_sha = "", description = "Commit prompt_template.md with git note summarizing the template's operational scope" }
|
||||
|
||||
# Phase 4 (user review)
|
||||
t4_1 = { status = "pending", commit_sha = "", description = "User reviews both deliverables. Approves or iterates (loop back to Phase 2 or 3)" }
|
||||
t4_2 = { status = "pending", commit_sha = "", description = "Update state.toml to status = 'completed'" }
|
||||
|
||||
[verification]
|
||||
samples_provided = false
|
||||
report_md_committed = false
|
||||
prompt_template_md_committed = false
|
||||
user_approved = false
|
||||
state_toml_completed = false
|
||||
|
||||
[user_directives_logged]
|
||||
unorthodox_curation = "Per user 2026-06-21: 'I have a very unorthodox take for how I curate knowledge, especially formal knowledge in the math and sciences.'"
|
||||
constructive_type_theory = "Per user 2026-06-21: 'I like Norman Wildberger's work. And I like the constructivist current progress on type theories as a foundational system.'"
|
||||
bounded_for_knowledge = "Per user 2026-06-21: 'No observer or mechanism or construct can be infinite in resolution or quantification. To have distinction must have a bounds.'"
|
||||
cycles_iteration_allowed = "Per user 2026-06-21: 'Infinite is okay well handled CORRECTLY... What can be indefinite is that can be subjected upon is that of cycles, that of iteration, that of repetition.'"
|
||||
warmup_evidence_based = "Per user 2026-06-21: 'I can provide samples of notes I've done but it will take time and might be best to leave to a warmup track to gather and survey those, to then codify how this de-obfuscation via an llm following that within a track's plan would do.'"
|
||||
report_plus_template = "Per user 2026-06-21: warmup output is report.md + prompt_template.md"
|
||||
no_day_estimates = "Per conductor/workflow.md Tier 1 Track Initialization Rules (added 2026-06-16). Scope measured in files/sites only."
|
||||
Reference in New Issue
Block a user