diff --git a/conductor/tracks/metadata_field_cache_20260624/metadata.json b/conductor/tracks/metadata_field_cache_20260624/metadata.json new file mode 100644 index 00000000..142b22c6 --- /dev/null +++ b/conductor/tracks/metadata_field_cache_20260624/metadata.json @@ -0,0 +1,68 @@ +{ + "track_id": "metadata_field_cache_20260624", + "name": "Child 3: Metadata Field Cache", + "track_type": "campaign_child", + "parent_campaign": "metadata_ssdl_defusing_20260624", + "created_date": "2026-06-24", + "branch": "master", + "depends_on": ["code_path_audit_20260607", "metadata_nil_sentinel_20260624", "metadata_generational_handle_20260624"], + "blocks_within_campaign": [], + "scope": { + "new_files": [ + "conductor/tracks/metadata_field_cache_20260624/spec.md", + "conductor/tracks/metadata_field_cache_20260624/plan.md", + "conductor/tracks/metadata_field_cache_20260624/metadata.json", + "conductor/tracks/metadata_field_cache_20260624/state.toml", + "tests/test_metadata_field_cache.py", + "docs/reports/TRACK_COMPLETION_metadata_field_cache_20260624.md" + ], + "modified_files": [ + "src/aggregate.py (MetadataFieldCache + field-access migrations)", + "src/ai_client.py (field-access migrations; specific files TBD by Tier 2)", + "conductor/tracks.md", + "docs/reports/campaign_measurements_20260624.md" + ], + "deleted_files": [] + }, + "estimated_effort": { + "method": "scope (per workflow.md §Tier 1 Track Initialization Rules). 
NO day estimates.", + "phase_1": "1 task: 4 behavioral tests", + "phase_2": "1 task: MetadataFieldCache + 123 field-access site migrations", + "phase_3": "1 task: 6 VCs + budget gate + TRACK_COMPLETION + state + tracks.md + campaign SHIPPED" + }, + "verification_criteria": [ + "VC1: MetadataFieldCache exists", + "VC2: Production code uses the cache at field-access sites", + "VC3: Behavioral test exists and passes", + "VC4: Budget gate met (drop >= 30% vs post-child-2 measurement)", + "VC5: Full test suite remains green (11/11 tiers PASS)", + "VC6: 4 audit gates remain clean" + ], + "known_issues": [], + "deferred_to_followup_tracks": [], + "regressions_and_pre_existing_failures": [], + "pre_existing_failures_remaining": [], + "risk_register": [ + { + "id": "risk-1", + "description": "Cache invalidation is wrong (stale values returned)", + "likelihood": "medium", + "impact": "Production code returns stale Metadata values", + "mitigation": "Cache keyed by handle; when underlying value changes, handle's generation bumps (via registry), invalidating cache entries" + }, + { + "id": "risk-2", + "description": "The 123 field-access sites are not actually 123 (audit was wrong)", + "likelihood": "low", + "impact": "Migration scope unclear; budget gate measurement is invalid", + "mitigation": "Re-run detect_access_pattern_evidence after migration to count actual sites" + }, + { + "id": "risk-3", + "description": "Budget gate fails (drop < 30%)", + "likelihood": "low", + "impact": "Child 3 cannot ship; campaign pauses", + "mitigation": "Cache collapses 123 lookups to 1 lookup each; expected large drop" + } + ] +} \ No newline at end of file diff --git a/conductor/tracks/metadata_field_cache_20260624/plan.md b/conductor/tracks/metadata_field_cache_20260624/plan.md new file mode 100644 index 00000000..b39df841 --- /dev/null +++ b/conductor/tracks/metadata_field_cache_20260624/plan.md @@ -0,0 +1,86 @@ +# Plan: metadata_field_cache_20260624 + +3 tasks, 3 atomic commits. 
Same pattern as children 1 and 2. + +## Phase 1: Behavioral Test (1 task) + +- [ ] Task 1.1: Write `tests/test_metadata_field_cache.py`. + - WHERE: New file `tests/test_metadata_field_cache.py` + - WHAT: 4 tests (per spec FR3) + - HOW: Use 1-space indentation + - SAFETY: Test imports `MetadataFieldCache` + `MetadataHandle` from their production locations + - COMMIT: `test(metadata): behavioral test for field cache (MetadataFieldCache)` + - GIT NOTE: 4 tests; will be RED until Phase 2 ships + - VERIFY: `uv run pytest tests/test_metadata_field_cache.py -v` shows 4/4 FAIL (expected) + +## Phase 2: Implementation (1 task) + +- [ ] Task 2.1: Add `MetadataFieldCache` + migrate the 123 field-access sites. + - WHERE: `src/aggregate.py` (cache type) + the 123 field-access sites across `src/` + - WHAT: + - Add `class MetadataFieldCache` with `get`, `set`, `invalidate` methods + - Migrate the 123 `entry.get('key', default)` and `entry['key']` sites to use the cache + - HOW: Use `manual-slop_py_add_def` for the cache type; `manual-slop_edit_file` for the migration sites + - SAFETY: Verify with `ast.parse`; run the 4 tests + the child-1 + child-2 tests + the 14 previously-failing tests + - COMMIT: `feat(metadata): MetadataFieldCache + 123 field-access site migrations` + - GIT NOTE: 123 sites now use cache; cache invalidation on generation bump (child 2's registry) + - VERIFY: `uv run pytest tests/test_metadata_field_cache.py tests/test_metadata_generational_handle.py tests/test_metadata_nil_sentinel.py -v` shows all PASS + +## Phase 3: Verification + Budget Gate (1 task) + +- [ ] Task 3.1: Run all 6 VCs; capture the budget gate measurement; write end-of-campaign TRACK_COMPLETION. 
+ - WHERE: All audit gates + test suite + SSDL measurement + campaign umbrella + - WHAT: + - Run VC1-VC6 for the child + - Compute the new effective-codepaths number + - Compare to post-child-2 measurement; require drop ≥ 30% + - Write the child's TRACK_COMPLETION report + - Update this track's `state.toml` to `status = "completed"`, all 3 phases completed + - Append to campaign_measurements_20260624.md + - Update conductor/tracks.md + - ALSO: write the campaign's end-of-campaign TRACK_COMPLETION at `docs/reports/TRACK_COMPLETION_metadata_ssdl_defusing_20260624.md` + - Update the campaign umbrella's `state.toml` to `status = "completed"`, all 4 phases completed + - HOW: Run each VC command, capture output, write the report. + - SAFETY: Same as children 1 and 2 + - COMMIT: 3 child commits (state, TRACK_COMPLETION, tracks.md) + 3 campaign commits (state, TRACK_COMPLETION, tracks.md) + - GIT NOTE: Per workflow.md + - VERIFY: All 6 VCs pass; budget gate met; campaign umbrella SHIPPED + +## Commit Log (Expected) + +1. `test(metadata): behavioral test for field cache` (Task 1.1) +2. `feat(metadata): MetadataFieldCache + 123 field-access site migrations` (Task 2.1) +3. `conductor(state): metadata_field_cache_20260624 SHIPPED` (Task 3.1) +4. `docs(reports): TRACK_COMPLETION for metadata_field_cache_20260624` (Task 3.1) +5. `conductor(tracks): add metadata_field_cache_20260624 row` (Task 3.1) +6. `docs(reports): TRACK_COMPLETION for metadata_ssdl_defusing_20260624` (Task 3.1) +7. `conductor(state): metadata_ssdl_defusing_20260624 SHIPPED` (Task 3.1) +8. 
`conductor(tracks): add metadata_ssdl_defusing_20260624 row + 3 child rows` (Task 3.1) + +## Verification Commands + +```bash +# VC1: cache type exists +grep -rn "class MetadataFieldCache" src/ + +# VC2: production uses cache +grep -rn "field_cache.get\|field_cache.set" src/ + +# VC3: tests pass +uv run pytest tests/test_metadata_field_cache.py -v + +# VC4: budget gate (final measurement) +uv run python -c "from src.code_path_audit_ssdl import compute_effective_codepaths; ...; print(compute_effective_codepaths(metadata_profile, 'src'))" + +# VC5: full test suite +uv run python scripts/run_tests_batched.py + +# VC6: 4 audit gates +uv run python scripts/audit_weak_types.py --strict +uv run python scripts/generate_type_registry.py --check +uv run python scripts/audit_main_thread_imports.py +uv run python scripts/audit_no_models_config_io.py + +# Campaign-wide +cat docs/reports/TRACK_COMPLETION_metadata_ssdl_defusing_20260624.md +``` \ No newline at end of file diff --git a/conductor/tracks/metadata_field_cache_20260624/spec.md b/conductor/tracks/metadata_field_cache_20260624/spec.md new file mode 100644 index 00000000..5dcbb0ad --- /dev/null +++ b/conductor/tracks/metadata_field_cache_20260624/spec.md @@ -0,0 +1,114 @@ +# Track Specification: metadata_field_cache_20260624 + +## Overview + +Child 3 of the `metadata_ssdl_defusing_20260624` campaign. Introduces `MetadataFieldCache` keyed by `(handle.index, field_name)`. The 123 string-keyed `entry.get('key', default)` field-access sites become 123 cache lookups. **BLOCKED_BY child 2** (the handle provides the stable cache key). 
+ +## Current State Audit (master @ child-2-SHIPPED, after metadata_generational_handle_20260624) + +- `NIL_METADATA` sentinel exists (from child 1) +- `MetadataHandle` + `MetadataHandleRegistry` exist (from child 2) +- The 123 field-access sites in `src/aggregate.py`, `src/ai_client.py`, and other production files use `entry.get('key', default)` or `entry['key']` patterns + +## Goals + +| ID | Goal | Acceptance | +|---|---|---| +| G1 | `MetadataFieldCache` introduced: keyed by `(handle.index, field_name)` | The cache type is exported; can be created, queried, and invalidated | +| G2 | The 123 field-access sites use the cache | `grep -rn "MetadataFieldCache\|field_cache.get\|field_cache.set" src/` returns ≥ 1 hit per site | +| G3 | 1 behavioral test file for the cache (at least 4 tests, per FR3) | `tests/test_metadata_field_cache.py` exists; asserts hit, miss, invalidation via generation bump, and stale-write rejection | +| G4 | Budget gate met: effective-codepaths drop ≥ 30% vs post-child-2 measurement | Re-measurement shows the drop | + +## Non-Goals + +- Touching the 4 other `dict[str, Any]` aliases — out of scope +- Touching the list-typed aggregates — out of scope +- Replacing the 3 candidate placeholders — blocked on `any_type_componentization_20260621` +- Cache for the inner Metadata values within nested structures (the campaign handles the top-level Metadata) +- Eviction policy (LRU, etc.) 
— the cache is small (123 entries) and keyed by handle; no eviction needed + +## Functional Requirements + +### FR1: Cache type + +In a sensible location (likely `src/aggregate.py`): + +```python +class MetadataFieldCache: + def __init__(self) -> None: + self._cache: dict[tuple[int, str], Any] = {} + self._generations: dict[int, int] = {} # mirrors the registry + + def get(self, handle: MetadataHandle, field_name: str) -> Any: + # if handle.generation != self._generations[handle.index], return NIL (stale handle) + # otherwise return self._cache.get((handle.index, field_name), NIL) + + def set(self, handle: MetadataHandle, field_name: str, value: Any) -> None: + # if handle.generation != self._generations[handle.index], ignore (stale write) + # otherwise self._cache[(handle.index, field_name)] = value + + def invalidate(self, index: int) -> None: + # clear all entries for this index; bump generation +``` + +(Exact API up to Tier 2; the contract is: cache keyed by `(index, field_name)`, generation-aware invalidation.) + +### FR2: Migrate the 123 field-access sites + +For each `entry.get('key', default)` or `entry['key']` site in the production code: +- Replace with `cache.get(handle, 'key') or NIL_METADATA.get('key', default)` (or similar; note that a bare `or` also discards legitimately falsy cached values such as `0`, `''`, or `False`, so the final pattern should compare against the NIL sentinel instead; the exact pattern depends on whether the cache returns the raw value or the cached Metadata) +- Alternative: the cache stores the entire Metadata (or per-field values), and consumers request `(handle, 'field_name')` + +The exact migration pattern is up to Tier 2. The acceptance criterion is that the 123 sites are migrated. 
+ +### FR3: Behavioral test + +`tests/test_metadata_field_cache.py` with at least 4 tests: +- `test_cache_hit`: `cache.get(handle, 'key')` after `cache.set(handle, 'key', value)` returns value +- `test_cache_miss`: `cache.get(handle, 'key')` without prior set returns NIL (or None) +- `test_cache_invalidation_on_bump`: after `cache.invalidate(handle.index)`, `cache.get(handle, 'key')` returns NIL +- `test_cache_stale_write_ignored`: `cache.set(stale_handle, 'key', value)` does not write + +## Non-Functional Requirements + +- NFR1: 1-space indentation +- NFR2: CRLF line endings on Windows +- NFR3: No comments in source code +- NFR4: Per-task atomic commits with git notes +- NFR5: No new pip dependencies +- NFR6: `Result[T]` returns for fallible cache methods +- NFR7: No new `src/*.py` files + +## Architecture Reference + +- `NIL_METADATA` (child 1) — the sentinel returned by `cache.get` on a miss or for a stale handle +- `MetadataHandle` + `MetadataHandleRegistry` (child 2) — the handle provides the cache key +- `docs/reports/code_path_audit/2026-06-22/AUDIT_REPORT.md` Finding 1 Fix 2 — the Immediate-Mode Cache proposal +- `src/code_path_audit_ssdl.py` — the SSDL infrastructure used to measure progress +- `conductor/code_styleguides/data_oriented_design.md` — canonical DOD reference + +## Out of Scope + +- The 4 other `dict[str, Any]` aliases (deferred) +- The 3 candidate placeholders (blocked) +- Runtime profiling (Track F) +- Eviction policy (the cache is small) + +## Verification Criteria (Definition of Done) + +| # | Criterion | Verification command | +|---|---|---| +| VC1 | `MetadataFieldCache` exists | `grep -rn "class MetadataFieldCache" src/` | +| VC2 | Production code uses the cache at field-access sites | `grep -rn "field_cache.get\|field_cache.set" src/` returns ≥ 1 hit | +| VC3 | Behavioral test exists and passes | `uv run pytest tests/test_metadata_field_cache.py -v` | +| VC4 | Budget gate met | `compute_effective_codepaths(metadata_profile, 'src')` returns number ≥ 30% 
smaller than post-child-2 measurement | +| VC5 | Full test suite remains green | `uv run python scripts/run_tests_batched.py` → 11/11 tiers PASS | +| VC6 | 4 audit gates remain clean | weak_types ≤ 112, type_registry in sync, main_thread_imports clean, no_models_config_io clean | + +## Risks + +| # | Risk | Likelihood | Mitigation | +|---|---|---|---| +| R1 | Cache invalidation is wrong (stale values returned) | medium | Cache is keyed by handle; when the underlying value changes, the handle's generation bumps (via the registry), invalidating cache entries. Behavioral test verifies the invalidation path. | +| R2 | The 123 field-access sites are not actually 123 (audit was wrong) | low | Re-run `src.code_path_audit.detect_access_pattern_evidence` after migration to count the actual sites | +| R3 | Budget gate fails (drop < 30%) | low | The cache collapses 123 lookups to 1 lookup each; expected to be a large drop. If not, investigate. | \ No newline at end of file diff --git a/conductor/tracks/metadata_field_cache_20260624/state.toml b/conductor/tracks/metadata_field_cache_20260624/state.toml new file mode 100644 index 00000000..0dbf89a5 --- /dev/null +++ b/conductor/tracks/metadata_field_cache_20260624/state.toml @@ -0,0 +1,44 @@ +# Track state for metadata_field_cache_20260624 +# Child 3 of metadata_ssdl_defusing_20260624 campaign. +# 3 phases, 3 tasks. BLOCKED_BY children 1 and 2. 
+ +[meta] +track_id = "metadata_field_cache_20260624" +name = "Child 3: Metadata Field Cache" +status = "active" +current_phase = 0 +last_updated = "2026-06-24" + +[parent] +parent_campaign = "metadata_ssdl_defusing_20260624" + +[blocked_by] +code_path_audit_20260607 = "shipped" +metadata_nil_sentinel_20260624 = "shipped" +metadata_generational_handle_20260624 = "shipped" + +[blocks] +# This child blocks nothing within the campaign (it's the last child) + +[phases] +phase_1 = { status = "pending", checkpointsha = "", name = "Behavioral Test" } +phase_2 = { status = "pending", checkpointsha = "", name = "Implementation (Cache + 123 migrations)" } +phase_3 = { status = "pending", checkpointsha = "", name = "Verification + Budget Gate + Campaign SHIPPED" } + +[tasks] +t1_1 = { status = "pending", commit_sha = "", description = "Write tests/test_metadata_field_cache.py with 4 tests (red)" } +t2_1 = { status = "pending", commit_sha = "", description = "Add MetadataFieldCache + migrate 123 field-access sites" } +t3_1 = { status = "pending", commit_sha = "", description = "Run all 6 VCs; capture budget gate; write child TRACK_COMPLETION; write campaign TRACK_COMPLETION; update both state.toml + tracks.md" } + +[verification] +vc1_field_cache_exists = false +vc2_production_uses_cache = false +vc3_behavioral_test_passes = false +vc4_budget_gate_met = false +vc5_full_test_suite_green = false +vc6_audit_gates_clean = false + +[budget_gate] +baseline = "post_child_2_measurement" +expected_drop_pct = 30 +post_child_3_measurement = "" \ No newline at end of file diff --git a/conductor/tracks/metadata_generational_handle_20260624/metadata.json b/conductor/tracks/metadata_generational_handle_20260624/metadata.json new file mode 100644 index 00000000..76070555 --- /dev/null +++ b/conductor/tracks/metadata_generational_handle_20260624/metadata.json @@ -0,0 +1,68 @@ +{ + "track_id": "metadata_generational_handle_20260624", + "name": "Child 2: Metadata Generational Handle", + 
"track_type": "campaign_child", + "parent_campaign": "metadata_ssdl_defusing_20260624", + "created_date": "2026-06-24", + "branch": "master", + "depends_on": ["code_path_audit_20260607", "metadata_nil_sentinel_20260624"], + "blocks_within_campaign": ["metadata_field_cache_20260624"], + "scope": { + "new_files": [ + "conductor/tracks/metadata_generational_handle_20260624/spec.md", + "conductor/tracks/metadata_generational_handle_20260624/plan.md", + "conductor/tracks/metadata_generational_handle_20260624/metadata.json", + "conductor/tracks/metadata_generational_handle_20260624/state.toml", + "tests/test_metadata_generational_handle.py", + "docs/reports/TRACK_COMPLETION_metadata_generational_handle_20260624.md" + ], + "modified_files": [ + "src/aggregate.py (MetadataHandle + Registry types + lifetime-check migrations)", + "src/ai_client.py (lifetime-check migrations; specific files TBD by Tier 2)", + "conductor/tracks.md", + "docs/reports/campaign_measurements_20260624.md" + ], + "deleted_files": [] + }, + "estimated_effort": { + "method": "scope (per workflow.md §Tier 1 Track Initialization Rules). 
NO day estimates.", + "phase_1": "1 task: 4 behavioral tests", + "phase_2": "1 task: MetadataHandle + Registry + lifetime-check migrations", + "phase_3": "1 task: 6 VCs + budget gate + TRACK_COMPLETION + state + tracks.md" + }, + "verification_criteria": [ + "VC1: MetadataHandle + MetadataHandleRegistry exist", + "VC2: Production code uses handle + registry at entry points", + "VC3: Behavioral test exists and passes", + "VC4: Budget gate met (drop >= 20% vs post-child-1 measurement)", + "VC5: Full test suite remains green (11/11 tiers PASS)", + "VC6: 4 audit gates remain clean" + ], + "known_issues": [], + "deferred_to_followup_tracks": [], + "regressions_and_pre_existing_failures": [], + "pre_existing_failures_remaining": [], + "risk_register": [ + { + "id": "risk-1", + "description": "The handle breaks code that expects raw Metadata", + "likelihood": "medium", + "impact": "Production code that does entry['key'] directly fails", + "mitigation": "Handle is a wrapper; consumers can extract raw value via .value; behavioral test verifies backwards-compat" + }, + { + "id": "risk-2", + "description": "The registry's lookup is not actually O(1)", + "likelihood": "low", + "impact": "Cache pattern in child 3 doesn't work as expected", + "mitigation": "Registry uses dict[int, int]; lookup is O(1) by construction" + }, + { + "id": "risk-3", + "description": "Budget gate fails (drop < 20%)", + "likelihood": "low", + "impact": "Child 2 cannot ship; campaign pauses", + "mitigation": "3466 branch points include lifetime checks; replacing with handle lookup should drop the count" + } + ] +} \ No newline at end of file diff --git a/conductor/tracks/metadata_generational_handle_20260624/plan.md b/conductor/tracks/metadata_generational_handle_20260624/plan.md new file mode 100644 index 00000000..3e017fb3 --- /dev/null +++ b/conductor/tracks/metadata_generational_handle_20260624/plan.md @@ -0,0 +1,79 @@ +# Plan: metadata_generational_handle_20260624 + +3 tasks, 3 atomic commits. 
Same pattern as child 1. + +## Phase 1: Behavioral Test (1 task) + +- [ ] Task 1.1: Write `tests/test_metadata_generational_handle.py`. + - WHERE: New file `tests/test_metadata_generational_handle.py` + - WHAT: 4 tests (per spec FR3) + - HOW: Use 1-space indentation + - SAFETY: Test imports `MetadataHandle` and `MetadataHandleRegistry` from their production location + - COMMIT: `test(metadata): behavioral test for generational handle (MetadataHandle + Registry)` + - GIT NOTE: 4 tests; will be RED until Phase 2 ships + - VERIFY: `uv run pytest tests/test_metadata_generational_handle.py -v` shows 4/4 FAIL (expected) + +## Phase 2: Implementation (1 task) + +- [ ] Task 2.1: Add `MetadataHandle` + `MetadataHandleRegistry` + migrate lifetime-check code. + - WHERE: `src/aggregate.py` (or another existing module; no new `src/.py` files) + - WHAT: + - Add `@dataclass(frozen=True) class MetadataHandle` with `index: int` and `generation: int` + - Add `class MetadataHandleRegistry` with `register`, `lookup`, `bump_generation` methods + - Migrate lifetime-check sites in `src/aggregate.py`, `src/ai_client.py`, and any other production files + - HOW: Use `manual-slop_py_add_def` for the new types; `manual-slop_edit_file` for the migration sites + - SAFETY: Verify with `ast.parse`; run the 4 tests + the child-1 tests + the 14 previously-failing tests + - COMMIT: `feat(metadata): MetadataHandle + Registry + lifetime-check migrations` + - GIT NOTE: Lifetime checks now go through registry.lookup; generation mismatch returns NIL_METADATA from child 1 + - VERIFY: `uv run pytest tests/test_metadata_generational_handle.py tests/test_metadata_nil_sentinel.py -v` shows all PASS + +## Phase 3: Verification + Budget Gate (1 task) + +- [ ] Task 3.1: Run all 6 VCs; capture the budget gate measurement. 
+ - WHERE: All audit gates + test suite + SSDL measurement + - WHAT: + - Run VC1-VC6 + - Compute the new effective-codepaths number + - Compare to post-child-1 measurement; require drop ≥ 20% + - Write TRACK_COMPLETION report + - Update state.toml to `status = "completed"`, all 3 phases completed + - Append to campaign_measurements_20260624.md + - Update conductor/tracks.md + - HOW: Same as child 1 phase 3 + - SAFETY: Same as child 1 + - COMMIT: 3 commits (state, TRACK_COMPLETION, tracks.md) + - GIT NOTE: Per workflow.md + - VERIFY: All 6 VCs pass; budget gate met; campaign unblocked for child 3 + +## Commit Log (Expected) + +1. `test(metadata): behavioral test for generational handle` (Task 1.1) +2. `feat(metadata): MetadataHandle + Registry + lifetime-check migrations` (Task 2.1) +3. `conductor(state): metadata_generational_handle_20260624 SHIPPED` (Task 3.1) +4. `docs(reports): TRACK_COMPLETION for metadata_generational_handle_20260624` (Task 3.1) +5. `conductor(tracks): add metadata_generational_handle_20260624 row` (Task 3.1) + +## Verification Commands + +```bash +# VC1: types exist +grep -rn "class MetadataHandle\|class MetadataHandleRegistry" src/ + +# VC2: production uses handle +grep -rn "registry.lookup\|registry.register" src/ + +# VC3: tests pass +uv run pytest tests/test_metadata_generational_handle.py -v + +# VC4: budget gate +uv run python -c "from src.code_path_audit_ssdl import compute_effective_codepaths; ...; print(compute_effective_codepaths(metadata_profile, 'src'))" + +# VC5: full test suite +uv run python scripts/run_tests_batched.py + +# VC6: 4 audit gates +uv run python scripts/audit_weak_types.py --strict +uv run python scripts/generate_type_registry.py --check +uv run python scripts/audit_main_thread_imports.py +uv run python scripts/audit_no_models_config_io.py +``` \ No newline at end of file diff --git a/conductor/tracks/metadata_generational_handle_20260624/spec.md b/conductor/tracks/metadata_generational_handle_20260624/spec.md new 
file mode 100644 index 00000000..066bd9b4 --- /dev/null +++ b/conductor/tracks/metadata_generational_handle_20260624/spec.md @@ -0,0 +1,115 @@ +# Track Specification: metadata_generational_handle_20260624 + +## Overview + +Child 2 of the `metadata_ssdl_defusing_20260624` campaign. Wraps `Metadata` in a `(index, generation)` handle resolved through a registry. Collapses lifetime branches to 1 lookup + 1 generation comparison. **BLOCKED_BY child 1** (the nil sentinel is the fallback path on generation mismatch). + +## Current State Audit (master @ child-1-SHIPPED, after metadata_nil_sentinel_20260624) + +- `NIL_METADATA` sentinel exists (from child 1) +- The 6 nil-check functions use sentinel-return +- The 3466 branch points in the parent audit include lifetime checks (e.g., "is this handle still valid?") +- `src/aggregate.py` and `src/ai_client.py` contain lifetime checks; they should be replaced with handle lookup + generation comparison + +## Goals + +| ID | Goal | Acceptance | +|---|---|---| +| G1 | `MetadataHandle` (or equivalent) introduced: a `(index: int, generation: int)` pair | The handle type is exported; can be created and queried | +| G2 | `MetadataHandleRegistry` (or equivalent) introduced: stores `index -> generation` mapping | The registry has O(1) lookup, bump-generation, and get-with-validation methods | +| G3 | Production `Metadata` is wrapped in a handle at the consumer entry points | Consumers can do `handle.registry_lookup()` instead of `if entry.lifetime != current_lifetime: ...` | +| G4 | 1 behavioral test for the handle | `tests/test_metadata_generational_handle.py` exists; asserts lookup, generation mismatch returns `NIL_METADATA`, bump invalidates cached lookups | +| G5 | Budget gate met: effective-codepaths drop ≥ 20% vs post-child-1 measurement | Re-measurement shows the drop | + +## Non-Goals + +- Touching the 4 other `dict[str, Any]` aliases — out of scope (deferred to follow-up campaign) +- Touching the list-typed aggregates — out of 
scope +- Replacing the 3 candidate placeholders — blocked on `any_type_componentization_20260621` +- Adding a generational handle for the inner Metadata values within nested structures (the campaign handles the top-level Metadata; nested handles are out of scope) +- Cache invalidation (that's child 3; this child just provides the identity) + +## Functional Requirements + +### FR1: Handle + Registry types + +In a sensible location (likely `src/aggregate.py` per AGENTS.md §File Naming Convention, OR a NEW module if the user explicitly approves): + +```python +@dataclass(frozen=True) +class MetadataHandle: + index: int + generation: int + +class MetadataHandleRegistry: + def __init__(self) -> None: + self._generations: dict[int, int] = {} + + def register(self, metadata: Metadata) -> MetadataHandle: + # returns a new handle with a fresh index + generation + + def lookup(self, handle: MetadataHandle) -> Metadata | None: + # if handle.index is unknown or handle.generation != self._generations[handle.index], return None + # otherwise return the stored Metadata (or sentinel for now) + + def bump_generation(self, index: int) -> None: + # invalidate the cached entry for this index +``` + +(Exact API up to Tier 2; the contract is: handle + registry with O(1) lookup + generation-based invalidation.) 
+ +### FR2: Migrate lifetime-branch code + +For each site in `src/` that does lifetime checks (e.g., "is this Metadata still the one I cached?"): +- Replace with `handle = registry.register(metadata)` + `value = registry.lookup(handle)` + `if value is None: use NIL_METADATA` + +### FR3: Behavioral test + +`tests/test_metadata_generational_handle.py` with at least 4 tests: +- `test_register_returns_handle`: assert `registry.register(metadata)` returns a `MetadataHandle` with distinct `index` and `generation` +- `test_lookup_returns_none_after_bump`: assert `registry.lookup(handle)` returns None after `registry.bump_generation(handle.index)` +- `test_lookup_returns_none_for_unknown_index`: assert `registry.lookup(MetadataHandle(index=999, generation=1))` returns None +- `test_lookup_returns_value_for_valid_handle`: assert `registry.lookup(handle)` returns the registered Metadata + +## Non-Functional Requirements + +- NFR1: 1-space indentation +- NFR2: CRLF line endings on Windows +- NFR3: No comments in source code +- NFR4: Per-task atomic commits with git notes +- NFR5: No new pip dependencies +- NFR6: `Result[T]` returns for fallible registry methods (per `conductor/code_styleguides/error_handling.md`) +- NFR7: No new `src/*.py` files (per AGENTS.md) — unless the user explicitly approves; default is to put `MetadataHandle` + `MetadataHandleRegistry` in `src/aggregate.py` or another existing module + +## Architecture Reference + +- `src/aggregate.py` (the parent module for `Metadata`) +- `NIL_METADATA` (from child 1) — the fallback returned by `lookup` on generation mismatch +- `docs/reports/code_path_audit/2026-06-22/AUDIT_REPORT.md` Finding 1 Fix 3 — the Generational Handle proposal +- `conductor/code_styleguides/data_oriented_design.md` — canonical DOD reference + +## Out of Scope + +- The 4 other `dict[str, Any]` aliases (deferred) +- The 3 candidate placeholders (blocked) +- The cache (child 3) +- Runtime profiling (Track F from the previous menu; deferred) + 
+## Verification Criteria (Definition of Done) + +| # | Criterion | Verification command | +|---|---|---| +| VC1 | `MetadataHandle` and `MetadataHandleRegistry` exist | `grep -rn "class MetadataHandle\|class MetadataHandleRegistry" src/` | +| VC2 | Production code uses handle + registry at the entry points | `grep -rn "registry.lookup\|registry.register" src/` returns ≥ 1 hit | +| VC3 | Behavioral test exists and passes | `uv run pytest tests/test_metadata_generational_handle.py -v` | +| VC4 | Budget gate met | `compute_effective_codepaths(Metadata_profile)` returns number ≥ 20% smaller than post-child-1 measurement | +| VC5 | Full test suite remains green | `uv run python scripts/run_tests_batched.py` → 11/11 tiers PASS | +| VC6 | 4 audit gates remain clean | weak_types ≤ 112, type_registry in sync, main_thread_imports clean, no_models_config_io clean | + +## Risks + +| # | Risk | Likelihood | Mitigation | +|---|---|---|---| +| R1 | The handle breaks code that expects raw `Metadata` | medium | The handle is a wrapper; consumers can extract the raw value via `.value` or similar. Behavioral test verifies backwards-compat for the common cases. | +| R2 | The registry's lookup is not actually O(1) | low | The registry uses a `dict[int, int]` for `index -> generation`; lookup is O(1) by construction. | +| R3 | Budget gate fails (drop < 20%) | low | The 3466 branch points include lifetime checks; replacing with handle lookup should drop the count. If not, the SSDL math is wrong. | \ No newline at end of file diff --git a/conductor/tracks/metadata_generational_handle_20260624/state.toml b/conductor/tracks/metadata_generational_handle_20260624/state.toml new file mode 100644 index 00000000..b9569e7c --- /dev/null +++ b/conductor/tracks/metadata_generational_handle_20260624/state.toml @@ -0,0 +1,44 @@ +# Track state for metadata_generational_handle_20260624 +# Child 2 of metadata_ssdl_defusing_20260624 campaign. +# 3 phases, 3 tasks. BLOCKED_BY child 1. 
+ +[meta] +track_id = "metadata_generational_handle_20260624" +name = "Child 2: Metadata Generational Handle" +status = "active" +current_phase = 0 +last_updated = "2026-06-24" + +[parent] +parent_campaign = "metadata_ssdl_defusing_20260624" + +[blocked_by] +code_path_audit_20260607 = "shipped" +metadata_nil_sentinel_20260624 = "shipped" + +[blocks] +# Within the campaign: +metadata_field_cache_20260624 = "pending child 2" + +[phases] +phase_1 = { status = "pending", checkpointsha = "", name = "Behavioral Test" } +phase_2 = { status = "pending", checkpointsha = "", name = "Implementation (Handle + Registry + migrations)" } +phase_3 = { status = "pending", checkpointsha = "", name = "Verification + Budget Gate" } + +[tasks] +t1_1 = { status = "pending", commit_sha = "", description = "Write tests/test_metadata_generational_handle.py with 4 tests (red)" } +t2_1 = { status = "pending", commit_sha = "", description = "Add MetadataHandle + MetadataHandleRegistry + migrate lifetime-check sites" } +t3_1 = { status = "pending", commit_sha = "", description = "Run all 6 VCs; capture budget gate measurement; write TRACK_COMPLETION; update state + tracks.md" } + +[verification] +vc1_handle_and_registry_exist = false +vc2_production_uses_registry = false +vc3_behavioral_test_passes = false +vc4_budget_gate_met = false +vc5_full_test_suite_green = false +vc6_audit_gates_clean = false + +[budget_gate] +baseline = "post_child_1_measurement" +expected_drop_pct = 20 +post_child_2_measurement = "" \ No newline at end of file diff --git a/conductor/tracks/metadata_nil_sentinel_20260624/metadata.json b/conductor/tracks/metadata_nil_sentinel_20260624/metadata.json new file mode 100644 index 00000000..f646df74 --- /dev/null +++ b/conductor/tracks/metadata_nil_sentinel_20260624/metadata.json @@ -0,0 +1,68 @@ +{ + "track_id": "metadata_nil_sentinel_20260624", + "name": "Child 1: Metadata Nil Sentinel", + "track_type": "campaign_child", + "parent_campaign": 
"metadata_ssdl_defusing_20260624", + "created_date": "2026-06-24", + "branch": "master", + "depends_on": ["code_path_audit_20260607"], + "blocks_within_campaign": ["metadata_generational_handle_20260624"], + "scope": { + "new_files": [ + "conductor/tracks/metadata_nil_sentinel_20260624/spec.md", + "conductor/tracks/metadata_nil_sentinel_20260624/plan.md", + "conductor/tracks/metadata_nil_sentinel_20260624/metadata.json", + "conductor/tracks/metadata_nil_sentinel_20260624/state.toml", + "tests/test_metadata_nil_sentinel.py", + "docs/reports/TRACK_COMPLETION_metadata_nil_sentinel_20260624.md" + ], + "modified_files": [ + "src/aggregate.py (NIL_METADATA constant + nil-check migrations)", + "src/ai_client.py (nil-check migrations; specific files TBD by Tier 2)", + "conductor/tracks.md", + "docs/reports/campaign_measurements_20260624.md" + ], + "deleted_files": [] + }, + "estimated_effort": { + "method": "scope (per workflow.md §Tier 1 Track Initialization Rules). NO day estimates.", + "phase_1": "1 task: write the 2 behavioral tests", + "phase_2": "1 task: NIL_METADATA constant + 6 nil-check migrations", + "phase_3": "1 task: 6 VCs + budget gate + TRACK_COMPLETION + state + tracks.md" + }, + "verification_criteria": [ + "VC1: NIL_METADATA is defined in src/", + "VC2: The 6 nil-check functions no longer have the 3-pattern nil-check", + "VC3: Behavioral test exists and passes", + "VC4: Budget gate met (drop >= 10% vs 4.01e22 baseline)", + "VC5: Full test suite remains green (11/11 tiers PASS)", + "VC6: 4 audit gates remain clean" + ], + "known_issues": [], + "deferred_to_followup_tracks": [], + "regressions_and_pre_existing_failures": [], + "pre_existing_failures_remaining": [], + "risk_register": [ + { + "id": "risk-1", + "description": "The 6 nil-check functions are in unexpected locations", + "likelihood": "low", + "impact": "Phase 2 needs to find them via grep + detect_nil_check_pattern", + "mitigation": "Audit enumerated all consumer files; detect_nil_check_pattern 
is the oracle" + }, + { + "id": "risk-2", + "description": "NIL_METADATA defaults cause downstream bugs", + "likelihood": "low", + "impact": "Production code that uses the sentinel crashes", + "mitigation": "Empty defaults + behavioral test verifies sentinel works in a real call" + }, + { + "id": "risk-3", + "description": "Budget gate fails (drop < 10%)", + "likelihood": "low", + "impact": "Child 1 cannot ship; campaign pauses", + "mitigation": "The 6 nil-checks are documented; their removal MUST drop the branch count" + } + ] +} \ No newline at end of file diff --git a/conductor/tracks/metadata_nil_sentinel_20260624/plan.md b/conductor/tracks/metadata_nil_sentinel_20260624/plan.md new file mode 100644 index 00000000..f77e0871 --- /dev/null +++ b/conductor/tracks/metadata_nil_sentinel_20260624/plan.md @@ -0,0 +1,92 @@ +# Plan: metadata_nil_sentinel_20260624 + +3 tasks, 3 atomic commits. TDD: write the test first (red), implement (green), commit. + +## Phase 1: Behavioral Test (1 task) + +Focus: Write the failing test for the sentinel. + +- [ ] Task 1.1: Write `tests/test_metadata_nil_sentinel.py`. + - WHERE: New file `tests/test_metadata_nil_sentinel.py` + - WHAT: 2 tests: + - `test_nil_metadata_is_defined`: `from src.aggregate import NIL_METADATA; assert NIL_METADATA is not None; assert isinstance(NIL_METADATA, dict) or isinstance(NIL_METADATA, Metadata)` (depending on whether Metadata is a TypeAlias or class) + - `test_detect_nil_check_pattern_returns_false_for_migrated_functions`: import the 6 migrated functions; assert `detect_nil_check_pattern` returns False for each + - HOW: Use the existing `src/code_path_audit_ssdl.detect_nil_check_pattern` as the oracle. Use 1-space indentation. + - SAFETY: The test file imports the 6 functions. Identify them by running `grep` for `is None` patterns in `src/aggregate.py` and `src/ai_client.py`. If Tier 2 finds additional functions in other files, include them too. 
+ - COMMIT: `test(metadata): behavioral test for nil sentinel (NIL_METADATA)` + - GIT NOTE: 2 tests, imports the 6 functions, asserts detect_nil_check_pattern returns False for each; will be RED until Phase 2 ships + - VERIFY: `uv run pytest tests/test_metadata_nil_sentinel.py -v` shows 2/2 FAIL (expected; the sentinel doesn't exist yet) + +## Phase 2: Implementation (1 task) + +Focus: Define `NIL_METADATA` and migrate the 6 functions. + +- [ ] Task 2.1: Add `NIL_METADATA` and migrate the 6 nil-check functions. + - WHERE: `src/aggregate.py` (NIL_METADATA constant) + the 6 files containing the nil-check functions (likely `src/aggregate.py` and `src/ai_client.py`) + - WHAT: + - Add `NIL_METADATA: Metadata = Metadata(...)` constant in `src/aggregate.py` (the defaults are safe; an empty `{}` if Metadata is a TypeAlias) + - For each of the 6 nil-check functions, replace the `if entry is None: ...` / `if entry == None: ...` / `if entry != None: ...` pattern with sentinel-return + - The most common pattern: `entry = entry or NIL_METADATA` at the top of the function (replaces the `if entry is None: return default` early-return) + - HOW: Use `manual-slop_edit_file` for each migration site. Use `manual-slop_py_add_def` for the `NIL_METADATA` constant. + - SAFETY: + - Verify with `ast.parse(open("src/aggregate.py").read())` + - Run `uv run pytest tests/test_metadata_nil_sentinel.py -v` → 2/2 PASS + - Run the 14 previously-failing tests from `fix_test_failures_20260624` → 14/14 PASS (no regression) + - COMMIT: `feat(metadata): NIL_METADATA sentinel + 6 nil-check migrations` + - GIT NOTE: 6 functions refactored to use sentinel-return; established the fallback that child 2's generation-mismatch path returns to + - VERIFY: `uv run pytest tests/test_metadata_nil_sentinel.py -v` shows 2/2 PASS + +## Phase 3: Verification + Budget Gate (1 task) + +Focus: Run all 6 VCs + the budget gate. + +- [ ] Task 3.1: Run all 6 VCs; capture the budget gate measurement. 
+ - WHERE: All audit gates + test suite + SSDL measurement + - WHAT: + - Run VC1-VC6 (the 6 verification criteria from the spec) + - Compute the new effective-codepaths number: `uv run python -c "from src.code_path_audit_ssdl import compute_effective_codepaths; from src.code_path_audit import AggregateProfile, ...; profile = ...; print(compute_effective_codepaths(profile, 'src'))"` + - Compute the drop vs 4.01e22 baseline; if drop ≥ 10%, mark the budget gate as PASS + - Write the child's TRACK_COMPLETION report at `docs/reports/TRACK_COMPLETION_metadata_nil_sentinel_20260624.md` + - Update this track's `state.toml` to `status = "completed"`, `current_phase = "complete"`, all 3 phases `completed` + - Append the post-child-1 measurement to `docs/reports/campaign_measurements_20260624.md` (the campaign-level log) + - Update `conductor/tracks.md` to add a row for this child + - HOW: Run each VC command, capture output, write the report. + - SAFETY: The 2 pre-existing-violation audit gates (NG1, NG2 from `code_path_audit_polish_20260622`) are still out of scope. Do not regress them. + - COMMIT: 3 commits: `conductor(state): metadata_nil_sentinel_20260624 SHIPPED`, `docs(reports): TRACK_COMPLETION for metadata_nil_sentinel_20260624`, `conductor(tracks): add metadata_nil_sentinel_20260624 row` + - GIT NOTE: 1 per commit per workflow.md + - VERIFY: All 6 VCs pass; budget gate met (drop ≥ 10%); campaign unblocked for child 2 + +## Commit Log (Expected) + +1. `test(metadata): behavioral test for nil sentinel (NIL_METADATA)` (Task 1.1) +2. `feat(metadata): NIL_METADATA sentinel + 6 nil-check migrations` (Task 2.1) +3. `conductor(state): metadata_nil_sentinel_20260624 SHIPPED` (Task 3.1) +4. `docs(reports): TRACK_COMPLETION for metadata_nil_sentinel_20260624` (Task 3.1) +5. `conductor(tracks): add metadata_nil_sentinel_20260624 row` (Task 3.1) + +Plus per-task plan-update commits per the workflow. 
+ +## Verification Commands (run at end of Phase 3) + +```bash +# VC1: NIL_METADATA defined +grep -rn "NIL_METADATA" src/ + +# VC2: detect_nil_check_pattern returns False for the 6 functions +uv run python -c "from src.code_path_audit_ssdl import detect_nil_check_pattern; from src.code_path_audit import FunctionRef; ...; assert not any(detect_nil_check_pattern(f, 'src') for f in the_6_functions)" + +# VC3: behavioral test +uv run pytest tests/test_metadata_nil_sentinel.py -v + +# VC4: budget gate (measure and compare) +uv run python -c "from src.code_path_audit_ssdl import compute_effective_codepaths; ...; print(compute_effective_codepaths(metadata_profile, 'src'))" + +# VC5: full test suite +uv run python scripts/run_tests_batched.py + +# VC6: 4 audit gates +uv run python scripts/audit_weak_types.py --strict +uv run python scripts/generate_type_registry.py --check +uv run python scripts/audit_main_thread_imports.py +uv run python scripts/audit_no_models_config_io.py +``` \ No newline at end of file diff --git a/conductor/tracks/metadata_nil_sentinel_20260624/spec.md b/conductor/tracks/metadata_nil_sentinel_20260624/spec.md new file mode 100644 index 00000000..7c599bdd --- /dev/null +++ b/conductor/tracks/metadata_nil_sentinel_20260624/spec.md @@ -0,0 +1,98 @@ +# Track Specification: metadata_nil_sentinel_20260624 + +## Overview + +Child 1 of the `metadata_ssdl_defusing_20260624` campaign. Introduces `NIL_METADATA = Metadata(...)` sentinel and migrates the 6 nil-check functions identified by the parent audit. Establishes the fallback path that Child 2 (Generational Handle) returns to on generation mismatch. + +## Current State Audit (master @ 7a9261c4) + +- `src/aggregate.py` and `src/ai_client.py` contain 6 functions with `is None` / `== None` / `!= None` patterns on `Metadata` (or aliases of `dict[str, Any]`). These are detected by `src/code_path_audit_ssdl.detect_nil_check_pattern`. 
+- The parent audit (`docs/reports/code_path_audit/2026-06-22/AUDIT_REPORT.md` Finding 1) reports "6 nil-check functions" and "4.01e22 effective codepaths" for the `Metadata` aggregate. +- No `NIL_METADATA` constant exists. + +## Goals + +| ID | Goal | Acceptance | +|---|---|---| +| G1 | `NIL_METADATA = Metadata(...)` constant exists in the production source | `grep -rn "NIL_METADATA" src/` finds the constant + the 6 migration sites | +| G2 | The 6 nil-check functions use the sentinel | `src.code_path_audit_ssdl.detect_nil_check_pattern` returns False for each of the 6 functions after migration | +| G3 | 1 behavioral test file for the sentinel (at least 2 tests, per FR3) | `tests/test_metadata_nil_sentinel.py` exists; all assertions pass | +| G4 | Budget gate met: effective-codepaths drop ≥ 10% vs 4.01e22 baseline | `compute_effective_codepaths(Metadata_profile)` returns a number ≥ 10% smaller than 4.01e22 | + +## Non-Goals + +- Touching the 5 other `dict[str, Any]` aliases (FileItem, CommsLogEntry, HistoryMessage, ToolDefinition, ToolCall) — they have similar patterns (parent audit Finding 2) but are out of scope for this child +- Touching the list-typed aggregates (CommsLog, History, FileItems) — out of scope +- Touching the 3 candidate aggregates (ToolSpec, ChatMessage, ProviderHistory) — blocked on `any_type_componentization_20260621` (NOT on master) +- Refactoring the 6 functions beyond the nil-check migration (the change is surgical: replace the nil-check with sentinel-return) +- Adding new audit infrastructure (the campaign USES the existing SSDL functions) + +## Functional Requirements + +### FR1: Define `NIL_METADATA` + +In a sensible location (likely `src/aggregate.py` since it's the Metadata parent module per `src/code_path_audit.py:343-368`'s `CANONICAL_MEMORY_DIM`), add: + +```python +NIL_METADATA: Metadata = Metadata( + # safe defaults; the exact contents are up to Tier 2 +) +``` + +The defaults MUST be safe (no-op / no-value) such that consumers can blindly use `entry or NIL_METADATA` without 
triggering KeyError or AttributeError. + +### FR2: Migrate the 6 nil-check functions + +For each of the 6 functions detected by `src/code_path_audit_ssdl.detect_nil_check_pattern`: +- Replace `if entry is None: ...` / `if entry is None or entry == "": ...` / `if entry != None: ...` patterns with sentinel-return +- The most common pattern: `entry = entry or NIL_METADATA` at the top of the function +- Preserve the function's existing behavior for non-nil cases + +### FR3: Behavioral test + +`tests/test_metadata_nil_sentinel.py` with at least 2 tests: +- `test_nil_metadata_is_defined`: assert `NIL_METADATA` exists and is a valid `Metadata` (or `dict[str, Any]` if Metadata is a TypeAlias) +- `test_migrated_function_uses_sentinel`: call one of the 6 migrated functions with `None` and assert it returns the sentinel (or behavior equivalent to using the sentinel) + +## Non-Functional Requirements + +- NFR1: 1-space indentation +- NFR2: CRLF line endings on Windows +- NFR3: No comments in source code +- NFR4: Per-task atomic commits with git notes +- NFR5: No new pip dependencies +- NFR6: The 6 migration sites use the existing convention (sentinel pattern, not new abstractions) +- NFR7: No new `src/*.py` files (per AGENTS.md) — `NIL_METADATA` lives in `src/aggregate.py` or another existing module + +## Architecture Reference + +- `src/code_path_audit_ssdl.py:84-100` — `detect_nil_check_pattern` (the function that identifies the 6 sites) +- `src/code_path_audit.py:343-368` — `CANONICAL_MEMORY_DIM` (where Metadata's canonical location is) +- `src/aggregate.py` — the parent module for `Metadata` +- `docs/reports/code_path_audit/2026-06-22/AUDIT_REPORT.md` Finding 1 — the 6 nil-check functions and the proposed fix +- `conductor/code_styleguides/data_oriented_design.md` — the canonical DOD reference + +## Out of Scope + +- All other aggregates (deferred to a follow-up campaign) +- The 3 candidate aggregates (blocked on `any_type_componentization_20260621`) +- Runtime profiling 
(Track F from the previous menu; deferred) + +## Verification Criteria (Definition of Done) + +| # | Criterion | Verification command | +|---|---|---| +| VC1 | `NIL_METADATA` is defined in `src/` | `grep -rn "NIL_METADATA" src/` returns ≥ 1 hit | +| VC2 | The 6 nil-check functions no longer have the 3-pattern nil-check | `src/code_path_audit_ssdl.detect_nil_check_pattern` returns False for all 6 | +| VC3 | Behavioral test exists and passes | `uv run pytest tests/test_metadata_nil_sentinel.py -v` | +| VC4 | Budget gate met | `compute_effective_codepaths(Metadata_profile)` returns number ≥ 10% smaller than 4.01e22 | +| VC5 | Full test suite remains green | `uv run python scripts/run_tests_batched.py` → 11/11 tiers PASS | +| VC6 | 4 audit gates remain clean | weak_types ≤ 112, type_registry in sync, main_thread_imports clean, no_models_config_io clean | + +## Risks + +| # | Risk | Likelihood | Mitigation | +|---|---|---|---| +| R1 | The 6 nil-check functions are in unexpected locations (not aggregate.py or ai_client.py) | low | `detect_nil_check_pattern` enumerates all consumer files; the audit identified them | +| R2 | The `NIL_METADATA` defaults are wrong (cause downstream bugs) | low | The defaults should be safe (empty Metadata with no required fields). Behavioral test verifies sentinel works in a real call. | +| R3 | Budget gate fails (drop < 10%) | low | The 6 nil-checks are documented; their removal MUST drop. If not, the SSDL math is wrong (separate investigation). | \ No newline at end of file diff --git a/conductor/tracks/metadata_nil_sentinel_20260624/state.toml b/conductor/tracks/metadata_nil_sentinel_20260624/state.toml new file mode 100644 index 00000000..6a697876 --- /dev/null +++ b/conductor/tracks/metadata_nil_sentinel_20260624/state.toml @@ -0,0 +1,43 @@ +# Track state for metadata_nil_sentinel_20260624 +# Child 1 of metadata_ssdl_defusing_20260624 campaign. +# 3 phases, 3 tasks. Tier 2 to execute per conductor/workflow.md. 
+ +[meta] +track_id = "metadata_nil_sentinel_20260624" +name = "Child 1: Metadata Nil Sentinel" +status = "active" +current_phase = 0 +last_updated = "2026-06-24" + +[parent] +parent_campaign = "metadata_ssdl_defusing_20260624" + +[blocked_by] +code_path_audit_20260607 = "shipped" + +[blocks] +# Within the campaign: +metadata_generational_handle_20260624 = "pending child 1" + +[phases] +phase_1 = { status = "pending", checkpointsha = "", name = "Behavioral Test" } +phase_2 = { status = "pending", checkpointsha = "", name = "Implementation (NIL_METADATA + 6 migrations)" } +phase_3 = { status = "pending", checkpointsha = "", name = "Verification + Budget Gate" } + +[tasks] +t1_1 = { status = "pending", commit_sha = "", description = "Write tests/test_metadata_nil_sentinel.py with 2 tests (red)" } +t2_1 = { status = "pending", commit_sha = "", description = "Add NIL_METADATA constant + migrate 6 nil-check functions" } +t3_1 = { status = "pending", commit_sha = "", description = "Run all 6 VCs; capture budget gate measurement; write TRACK_COMPLETION; update state + tracks.md" } + +[verification] +vc1_nil_metadata_defined = false +vc2_6_nil_checks_migrated = false +vc3_behavioral_test_passes = false +vc4_budget_gate_met = false +vc5_full_test_suite_green = false +vc6_audit_gates_clean = false + +[budget_gate] +baseline = 4.01e+22 +expected_drop_pct = 10 +# post_child_1_measurement: unset until Task 3.1 records it (TOML has no null; add the key as a float then) \ No newline at end of file diff --git a/conductor/tracks/metadata_ssdl_defusing_20260624/metadata.json b/conductor/tracks/metadata_ssdl_defusing_20260624/metadata.json new file mode 100644 index 00000000..2294b730 --- /dev/null +++ b/conductor/tracks/metadata_ssdl_defusing_20260624/metadata.json @@ -0,0 +1,132 @@ +{ + "track_id": "metadata_ssdl_defusing_20260624", + "name": "Metadata SSDL Defusing Campaign", + "track_type": "campaign", + "created_date": "2026-06-24", + "branch": "master", + "depends_on": ["code_path_audit_20260607"], + "blocks": [], + "child_tracks": [ + { + "track_id": 
"metadata_nil_sentinel_20260624", + "name": "Child 1: Nil Sentinel", + "blocked_by_parent": false, + "blocks_within_campaign": ["metadata_generational_handle_20260624"] + }, + { + "track_id": "metadata_generational_handle_20260624", + "name": "Child 2: Generational Handle", + "blocked_by_parent": true, + "blocks_within_campaign": ["metadata_field_cache_20260624"] + }, + { + "track_id": "metadata_field_cache_20260624", + "name": "Child 3: Immediate-Mode Field Cache", + "blocked_by_parent": true, + "blocks_within_campaign": [] + } + ], + "scope": { + "new_files": [ + "conductor/tracks/metadata_ssdl_defusing_20260624/spec.md", + "conductor/tracks/metadata_ssdl_defusing_20260624/plan.md", + "conductor/tracks/metadata_ssdl_defusing_20260624/metadata.json", + "conductor/tracks/metadata_ssdl_defusing_20260624/state.toml", + "conductor/tracks/metadata_nil_sentinel_20260624/*", + "conductor/tracks/metadata_generational_handle_20260624/*", + "conductor/tracks/metadata_field_cache_20260624/*", + "tests/test_metadata_nil_sentinel.py", + "tests/test_metadata_generational_handle.py", + "tests/test_metadata_field_cache.py", + "docs/reports/campaign_measurements_20260624.md", + "docs/reports/TRACK_COMPLETION_metadata_ssdl_defusing_20260624.md" + ], + "modified_files": [ + "src/aggregate.py", + "src/ai_client.py", + "src/history.py", + "src/aggregate.py (additional functions as Tier 2 identifies)", + "conductor/tracks.md" + ], + "deleted_files": [] + }, + "estimated_effort": { + "method": "scope (per workflow.md §Tier 1 Track Initialization Rules). 
NO day estimates.", + "child_1": "1 atomic commit (sentinel + 6 migrations) + 1 test + 1 verification", + "child_2": "1 atomic commit (handle registry) + 1 test + 1 verification", + "child_3": "1 atomic commit (field cache) + 1 test + 1 verification", + "phase_4": "1 end-of-campaign report + state + tracks.md" + }, + "verification_criteria": [ + "VC1: All 3 child tracks SHIPPED (status=completed, current_phase=complete, all phases completed)", + "VC2: End-of-campaign report exists with 3 measurements + cumulative reduction", + "VC3: Full batched test suite is green (all 11 tiers PASS)", + "VC4: 4 audit gates remain clean", + "VC5: No new top-level src/.py files (per AGENTS.md file naming convention)", + "VC6: 3 behavioral tests for the 3 SSDL primitives exist and pass" + ], + "budget_gates": [ + { + "child": "metadata_nil_sentinel_20260624", + "baseline": "4.01e22 effective codepaths for Metadata (pre-campaign)", + "expected_drop": ">= 10%", + "pause_threshold": "drop < 10%", + "pause_action": "STOP campaign; report to user; investigate why 6 nil-checks didn't reduce branch count" + }, + { + "child": "metadata_generational_handle_20260624", + "baseline": "post-child-1 measurement", + "expected_drop": ">= 20%", + "pause_threshold": "drop < 20%", + "pause_action": "STOP campaign; report to user; investigate why lifetime branches didn't collapse" + }, + { + "child": "metadata_field_cache_20260624", + "baseline": "post-child-2 measurement", + "expected_drop": ">= 30%", + "pause_threshold": "drop < 30%", + "pause_action": "STOP campaign; report to user; investigate why cache lookups didn't reduce branch count" + } + ], + "known_issues": [], + "deferred_to_followup_tracks": [ + { + "id": "deferred-same-techniques-other-aggregates", + "title": "Apply the 3 SSDL primitives to the other 9 in-scope aggregates (FileItem, CommsLogEntry, HistoryMessage, ToolDefinition, ToolCall, Result, the 3 list-typed, the 3 candidates)", + "description": "The 3 SSDL primitives (NIL_METADATA, 
MetadataHandleRegistry, MetadataFieldCache) are general; the same pattern can be applied to other aggregates. The 5 other dict[str, Any] aggregates (FileItem, CommsLogEntry, HistoryMessage, ToolDefinition, ToolCall) show similar patterns per the parent audit Finding 2.", + "track_status": "separate campaign post-this one" + } + ], + "regressions_and_pre_existing_failures": [], + "pre_existing_failures_remaining": [], + "risk_register": [ + { + "id": "risk-1", + "description": "Child 1 doesn't measurably drop the effective-codepaths number", + "likelihood": "low", + "impact": "Campaign pauses at child 1", + "mitigation": "Budget gate catches this; investigate the SSDL math or the audit if drop is 0" + }, + { + "id": "risk-2", + "description": "Child 2 (generational handle) breaks code that expects raw Metadata", + "likelihood": "medium", + "impact": "Production code that does `entry['key']` directly fails; need wrapper migration", + "mitigation": "The handle is a wrapper; consumers can extract raw value via .value or similar; behavioral test verifies backwards-compat" + }, + { + "id": "risk-3", + "description": "Child 3 (field cache) cache invalidation is wrong", + "likelihood": "medium", + "impact": "Stale values returned; subtle bug", + "mitigation": "Cache keyed by handle; when underlying value changes, handle generation bumps, invalidating cache entries" + }, + { + "id": "risk-4", + "description": "Cumulative drop is less than expected", + "likelihood": "low", + "impact": "Campaign produces smaller improvement than expected", + "mitigation": "The campaign's value is in the TECHNIQUE, not the final number; techniques ship, numbers are heuristic" + } + ] +} \ No newline at end of file diff --git a/conductor/tracks/metadata_ssdl_defusing_20260624/plan.md b/conductor/tracks/metadata_ssdl_defusing_20260624/plan.md new file mode 100644 index 00000000..de43c831 --- /dev/null +++ b/conductor/tracks/metadata_ssdl_defusing_20260624/plan.md @@ -0,0 +1,110 @@ +# Campaign 
Plan: metadata_ssdl_defusing_20260624 + +3-child campaign executed sequentially with budget gates. The umbrella plan is intentionally minimal — each child has its own plan. The umbrella tracks the campaign-wide coordination only. + +## Phase 1: Child 1 — Nil Sentinel (metadata_nil_sentinel_20260624) + +Focus: Establish the sentinel fallback path. Independent of children 2 and 3. + +- [ ] Task 1.1: Tier 2 ships child 1 per its own plan.md. + - See `conductor/tracks/metadata_nil_sentinel_20260624/plan.md` for the 3-5 tasks. +- [ ] Task 1.2: Run the budget gate. + - `uv run python -c "..."` to compute the new effective-codepaths number for Metadata. + - If drop ≥ 10% vs 4.01e22 baseline, proceed to Phase 2. + - If drop < 10%, PAUSE and report to user. +- [ ] Task 1.3: Re-run the full batched test suite. + - `uv run python scripts/run_tests_batched.py` → all 11 tiers PASS. +- [ ] Task 1.4: Capture the post-child-1 measurement in `docs/reports/campaign_measurements_20260624.md` (campaign-wide log). + +## Phase 2: Child 2 — Generational Handle (metadata_generational_handle_20260624) + +Focus: Wrap Metadata in (index, generation). BLOCKED_BY Phase 1 (the sentinel is the generation-mismatch fallback). + +- [ ] Task 2.1: Tier 2 ships child 2 per its own plan.md. +- [ ] Task 2.2: Run the budget gate. + - Re-measure effective-codepaths. + - If drop ≥ 20% vs post-child-1 measurement, proceed to Phase 3. + - If drop < 20%, PAUSE and report. +- [ ] Task 2.3: Re-run the full batched test suite. +- [ ] Task 2.4: Append the post-child-2 measurement to the campaign log. + +## Phase 3: Child 3 — Field Cache (metadata_field_cache_20260624) + +Focus: Add the cache keyed by handle. BLOCKED_BY Phase 2 (the handle provides the stable cache key). + +- [ ] Task 3.1: Tier 2 ships child 3 per its own plan.md. +- [ ] Task 3.2: Run the budget gate. + - Re-measure effective-codepaths. + - If drop ≥ 30% vs post-child-2 measurement, proceed to Phase 4. + - If drop < 30%, PAUSE and report. 
+- [ ] Task 3.3: Re-run the full batched test suite. +- [ ] Task 3.4: Append the post-child-3 measurement to the campaign log. + +## Phase 4: End-of-Campaign Report + +Focus: Quantify the cumulative effect. + +- [ ] Task 4.1: Write `docs/reports/TRACK_COMPLETION_metadata_ssdl_defusing_20260624.md`. + - 3 measurements (post-child-1, -2, -3) + the 4.01e22 baseline. + - Total reduction. + - The 3 SSDL primitives' locations in `src/`. + - Links to each child's TRACK_COMPLETION. + - Verdict: did the campaign meet its goal? (Y/N + evidence) +- [ ] Task 4.2: Update this track's `state.toml` to `status = "completed"`, `current_phase = "complete"`, all 4 phases `completed`. +- [ ] Task 4.3: Update `conductor/tracks.md` to add the campaign row + the 3 child rows. + +## Budget Gate Quick Reference + +| Child | Expected drop | If drop < threshold, PAUSE and report | +|---|---|---| +| 1: Nil Sentinel | ≥ 10% | Pause; investigate why the 6 nil-checks didn't reduce branch count | +| 2: Generational Handle | ≥ 20% | Pause; investigate why lifetime branches didn't collapse | +| 3: Field Cache | ≥ 30% | Pause; investigate why cache lookup didn't reduce branch count | + +The thresholds are conservative estimates. Actual drops may be much larger (the heuristic's 4.01e22 is a worst-case upper bound; real numbers may be smaller by orders of magnitude once the techniques are applied). 
+ +## Commit Log (Expected, Campaign-Wide) + +| Commit | Description | +|---|---| +| (from child 1) | `feat(metadata): NIL_METADATA sentinel + 6 nil-check migrations` | +| (from child 1) | `test(metadata): behavioral test for nil sentinel` | +| (from child 1) | `conductor(state): metadata_nil_sentinel_20260624 SHIPPED` | +| (from child 2) | `feat(metadata): generational handle registry` | +| (from child 2) | `test(metadata): behavioral test for handle lookup` | +| (from child 2) | `conductor(state): metadata_generational_handle_20260624 SHIPPED` | +| (from child 3) | `feat(metadata): MetadataFieldCache keyed by handle` | +| (from child 3) | `test(metadata): behavioral test for cache hit/miss` | +| (from child 3) | `conductor(state): metadata_field_cache_20260624 SHIPPED` | +| (campaign) | `docs(reports): TRACK_COMPLETION for metadata_ssdl_defusing_20260624` | +| (campaign) | `conductor(state): metadata_ssdl_defusing_20260624 SHIPPED` | +| (campaign) | `conductor(tracks): add campaign + 3 child rows` | + +Plus per-task plan-update commits per the workflow. 
+ +## Verification Commands (run at end of Phase 4) + +```bash +# VC1: all 3 children SHIPPED +cat conductor/tracks/metadata_nil_sentinel_20260624/state.toml | grep status +cat conductor/tracks/metadata_generational_handle_20260624/state.toml | grep status +cat conductor/tracks/metadata_field_cache_20260624/state.toml | grep status + +# VC2: end-of-campaign report +cat docs/reports/TRACK_COMPLETION_metadata_ssdl_defusing_20260624.md + +# VC3: full test suite +uv run python scripts/run_tests_batched.py + +# VC4: 4 audit gates +uv run python scripts/audit_weak_types.py --strict +uv run python scripts/generate_type_registry.py --check +uv run python scripts/audit_main_thread_imports.py +uv run python scripts/audit_no_models_config_io.py + +# VC5: no new top-level src/ files (added files only; expect empty output) +git diff master..HEAD --name-only --diff-filter=A -- 'src/*.py' | grep -E '\bsrc/[a-z_]+\.py\b' | sort -u + +# VC6: behavioral tests +uv run pytest tests/test_metadata_nil_sentinel.py tests/test_metadata_generational_handle.py tests/test_metadata_field_cache.py -v +``` \ No newline at end of file diff --git a/conductor/tracks/metadata_ssdl_defusing_20260624/spec.md b/conductor/tracks/metadata_ssdl_defusing_20260624/spec.md new file mode 100644 index 00000000..ca06bed8 --- /dev/null +++ b/conductor/tracks/metadata_ssdl_defusing_20260624/spec.md @@ -0,0 +1,147 @@ +# Campaign Specification: metadata_ssdl_defusing_20260624 + +## Overview + +3-child campaign to defuse the `Metadata` aggregate's combinatoric explosion (4.01e22 effective codepaths) via Fleury's SSDL techniques. Each child produces one SSDL primitive, is independently shippable, and is gated by a budget check that re-measures effective codepaths after each child. + +The parent audit (`code_path_audit_20260607` / `AUDIT_REPORT.md` Finding 1, CRITICAL) identified 3 specific techniques: +1. **Nil Sentinel `[N]`** for the 6 nil-check functions +2. **Generational Handle** wrapping Metadata +3. 
**Immediate-Mode Cache `[Q:key] -> [I:FetchCached] -> [T]`** for the 123 field-access sites + +The campaign executes them in dependency order: child 1 introduces the sentinel fallback that child 2's generation-mismatch path returns to; child 2's stable identity enables child 3's cache keying. + +## Current State Audit (as of master @ 7a9261c4, post-fix_test_failures_20260624 merge) + +The audit baseline is captured in `docs/reports/code_path_audit/2026-06-22/AUDIT_REPORT.md`: +- **Metadata:** 483 producers / 752 consumers / 123 field-access sites (0 typed) +- **Effective codepaths:** 4.01e22 (sum of 2^branches across 752 consumers) +- **Branch points:** 3466 across consumer functions +- **Nil-check functions:** 6 (the `is None` / `== None` / `!= None` detection sites) + +The behavioral SSDL test exists at `tests/test_code_path_audit_ssdl_behavioral.py` and uses a synthetic 5-function × 3-if-statements fixture to assert `compute_effective_codepaths` math (5 × 2^3 = 40). The real-Metadata measurement is captured by `src.code_path_audit_ssdl.compute_effective_codepaths(Metadata_profile)`. 
+ +## Goals + +| ID | Goal | Acceptance | +|---|---|---| +| G1 | Child 1 (Nil Sentinel) ships: 6 nil-check functions use sentinel-return | 6 functions refactored; behavioral test for sentinel; 4.01e22 number drops measurably | +| G2 | Child 2 (Generational Handle) ships: lifetime branches collapse to 1 lookup + 1 generation comparison | Registry in place; 1 lookup benchmark; further effective-codepath drop | +| G3 | Child 3 (Field Cache) ships: 123 string-keyed `entry.get('key', default)` sites become cache hits | Cache in place; 123 sites benchmarked; further effective-codepath drop | +| G4 | End-of-campaign report: cumulative effective-codepath measurement vs 4.01e22 baseline | `docs/reports/TRACK_COMPLETION_metadata_ssdl_defusing_20260624.md` written | +| G5 | All 4 audit gates remain clean | weak_types ≤ 112, type_registry in sync, main_thread_imports clean, no_models_config_io clean | + +## Non-Goals + +- Touching any aggregate OTHER than Metadata (the audit's other 9 in-scope aggregates + 3 candidates are out of scope; the SSDL primitives established by this campaign can be applied to them in follow-up campaigns) +- Modifying the audit infrastructure (`src/code_path_audit*.py`); the campaign USES the audit to measure progress but does not change the audit +- Refactoring the 7-file split (NG3 from `code_path_audit_polish_20260622`); that's a separate refactor track +- Runtime profiling (the campaign uses the static `branch_count` heuristic; runtime profiling is Track F from the previous menu) + +## Per-Child Specs + +### Child 1: `metadata_nil_sentinel_20260624` (Nil Sentinel) + +**Scope:** Introduce `NIL_METADATA = Metadata(...)` constant with safe defaults. Migrate the 6 nil-check functions (detected by `src.code_path_audit_ssdl.detect_nil_check_pattern`) to sentinel-return: replace `if entry is None: ...` / `if entry == None: ...` / `if entry != None: ...` patterns with `entry = entry or NIL_METADATA` (or the function's equivalent). 
+ +**Acceptance:** +- 6 functions refactored +- 1 behavioral test (`tests/test_metadata_nil_sentinel.py`): asserts the sentinel is used, asserts the 6 functions no longer have the 3-pattern nil-check branches +- Budget gate: re-run `compute_effective_codepaths(Metadata_profile)`; if the number drops by < 10%, pause and report + +**Why first:** establishes the sentinel that child 2's generation-mismatch path returns to. + +### Child 2: `metadata_generational_handle_20260624` (Generational Handle) + +**Scope:** Wrap Metadata in `(index, generation)` resolved through a registry. Validation is one comparison: if `metadata.generation != registry.generations[metadata.index]`, return `NIL_METADATA`. Otherwise, the value is valid. + +**Acceptance:** +- `MetadataHandleRegistry` (or equivalent) introduced in a sensible location (likely `src/aggregate.py` or a new sibling module per AGENTS.md §File Naming Convention) +- Migration: the production `Metadata` value is now wrapped in a handle; lifetime-branch code (e.g., the 3466 branch points that include lifetime checks) collapses to 1 lookup + 1 comparison +- 1 behavioral test: assert handle lookup is O(1), assert generation mismatch returns sentinel +- Budget gate: re-run `compute_effective_codepaths(Metadata_profile)`; if the number drops by < 20%, pause and report (the generational handle is expected to produce a larger drop than the sentinel) + +**Why second:** builds on child 1's sentinel as the fallback path. Provides a stable identity for child 3's cache keying. + +### Child 3: `metadata_field_cache_20260624` (Immediate-Mode Cache) + +**Scope:** Introduce `MetadataFieldCache[(handle_id, field_name)] -> value`. Consumers request `(metadata_handle, 'field_name')`, get cached value. The 123 string-keyed `entry.get('key', default)` sites become 123 cache lookups. 
+ +**Acceptance:** +- `MetadataFieldCache` introduced +- Migration: the 123 field-access sites in `src/` use the cache +- 1 behavioral test: assert cache hit, assert cache miss with sentinel fallback +- Budget gate: re-run `compute_effective_codepaths(Metadata_profile)`; if the number drops by < 30%, pause and report (the cache is expected to produce the largest drop) + +**Why third:** the cache needs the handle's stable identity (child 2) to use as a key. + +## Budget Gate Protocol + +After each child commits: + +1. **Measure:** run `uv run python -c "from src.code_path_audit import AggregateProfile, ...; from src.code_path_audit_ssdl import compute_effective_codepaths; profile = ...; print(compute_effective_codepaths(profile, 'src'))"` +2. **Compare:** diff vs prior measurement (or 4.01e22 baseline for child 1) +3. **Gate:** if drop < expected threshold (10% / 20% / 30% per child), PAUSE the campaign and report to user +4. **Continue:** if drop ≥ threshold, proceed to next child + +The measurement is captured in the child track's TRACK_COMPLETION report and rolled up into the campaign's end-of-campaign report. + +## Functional Requirements + +### FR1: Each child preserves the existing test suite +After each child, all 6 VCs from the preceding `fix_test_failures_20260624` track remain green: 14 previously-failing tests still pass; no new failures. + +### FR2: Each child is independently shippable +A child can be merged without waiting for the children that come after it (the campaign gates are forward-looking; a child that meets its budget is shippable on its own). Children 2 and 3 still require their predecessors, per the dependency order in the Per-Child Specs. + +### FR3: The end-of-campaign report quantifies the cumulative effect +`docs/reports/TRACK_COMPLETION_metadata_ssdl_defusing_20260624.md` shows: pre-campaign baseline 4.01e22, post-child-1 number, post-child-2 number, post-child-3 number, total reduction. 
+ +## Non-Functional Requirements + +- NFR1: 1-space indentation (project convention) +- NFR2: CRLF line endings on Windows +- NFR3: No comments in source code +- NFR4: No new pip dependencies +- NFR5: Per-task atomic commits with git notes +- NFR6: Each child returns `Result[T]` from fallible functions (per `conductor/code_styleguides/error_handling.md`) +- NFR7: The new SSDL primitives are exported from a sensible location; no new top-level `src/*.py` files (per AGENTS.md §File Naming Convention) unless the user explicitly approves + +## Architecture Reference + +- `docs/reports/code_path_audit/2026-06-22/AUDIT_REPORT.md` — Finding 1 (CRITICAL) and the 3 proposed fixes +- `src/code_path_audit_ssdl.py:84-100` — `detect_nil_check_pattern` (the function that identifies the 6 nil-check sites) +- `src/code_path_audit_ssdl.py:39-55` — `compute_effective_codepaths` (the measurement function) +- `src/code_path_audit.py:271-296` — `CANONICAL_MEMORY_DIM` and `MEMORY_DIM_FILE_HEURISTIC` (where to file new primitives) +- `conductor/code_styleguides/data_oriented_design.md` — the canonical DOD reference +- `conductor/code_styleguides/error_handling.md` — the `Result[T]` convention +- `conductor/code_styleguides/agent_memory_dimensions.md` — the 4 memory dimensions (Metadata is `discussion`) + +## Out of Scope + +- Aggregates other than Metadata (FileItem, CommsLogEntry, HistoryMessage, ToolDefinition, ToolCall, Result, the 3 list-typed, the 3 candidates) — the SSDL primitives are general but the campaign is Metadata-specific +- Modifying `src/code_path_audit*.py` (the audit infrastructure) +- Refactoring the 7-file split +- Runtime profiling (Track F from the previous menu) +- Modifying the campaign structure (3 children are fixed; adding a 4th is out of scope) + +## Verification Criteria (Definition of Done) + +| # | Criterion | Verification command | +|---|---|---| +| VC1 | All 3 children SHIPPED | Each child track has `status = "completed"`, `current_phase = "complete"`, all 
phases `completed` | +| VC2 | End-of-campaign report exists | `cat docs/reports/TRACK_COMPLETION_metadata_ssdl_defusing_20260624.md` shows the 3 measurements + cumulative reduction | +| VC3 | Full test suite remains green | `uv run python scripts/run_tests_batched.py` → all 11 tiers PASS | +| VC4 | 4 audit gates remain clean | weak_types ≤ 112, type_registry in sync, main_thread_imports clean, no_models_config_io clean | +| VC5 | No new `src/*.py` files created (per AGENTS.md) | `git diff master..HEAD --stat -- 'src/*.py' \| grep -E 'src/[a-z_]+\.py'` returns only the existing `src/` modules; the new SSDL primitives live in existing files | +| VC6 | Behavioral tests for each child exist and pass | `uv run pytest tests/test_metadata_nil_sentinel.py tests/test_metadata_generational_handle.py tests/test_metadata_field_cache.py -v` → all pass | + +## Risks + +| # | Risk | Likelihood | Mitigation | +|---|---|---|---| +| R1 | Child 1 doesn't measurably drop the effective-codepaths number | low | The 6 nil-checks are documented in AUDIT_REPORT.md; their removal MUST drop the number. If not, the audit or the SSDL math is wrong (separate investigation). | +| R2 | Child 2 (generational handle) breaks code that expects raw `Metadata` | medium | The handle is a wrapper; consumers can still extract the raw value via `.value` or similar. Behavioral test verifies backwards-compat for the common cases. | +| R3 | Child 3 (field cache) cache invalidation is wrong | medium | The cache is keyed by `(handle_id, field_name)`. When the underlying value changes, the handle's generation bumps, invalidating all cache entries for that handle. The cache is a write-through thin wrapper. | +| R4 | The cumulative drop is less than expected (e.g., 4.01e22 → 1e15 instead of 4.01e22 → 1e5) | low | The campaign's value is in the TECHNIQUE, not the final number. The numbers are heuristic; the actual goal is the structural improvement (sentinel, handle, cache). 
If the techniques ship, the campaign succeeds regardless of the final heuristic number. | +| R5 | New `src/*.py` files needed for the SSDL primitives | low | Per AGENTS.md, helpers go in the parent module. The new primitives live in `src/aggregate.py` (the parent module for `Metadata`). If the user explicitly approves new top-level files, the campaign can be extended. | \ No newline at end of file diff --git a/conductor/tracks/metadata_ssdl_defusing_20260624/state.toml b/conductor/tracks/metadata_ssdl_defusing_20260624/state.toml new file mode 100644 index 00000000..9ceedeec --- /dev/null +++ b/conductor/tracks/metadata_ssdl_defusing_20260624/state.toml @@ -0,0 +1,67 @@ +# Campaign state for metadata_ssdl_defusing_20260624 +# 3-child campaign; sequential with budget gates. +# Tier 2 to execute per conductor/workflow.md. + +[meta] +track_id = "metadata_ssdl_defusing_20260624" +name = "Metadata SSDL Defusing Campaign" +status = "active" +current_phase = 0 +last_updated = "2026-06-24" + +[parent] +# Independent campaign (not part of an umbrella) + +[blocked_by] +code_path_audit_20260607 = "shipped" + +[blocks] +# This campaign blocks no other tracks. It is an exploratory campaign. 
+ +[child_tracks] +metadata_nil_sentinel_20260624 = { status = "pending", short_name = "child_1_nil_sentinel" } +metadata_generational_handle_20260624 = { status = "pending", short_name = "child_2_handle" } +metadata_field_cache_20260624 = { status = "pending", short_name = "child_3_cache" } + +[phases] +phase_1 = { status = "pending", checkpointsha = "", name = "Child 1: Nil Sentinel (with budget gate >= 10% drop)" } +phase_2 = { status = "pending", checkpointsha = "", name = "Child 2: Generational Handle (with budget gate >= 20% drop)" } +phase_3 = { status = "pending", checkpointsha = "", name = "Child 3: Field Cache (with budget gate >= 30% drop)" } +phase_4 = { status = "pending", checkpointsha = "", name = "End-of-Campaign Report" } + +[tasks] +# Phase 1 +t1_1 = { status = "pending", commit_sha = "", description = "Tier 2 ships child 1 per its own plan.md" } +t1_2 = { status = "pending", commit_sha = "", description = "Run child 1 budget gate (>= 10% drop); pause if not met" } +t1_3 = { status = "pending", commit_sha = "", description = "Re-run full batched test suite; verify all 11 tiers PASS" } +t1_4 = { status = "pending", commit_sha = "", description = "Capture post-child-1 measurement in campaign_measurements_20260624.md" } +# Phase 2 +t2_1 = { status = "pending", commit_sha = "", description = "Tier 2 ships child 2 per its own plan.md" } +t2_2 = { status = "pending", commit_sha = "", description = "Run child 2 budget gate (>= 20% drop); pause if not met" } +t2_3 = { status = "pending", commit_sha = "", description = "Re-run full batched test suite" } +t2_4 = { status = "pending", commit_sha = "", description = "Append post-child-2 measurement to campaign log" } +# Phase 3 +t3_1 = { status = "pending", commit_sha = "", description = "Tier 2 ships child 3 per its own plan.md" } +t3_2 = { status = "pending", commit_sha = "", description = "Run child 3 budget gate (>= 30% drop); pause if not met" } +t3_3 = { status = "pending", commit_sha = "", description 
= "Re-run full batched test suite" } +t3_4 = { status = "pending", commit_sha = "", description = "Append post-child-3 measurement to campaign log" } +# Phase 4 +t4_1 = { status = "pending", commit_sha = "", description = "Write TRACK_COMPLETION_metadata_ssdl_defusing_20260624.md" } +t4_2 = { status = "pending", commit_sha = "", description = "Update this state.toml to status=completed" } +t4_3 = { status = "pending", commit_sha = "", description = "Update conductor/tracks.md with campaign + 3 child rows" } + +[verification] +vc1_all_3_children_shipped = false +vc2_end_of_campaign_report_exists = false +vc3_full_batched_suite_green = false +vc4_audit_gates_clean = false +vc5_no_new_top_level_src_files = false +vc6_3_behavioral_tests_pass = false + +[measurements] +# Format: baseline_4.01e22 = sum of 2^branches across all 752 Metadata consumers +# Populated by Tier 2 as children ship +pre_campaign = 4.01e+22 +post_child_1 = null +post_child_2 = null +post_child_3 = null \ No newline at end of file