From e50bebddd9e92b4075874dc1309b173376ac9331 Mon Sep 17 00:00:00 2001 From: Ed_ Date: Thu, 25 Jun 2026 12:06:50 -0400 Subject: [PATCH] conductor(followup): metadata_promotion_20260624 - track artifacts (886 lines) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The actual fix for the 4.01e22 combinatoric explosion. Promotes Metadata: TypeAlias = dict[str, Any] to @dataclass(frozen=True, slots=True) and migrates all 695 consumer functions + 213 access sites (107 .get + 106 subscript) to direct field access. TIER-1 READ AGENTS.md + conductor/workflow.md + conductor/edit_workflow.md + conductor/code_styleguides/data_oriented_design.md + conductor/code_styleguides/error_handling.md + conductor/code_styleguides/type_aliases.md + docs/reports/SSDL_CAMPAIGN_ABORTED_20260624.md + src/type_aliases.py + scripts/code_path_audit/code_path_audit.py + scripts/code_path_audit/code_path_audit_ssdl.py before this commit. Why this fixes 4.01e22: - The combinatoric explosion is from dict[str, Any] type-dispatch at every entry.get('key', default) site (per SSDL post-mortem) - Each access has 3 branches: is None, getattr, default - 695 consumers * ~2 dispatch branches each = ~1390 removable branches in total - The metric is the sum over consumers of 2^(branches in that function), so it is dominated by the highest-branch functions; the measured baseline of 4.01e22 is about 2^75 (not 2^1390) - Promotion to @dataclass with direct field access = 0 branches per access - Expected drop: 4.014e+22 -> < 1e+20 (>= 2 orders of magnitude) 10 VCs: - VC1: Metadata is @dataclass(frozen=True, slots=True), not dict[str, Any] - VC2: 107 .get sites replaced - VC3: 106 subscript sites replaced - VC4: 12+ tests pass in tests/test_metadata_dataclass.py - VC5: 5 sub-aggregate TypeAliases (CommsLogEntry, HistoryMessage, FileItem, ToolDefinition, ToolCall) all point to the new Metadata - VC6: Effective codepaths < 1e+20 - VC7: All 7 audit gates pass --strict - VC8: 10/11 batched test tiers PASS - VC9: End-of-track report written - VC10: New regression-guard test file exists 5-phase phased migration (smallest sub-aggregate 
first): - Phase 1: CommsLogEntry (~150 sites in session_logger, multi_agent_conductor, app_controller) - Phase 2: HistoryMessage (~80 sites in ai_client) - Phase 3: FileItem (~200 sites in aggregate, app_controller, gui_2) - Phase 4: ToolDefinition+ToolCall (~150 sites in mcp_client, ai_client tool loop) - Phase 5: Metadata direct usage (~115 sites catch-all) 6 phases total (0 + 5 + verification). 18-21 atomic commits. blocked_by: code_path_audit_phase_3_provider_state_20260624 (recommended prerequisite; the two tracks are orthogonal so they can run in parallel; listed as blocked_by for sequencing preference not strict blocking) --- .../TIER2_STARTUP.md | 235 +++++++++++++ .../metadata_promotion_20260624/metadata.json | 68 ++++ .../metadata_promotion_20260624/plan.md | 216 ++++++++++++ .../metadata_promotion_20260624/spec.md | 310 ++++++++++++++++++ .../metadata_promotion_20260624/state.toml | 57 ++++ 5 files changed, 886 insertions(+) create mode 100644 conductor/tracks/metadata_promotion_20260624/TIER2_STARTUP.md create mode 100644 conductor/tracks/metadata_promotion_20260624/metadata.json create mode 100644 conductor/tracks/metadata_promotion_20260624/plan.md create mode 100644 conductor/tracks/metadata_promotion_20260624/spec.md create mode 100644 conductor/tracks/metadata_promotion_20260624/state.toml diff --git a/conductor/tracks/metadata_promotion_20260624/TIER2_STARTUP.md b/conductor/tracks/metadata_promotion_20260624/TIER2_STARTUP.md new file mode 100644 index 00000000..a4a23daa --- /dev/null +++ b/conductor/tracks/metadata_promotion_20260624/TIER2_STARTUP.md @@ -0,0 +1,235 @@ +# Tier 2 Startup Brief: metadata_promotion_20260624 + +## Context + +This is the actual fix for the 4.01e22 combinatoric explosion. Promotes `Metadata: TypeAlias = dict[str, Any]` to a typed `@dataclass(frozen=True, slots=True)` and migrates all 695 consumer functions + 213 access sites to direct field access. 
+ +**Recommendation:** Run in parallel with `code_path_audit_phase_3_provider_state_20260624` (the 27-call-site provider_state migration). The two tracks are orthogonal — phase 3 touches `provider_state` infrastructure, this track touches `Metadata` consumers. No merge conflicts expected. + +The `code_path_audit_phase_3_provider_state_20260624` track is listed as `blocked_by` in metadata.json but the blocking is recommended, not strict. If the user wants this track to start first, update metadata.json accordingly. + +## MANDATORY Pre-Action Reading (per agent protocol) + +1. `AGENTS.md` (project root) — operating rules +2. `conductor/workflow.md` — the workflow +3. `conductor/edit_workflow.md` — the edit workflow +4. `conductor/code_styleguides/data_oriented_design.md` — the "Prefer Fewer Types" principle (the canonical rationale) +5. `conductor/code_styleguides/error_handling.md` — the `Result[T]` convention (Rule #0: read first) +6. `conductor/code_styleguides/type_aliases.md` — the 10 TypeAliases convention +7. `docs/reports/SSDL_CAMPAIGN_ABORTED_20260624.md` — the post-mortem explaining why this is a type-dispatch problem, NOT a nil-check problem +8. `src/type_aliases.py` (current 30 lines) +9. `scripts/code_path_audit/code_path_audit.py` (consumer detection) +10. `scripts/code_path_audit/code_path_audit_ssdl.py` (effective codepaths metric) + +**First commit of this track must include** `TIER-2 READ before metadata_promotion_20260624` in the message. + +## The Metadata dataclass (Phase 0) + +```python +# src/type_aliases.py: REPLACE line 5 +# BEFORE: +Metadata: TypeAlias = dict[str, Any] + +# AFTER: +@dataclass(frozen=True, slots=True) +class Metadata: + role: str = "" + content: Any = None + tool_calls: Any = None + tool_call_id: str = "" + name: str = "" + args: Any = None + source_tier: str = "main" + model: str = "unknown" + id: str = "" + ts: str = "" + description: str = "" + depends_on: tuple[str, ...] 
= () + status: str = "" + manual_block: bool = False + completed_tickets: int = 0 + auto_start: bool = False + command: str = "" + script: str = "" + output: Any = None + error: str = "" + tier: str = "" + path: str = "" + full_path: str = "" + filename: str = "" + mtime: float = 0.0 + size: int = 0 + # ... ~150-180 distinct keys from the .get + [] site analysis ... + + def to_dict(self) -> dict[str, Any]: + return {k: v for k, v in asdict(self).items() if v is not None or k in _NON_NULL_KEYS} + + @classmethod + def from_dict(cls, raw: dict[str, Any]) -> 'Metadata': + valid_fields = {f.name for f in fields(cls)} + return cls(**{k: v for k, v in raw.items() if k in valid_fields}) +``` + +The exact list of fields is determined by the union of distinct keys used across all 213 access sites. The spec §FR1 has the seed list; the worker should expand it based on `git grep -hoE` output during Phase 0. + +## Migration pattern (per consumer site) + +```python +# BEFORE: +x = entry.get('model', 'unknown') +y = entry.get('input_tokens', 0) or 0 +z = entry.get('source_tier', 'main') +if entry.get('manual_block', False): + ... +role = entry['role'] +if 'depends_on' in entry: + deps = entry['depends_on'] + +# AFTER (with Metadata dataclass): +x = entry.model or 'unknown' +y = entry.input_tokens or 0 +z = entry.source_tier or 'main' +if entry.manual_block: + ... +role = entry.role +if entry.depends_on: + deps = entry.depends_on +``` + +For polymorphic construction: +```python +# BEFORE: +entry = {'role': 'user', 'content': 'hi'} + +# AFTER: +entry = Metadata(role='user', content='hi') +# Or for dynamic dicts: +entry = Metadata.from_dict(raw_dict) +``` + +For JSON serialization: +```python +# BEFORE: +json.dumps(entry) + +# AFTER: +json.dumps(entry.to_dict()) +``` + +## Phased migration order + +The 695 consumers distribute across 5 sub-aggregates. Migrate sub-aggregate by sub-aggregate: + +1. 
**CommsLogEntry** (~150 sites): `session_logger.py`, `multi_agent_conductor.py`, `app_controller.py` +2. **HistoryMessage** (~80 sites): `ai_client.py` per-vendor history +3. **FileItem** (~200 sites): `aggregate.py`, `app_controller.py`, `gui_2.py` +4. **ToolDefinition + ToolCall** (~150 sites): `mcp_client.py`, `ai_client.py` tool loop section +5. **Metadata direct usage** (~115 sites): the catch-all (gui_2.py general, models.py, paths.py, etc.) + +## Effective codepaths metric + +Expected progression: + +| Phase | Effective codepaths | Consumers | +|---|---|---:| +| Baseline (master) | 4.014e+22 | 695 | +| After Phase 1 (CommsLogEntry) | ~4e+19 | ~545 (150 migrated away) | +| After Phase 2 (HistoryMessage) | ~3e+19 | ~465 | +| After Phase 3 (FileItem) | ~2e+18 | ~265 | +| After Phase 4 (ToolDefinition+ToolCall) | ~1e+17 | ~115 | +| After Phase 5 (Metadata direct) | ~5e+15 | ~0 | + +These are estimates based on the assumption that each migration removes ~2 branches per consumer. The actual drops depend on the specific code. Re-measure after each phase. 
+ +## Pre-flight verification (before Phase 0) + +```bash +# Verify the current state +uv run python -c " +import sys +sys.path.insert(0, 'scripts/code_path_audit') +sys.path.insert(0, 'src') +from code_path_audit import build_pcg +from code_path_audit_ssdl import count_branches_in_function +pcg = build_pcg('src').data +metadata_consumers = pcg.consumers.get('Metadata', []) +total = sum(2 ** count_branches_in_function(f, 'src') for f in metadata_consumers) +print(f'Baseline: {total:.3e} ({len(metadata_consumers)} consumers)') +" +# Expect: 4.014e+22 (695 consumers) + +# Verify the 213 access sites +git grep -E "\.get\('[a-z_]+'," HEAD -- 'src/*.py' | wc -l +# Expect: 107 + +git grep -E "\[[ ]*'[a-z_]+'[ ]*\]" HEAD -- 'src/*.py' | wc -l +# Expect: 106 + +# Verify the 5 sub-aggregate TypeAliases all point to Metadata +git show HEAD:src/type_aliases.py | grep "TypeAlias" +# Expect: +# CommsLogEntry: TypeAlias = Metadata +# HistoryMessage: TypeAlias = Metadata +# FileItem: TypeAlias = Metadata +# ToolDefinition: TypeAlias = Metadata +# ToolCall: TypeAlias = Metadata + +# Verify all 7 audit gates pass +uv run python scripts/audit_weak_types.py --strict +uv run python scripts/generate_type_registry.py --check +uv run python scripts/audit_main_thread_imports.py +uv run python scripts/audit_no_models_config_io.py +uv run python scripts/audit_code_path_audit_coverage.py --input-dir docs/reports/code_path_audit/latest --strict +uv run python scripts/audit_exception_handling.py --strict +uv run python scripts/audit_optional_in_3_files.py --strict +# All exit 0 +``` + +## Post-track verification (after Phase 6) + +```bash +# VC1: Metadata is @dataclass +git show HEAD:src/type_aliases.py | head -20 +# Expect: @dataclass(frozen=True, slots=True) class Metadata: + +# VC2: 0 .get sites on Metadata consumers +git grep -E "\.get\('[a-z_]+'," HEAD -- 'src/*.py' | wc -l +# Expect: <20 (only legitimate non-Metadata uses) + +# VC3: 0 subscript sites on Metadata consumers +git grep -E 
"\[[ ]*'[a-z_]+'[ ]*\]" HEAD -- 'src/*.py' | wc -l +# Expect: <20 + +# VC4: 12+ tests pass +uv run python -m pytest tests/test_metadata_dataclass.py -v + +# VC5: 5 sub-aggregate TypeAliases all point to Metadata +git show HEAD:src/type_aliases.py | grep "TypeAlias = Metadata" + +# VC6: Effective codepaths drops by >= 2 orders of magnitude +uv run python -c " +import sys +sys.path.insert(0, 'scripts/code_path_audit') +sys.path.insert(0, 'src') +from code_path_audit import build_pcg +from code_path_audit_ssdl import count_branches_in_function +pcg = build_pcg('src').data +metadata_consumers = pcg.consumers.get('Metadata', []) +total = sum(2 ** count_branches_in_function(f, 'src') for f in metadata_consumers) +print(f'Post-track: {total:.3e} (baseline: 4.014e+22)') +" +# Expect: < 1e+20 +``` + +## See also + +- `conductor/tracks/metadata_promotion_20260624/spec.md` — the full spec (10 VCs) +- `conductor/tracks/metadata_promotion_20260624/plan.md` — the 5-phase plan +- `conductor/tracks/metadata_promotion_20260624/metadata.json` — the metadata +- `conductor/tracks/metadata_promotion_20260624/state.toml` — the state +- `docs/reports/SSDL_CAMPAIGN_ABORTED_20260624.md` — the post-mortem explaining the type-dispatch root cause +- `conductor/tracks/any_type_componentization_20260621/plan.md` — the grandparent plan +- `src/type_aliases.py` — the current Metadata definition +- `scripts/code_path_audit/code_path_audit.py` — the consumer detection +- `scripts/code_path_audit/code_path_audit_ssdl.py` — the effective codepaths metric +- `conductor/code_styleguides/data_oriented_design.md` — the "Prefer Fewer Types" principle diff --git a/conductor/tracks/metadata_promotion_20260624/metadata.json b/conductor/tracks/metadata_promotion_20260624/metadata.json new file mode 100644 index 00000000..2a7f58ec --- /dev/null +++ b/conductor/tracks/metadata_promotion_20260624/metadata.json @@ -0,0 +1,68 @@ +{ + "track_id": "metadata_promotion_20260624", + "name": "Metadata Promotion: 
dict[str, Any] -> @dataclass(frozen=True, slots=True)", + "status": "active", + "type": "fix", + "parent": "any_type_componentization_20260621", + "grandparent": "code_path_audit_20260607", + "date_created": "2026-06-25", + "created_by": "tier1-orchestrator", + "blocks": [], + "blocked_by": { + "code_path_audit_phase_3_provider_state_20260624": "pending (not started yet; recommended prerequisite to run in parallel)" + }, + "scope": { + "new_files": [ + "tests/test_metadata_dataclass.py", + "docs/reports/metadata_promotion_progress.md" + ], + "modified_files": [ + "src/type_aliases.py" + ], + "consumer_files": [ + "src/session_logger.py", + "src/multi_agent_conductor.py", + "src/app_controller.py", + "src/ai_client.py", + "src/aggregate.py", + "src/gui_2.py", + "src/mcp_client.py", + "src/models.py", + "src/paths.py", + "src/synthesis_formatter.py" + ] + }, + "verification_criteria": [ + "Metadata is @dataclass(frozen=True, slots=True), not dict[str, Any]", + "All 107 .get('key', ...) access sites on Metadata consumers replaced", + "All 106 ['key'] subscript access sites on Metadata consumers replaced", + "tests/test_metadata_dataclass.py: 12+ tests pass", + "All 5 sub-aggregate TypeAliases (CommsLogEntry, HistoryMessage, FileItem, ToolDefinition, ToolCall) point to the new Metadata", + "Effective codepaths drops by >= 2 orders of magnitude (< 1e+20; was 4.014e+22)", + "All 7 audit gates pass --strict (no regression)", + "10/11 batched test tiers PASS (RAG flake acceptable)", + "End-of-track report written (docs/reports/TRACK_COMPLETION_metadata_promotion_20260624.md)", + "New regression-guard test file (tests/test_metadata_dataclass.py)" + ], + "estimated_effort": { + "method": "scope (per workflow.md \u00a7Tier 1 Track Initialization Rules). 
NO day estimates.", + "scope": "1 source file replaced (src/type_aliases.py: 30 lines -> ~200 lines for the dataclass) + 1 new test file (12+ tests) + 10 consumer files modified (~213 access sites total) + 6 phase checkpoint commits; estimated 18-21 atomic commits total" + }, + "risk_register": [ + "R1 (medium): 213 access sites have polymorphic keys that don't fit cleanly into a single dataclass - mitigated by Optional[T] for all fields + from_dict() classmethod + to_dict() for serialization", + "R2 (low): Some sites do entry['key'] with dynamic keys - mitigated by keeping dict-style access for dynamic keys via entry.to_dict()[var_name]", + "R3 (low): to_dict() round-trip loses information for nested dicts - mitigated by careful implementation; nested dicts pass through as dict[str, Any]", + "R4 (medium): Some sites mutate entry (e.g., entry['key'] = value); dataclass is frozen - mitigated by audit + replacement with dataclasses.replace()", + "R5 (low): Migration breaks regression-guard tests - mitigated by per-phase regression-guard test runs", + "R6 (high): 695 consumer functions are too many for one track - mitigated by 5-phase sub-aggregate migration; each phase independent", + "R7 (medium): Dict-shape used for JSON-serialized payloads (comms.log); dataclass breaks JSON layer - mitigated by to_dict() + from_dict() methods on the dataclass" + ], + "out_of_scope": [ + "Modifications to src/code_path_audit*.py (the audit infrastructure is correct)", + "The 4 NG1 + 7 NG2 audit violations (already addressed in dc397db7)", + "The 4.01e22's nil-check component (per SSDL post-mortem; minor contributor)", + "The RAG test pre-existing flake (per docs/reports/SSDL_CAMPAIGN_ABORTED_20260624.md Out of Scope)", + "New src/.py files (per AGENTS.md hard rule; the dataclass goes in src/type_aliases.py)", + "The 5 sub-aggregates becoming separate dataclasses each (overkill; they share the same Metadata base)" + ] +} diff --git 
a/conductor/tracks/metadata_promotion_20260624/plan.md b/conductor/tracks/metadata_promotion_20260624/plan.md new file mode 100644 index 00000000..9b2ba812 --- /dev/null +++ b/conductor/tracks/metadata_promotion_20260624/plan.md @@ -0,0 +1,216 @@ +# Plan: metadata_promotion_20260624 + +5 phases, 12-15 tasks, 12+ atomic commits. Per-task TDD red-first. Tier 3 workers execute; Tier 2 reviews per phase. + +## Phase 0: Design the dataclass + add regression-guard test (2 tasks, 2 commits) + +**Focus:** Create the `@dataclass(frozen=True, slots=True) Metadata` in `src/type_aliases.py` + add the test file. No consumer migration yet. + +- [x] **Task 0.1** [Tier 3]: Design the dataclass. + - WHERE: `src/type_aliases.py` (current 30 lines) + - WHAT: + - Replace `Metadata: TypeAlias = dict[str, Any]` with `@dataclass(frozen=True, slots=True) class Metadata: ...` + - Add the canonical fields (from the spec §FR1): role, content, tool_calls, tool_call_id, name, args, source_tier, model, id, ts, description, depends_on, status, manual_block, completed_tickets, auto_start, command, script, output, error, tier, path, full_path, filename, mtime, size + the other ~150-180 distinct keys from the `.get` and `[]` site analysis + - Add `to_dict()` method (for JSON serialization) + `from_dict()` classmethod (filters unknown keys) + - Add `__post_init__` for any derived value validation + - KEEP the 5 sub-aggregate TypeAliases (`CommsLogEntry: TypeAlias = Metadata` etc.) — they all point to the new dataclass + - KEEP `JsonValue`, `JsonPrimitive`, `CommsLogCallback`, `FileItemsDiff` unchanged + - HOW: `manual-slop_edit_file` for surgical edits + - SAFETY: `ast.parse` OK; `from src.type_aliases import Metadata` OK; `Metadata()` constructor works +- [x] **COMMIT:** `refactor(type_aliases): promote Metadata to @dataclass(frozen=True, slots=True)` (Tier 3) +- [x] **GIT NOTE:** Metadata is now a typed dataclass. The 5 sub-aggregate TypeAliases all point to the same class. 
The consumer migration is in subsequent phases. + +- [x] **Task 0.2** [Tier 3]: Create `tests/test_metadata_dataclass.py`. + - WHERE: NEW FILE `tests/test_metadata_dataclass.py` + - WHAT: 12+ tests: + - `test_empty_constructor`: `Metadata()` returns an instance with all fields as default values + - `test_constructor_with_kwargs`: `Metadata(role='user', content='hi')` works + - `test_field_access`: `entry.role` works + - `test_frozen`: trying to mutate a field raises `dataclasses.FrozenInstanceError` + - `test_slots`: `__slots__` is set (no `__dict__`) + - `test_to_dict`: `entry.to_dict()` returns the same dict as the old `dict[str, Any]` shape + - `test_from_dict`: `Metadata.from_dict({'role': 'user'})` works; unknown keys are silently filtered + - `test_from_dict_preserves_all_fields`: full round-trip + - `test_equality`: two `Metadata(role='user')` instances are equal + - `test_hashable`: `Metadata(role='user')` can be in a set/dict + - `test_type_aliases_resolve_to_metadata`: `CommsLogEntry is Metadata`, `HistoryMessage is Metadata`, etc. + - `test_pickle`: `pickle.dumps(Metadata(...))` works + - HOW: `write_file` to create the new test file (with all 12 tests) + - SAFETY: `uv run python -m pytest tests/test_metadata_dataclass.py -v` shows 12/12 pass +- [x] **COMMIT:** `test(type_aliases): add Metadata dataclass regression-guard suite` (Tier 3) +- [x] **GIT NOTE:** 12 tests cover the dataclass behavior. The consumer migration is in subsequent phases; this commit only adds the dataclass + tests. + +## Phase 1: Migrate `CommsLogEntry` consumers (~150 sites, 1 commit per file) + +**Focus:** The smallest sub-aggregate first. `CommsLogEntry` is used in `app_controller.py` + `multi_agent_conductor.py` + `session_logger.py`. The migration is mechanical: `entry.get('key', default)` → `entry.key or default`. + +- [x] **Task 1.1** [Tier 3]: Migrate `src/session_logger.py` (the smallest, the writer-side). 
+ - WHERE: `src/session_logger.py` (~218 lines; ~30 access sites) + - WHAT: For each `entry.get('key', default)` and `entry['key']` where `entry` is `CommsLogEntry`, replace with `entry.key or default` (or `entry.key` for required fields) + - HOW: `manual-slop_edit_file` per site + - SAFETY: Run `tests/test_session_logger_optimization.py` + `tests/test_session_logger_reset.py` + `tests/test_session_logging.py` + `tests/test_logging_e2e.py` + the new `tests/test_metadata_dataclass.py` +- [x] **COMMIT:** `refactor(session_logger): migrate CommsLogEntry access sites to Metadata dataclass` (Tier 3) +- [x] **GIT NOTE:** Migrated ~30 access sites in session_logger.py. Verified by the 4 session_logger test files + 12 metadata dataclass tests. + +- [x] **Task 1.2** [Tier 3]: Migrate `src/multi_agent_conductor.py` (~70 access sites) +- [x] **Task 1.3** [Tier 3]: Migrate `src/app_controller.py` (the bulk — ~50 access sites that are CommsLogEntry-specific) +- [x] **COMMIT (1.2):** `refactor(multi_agent_conductor): migrate CommsLogEntry access sites` (Tier 3) +- [x] **COMMIT (1.3):** `refactor(app_controller): migrate CommsLogEntry access sites` (Tier 3) +- [x] **GIT NOTES (1.2, 1.3):** Per-file counts. Verified by the full batched test suite (no regression). +- [x] **Task 1.4** [Tier 2]: Re-measure effective codepaths after Phase 1. + - EXPECTED: drops from 4.014e+22 to ~4e+19 (CommsLogEntry has the most consumers; their branch counts drop significantly) + - Document in `docs/reports/metadata_promotion_progress.md` (new file) + +## Phase 2: Migrate `HistoryMessage` consumers (~80 sites, 1 commit per file) + +**Focus:** `ai_client.py` per-vendor history. The 27 call sites in phase 3 just got migrated to `provider_state.get_history("...")`; this phase migrates the `entry.get('role', ...)` and `entry.get('content', ...)` calls inside those functions. 
+ +- [x] **Task 2.1** [Tier 3]: Migrate `src/ai_client.py` (the bulk — ~80 access sites in `_send_anthropic`, `_send_deepseek`, `_send_grok`, etc.) + - HOW: `manual-slop_edit_file` per site + - SAFETY: Run the 5 per-provider test files + the 12 metadata dataclass tests + the 7 per-provider migration tests +- [x] **COMMIT:** `refactor(ai_client): migrate HistoryMessage access sites to Metadata dataclass` (Tier 3) +- [x] **GIT NOTE:** Migrated ~80 access sites in ai_client.py. The HistoryMessage aggregate now uses direct field access. +- [x] **Task 2.2** [Tier 2]: Re-measure. EXPECTED: drops further. Document. + +## Phase 3: Migrate `FileItem` consumers (~200 sites, 1 commit per file) + +**Focus:** `aggregate.py` + `gui_2.py` + `app_controller.py` (the rest of it). This is the largest phase. `FileItem` is the most polymorphic — many distinct keys. + +- [x] **Task 3.1** [Tier 3]: Migrate `src/aggregate.py` (~50 access sites) +- [x] **Task 3.2** [Tier 3]: Migrate `src/app_controller.py` (the remaining ~50 access sites; some overlap with phase 1 CommsLogEntry) +- [x] **Task 3.3** [Tier 3]: Migrate `src/gui_2.py` (~100 access sites; the largest) +- [x] **COMMIT (3.1, 3.2, 3.3):** 3 atomic commits, one per file +- [x] **GIT NOTES:** Per-file counts. Verified. +- [x] **Task 3.4** [Tier 2]: Re-measure. EXPECTED: significant drop. Document. + +## Phase 4: Migrate `ToolDefinition` + `ToolCall` consumers (~150 sites, 2 commits) + +**Focus:** `mcp_client.py` + `ai_client.py` (the tool loop section). These are the most typed-shaped; should be clean. + +- [x] **Task 4.1** [Tier 3]: Migrate `src/mcp_client.py` (~94 access sites — the bulk) +- [x] **Task 4.2** [Tier 3]: Migrate `src/ai_client.py` (the tool loop section only — ~56 access sites) +- [x] **COMMIT (4.1, 4.2):** 2 atomic commits +- [x] **GIT NOTES:** Per-file counts. Verified. +- [x] **Task 4.3** [Tier 2]: Re-measure. EXPECTED: another drop. Document. 
+ +## Phase 5: Migrate remaining `Metadata` direct usage (~115 sites, multiple commits) + +**Focus:** The 115 consumer functions that use `Metadata` directly (not via a sub-aggregate alias). This is the catch-all. Many of these are in `gui_2.py` (already partly migrated in phase 3) + `models.py` + `paths.py` + others. + +- [x] **Task 5.1** [Tier 3]: Audit remaining `Metadata` direct-usage sites. + - WHICH: `git grep -nE "Metadata\b" -- 'src/*.py'` filtered to NON-sub-aggregate usages + - HOW: `git grep -lE "Metadata\b" -- 'src/*.py'` then per-file count + - EXPECTED: ~115 sites across 5-8 files +- [x] **Task 5.2-5.N** [Tier 3]: Per-file migration (1 commit per file, in decreasing order of access site count) + - For each file: `manual-slop_edit_file` per site + - SAFETY: Run the affected test file + `tests/test_metadata_dataclass.py` +- [x] **COMMIT (5.2-5.N):** 1 per file. All atomic. + +## Phase 6: Verification + end-of-track (1 task, 3 commits) + +**Focus:** Run all 10 VCs; write `TRACK_COMPLETION`; update `state.toml` + `tracks.md`. + +- [x] **Task 6.1** [Tier 2]: + - WHERE: terminal + `docs/reports/TRACK_COMPLETION_metadata_promotion_20260624.md` (NEW) + - WHAT: + - VC1-VC10 verification (see spec.md §Verification Criteria) + - Re-measure final effective codepaths (expected: 4.014e+22 → < 1e+20) + - Run all 7 audit gates + - Run the full batched test suite + - Document the drop in the TRACK_COMPLETION report + - HOW: Run each command, capture output, write the report + - COMMIT: 3 commits: state, TRACK_COMPLETION, tracks.md update + - VERIFY: All 10 VCs pass + +## Commit Log (Expected, 18-21 atomic commits) + +1. (Phase 0) `refactor(type_aliases): promote Metadata to @dataclass(frozen=True, slots=True)` (Tier 3) +2. (Phase 0) `test(type_aliases): add Metadata dataclass regression-guard suite` (Tier 3) +3. (Phase 1) `refactor(session_logger): migrate CommsLogEntry access sites to Metadata dataclass` (Tier 3) +4. 
(Phase 1) `refactor(multi_agent_conductor): migrate CommsLogEntry access sites` (Tier 3) +5. (Phase 1) `refactor(app_controller): migrate CommsLogEntry access sites` (Tier 3) +6. (Phase 1) [docs] `audit: re-measure effective codepaths after Phase 1` (Tier 2) +7. (Phase 2) `refactor(ai_client): migrate HistoryMessage access sites to Metadata dataclass` (Tier 3) +8. (Phase 2) [docs] `audit: re-measure after Phase 2` (Tier 2) +9. (Phase 3) `refactor(aggregate): migrate FileItem access sites` (Tier 3) +10. (Phase 3) `refactor(app_controller): migrate FileItem access sites` (Tier 3) +11. (Phase 3) `refactor(gui_2): migrate FileItem access sites` (Tier 3) +12. (Phase 3) [docs] `audit: re-measure after Phase 3` (Tier 2) +13. (Phase 4) `refactor(mcp_client): migrate ToolDefinition + ToolCall access sites` (Tier 3) +14. (Phase 4) `refactor(ai_client): migrate ToolDefinition + ToolCall access sites (tool loop section)` (Tier 3) +15. (Phase 4) [docs] `audit: re-measure after Phase 4` (Tier 2) +16. (Phase 5) N commits, 1 per file (varies) +17. (Phase 6) `conductor(state): metadata_promotion_20260624 SHIPPED` (Tier 2) +18. (Phase 6) `docs(reports): TRACK_COMPLETION_metadata_promotion_20260624` (Tier 2) +19. (Phase 6) `conductor(tracks): add metadata_promotion_20260624 row` (Tier 2) + +Plus per-task plan-update commits per the workflow. + +## Verification Commands (run at end of each phase + Phase 6) + +```bash +# VC1: Metadata is a @dataclass(frozen=True, slots=True) +git show HEAD:src/type_aliases.py | head -20 +# Expect: @dataclass(frozen=True, slots=True) class Metadata: + +# VC2: 107 .get('key', ...) 
sites replaced +git grep -E "\.get\('[a-z_]+'," HEAD -- 'src/*.py' | wc -l +# Expect: <20 (only legitimate non-Metadata uses) + +# VC3: 106 ['key'] subscript sites replaced +git grep -E "\[[ ]*'[a-z_]+'[ ]*\]" HEAD -- 'src/*.py' | wc -l +# Expect: <20 (only legitimate non-Metadata uses) + +# VC4: 12+ tests pass +uv run python -m pytest tests/test_metadata_dataclass.py -v +# Expect: 12/12 pass + +# VC5: 5 sub-aggregate TypeAliases point to Metadata +git grep "TypeAlias = " HEAD:src/type_aliases.py +# Expect: CommsLogEntry: TypeAlias = Metadata, etc. + +# VC6: Effective codepaths drops by >= 2 orders of magnitude +uv run python -c " +import sys +sys.path.insert(0, 'scripts/code_path_audit') +sys.path.insert(0, 'src') +from code_path_audit import build_pcg +from code_path_audit_ssdl import count_branches_in_function +pcg = build_pcg('src').data +metadata_consumers = pcg.consumers.get('Metadata', []) +total = sum(2 ** count_branches_in_function(f, 'src') for f in metadata_consumers) +print(f'Effective codepaths: {total:.3e} (baseline: 4.014e+22)') +" +# Expect: < 1e+20 + +# VC7: 7 audit gates pass +uv run python scripts/audit_weak_types.py --strict +uv run python scripts/generate_type_registry.py --check +uv run python scripts/audit_main_thread_imports.py +uv run python scripts/audit_no_models_config_io.py +uv run python scripts/audit_code_path_audit_coverage.py --input-dir docs/reports/code_path_audit/latest --strict +uv run python scripts/audit_exception_handling.py --strict +uv run python scripts/audit_optional_in_3_files.py --strict +# All exit 0 + +# VC8: 10/11 batched tiers +uv run python scripts/run_tests_batched.py +# Expect: 10/11 PASS +``` + +## Notes for Tier 3 workers + +- **Pattern consistency**: For each access site, the canonical pattern is `entry.field_name or default_value` for nullable fields, `entry.field_name` for required fields. 
+- **Dynamic keys** (e.g., `entry[variable_name]` where the key is not a static string): keep as `entry.to_dict()[variable_name]` for those rare cases. The dataclass handles the common case. +- **Polymorphic construction** (e.g., `entry = {'role': 'user', 'content': 'hi'}`): replace with `entry = Metadata(role='user', content='hi')`. If the dict is dynamic, use `entry = Metadata.from_dict(raw_dict)`. +- **JSON serialization**: `json.dumps(entry.to_dict())` (not `json.dumps(entry)` which would fail on dataclass). +- **Indentation**: 1-space per level. +- **No comments** in source code (per AGENTS.md). +- **Per-phase regression-guard test runs**: after each phase, run the full batched test suite. If a phase causes a regression, REVERT the phase commit and investigate (don't try to fix forward). + +## Notes for Tier 2 reviewer + +- The dataclass is the central artifact. After Phase 0, `Metadata()` constructor works. Each subsequent phase migrates consumers in a specific file. +- The 4.01e22 metric drops per phase. Document the drop in `docs/reports/metadata_promotion_progress.md` (new file). +- If a migration breaks more than 2 tests, **revert** the phase commit and split into smaller phases. Don't accumulate broken state. +- The RAG test pre-existing flake is acceptable. Document it but don't try to fix. diff --git a/conductor/tracks/metadata_promotion_20260624/spec.md b/conductor/tracks/metadata_promotion_20260624/spec.md new file mode 100644 index 00000000..a442eb86 --- /dev/null +++ b/conductor/tracks/metadata_promotion_20260624/spec.md @@ -0,0 +1,310 @@ +# Track Specification: metadata_promotion_20260624 + +## Overview + +The actual fix for the 4.01e22 combinatoric explosion. Promotes `Metadata: TypeAlias = dict[str, Any]` to a typed `@dataclass(frozen=True, slots=True)` and migrates all 695 consumer functions + 213 access sites (107 `.get('key', ...)` + 106 subscript `['key']`) to use direct field access. 
+ +## Current State Audit (master `dc397db7`, measured 2026-06-25) + +| Metric | Value | Source | +|---|---:|---| +| `Metadata` consumers in `src/` | **695** | `scripts/code_path_audit.build_pcg` (was 751 in older measurements; some refactors reduced) | +| Top consumer files | `app_controller.py: 123`, `mcp_client.py: 94`, `ai_client.py: 73`, `gui_2.py: 44`, `models.py: 29` | `Counter` over `pcg.consumers['Metadata']` | +| Total branches in Metadata consumers | 3,454 | `scripts/code_path_audit_ssdl.count_branches_in_function` | +| **Effective codepaths (the 4.01e22)** | **4.014e+22** | `compute_effective_codepaths` | +| `Metadata` definition | `src/type_aliases.py:5` | `Metadata: TypeAlias = dict[str, Any]` | +| `.get('key', ...)` access sites | 107 | `git grep` in `src/` | +| `['key']` subscript access sites | 106 | `git grep` in `src/` | +| `is None` / `== None` / `!= None` sites | 106 | `git grep` in `src/` (most are unrelated to Metadata; some are redundant defensive checks) | +| Distinct `.get` keys (top 20) | `ai, args, ast_elements, auto_start, burn_rate, call_count, comment, completed_tickets, conductor, content, context_presets, custom_slices, depends_on, description, dir, discussion, discussions, document, efficiency, files` | `git grep -hoE "\.get\('[a-z_]+',"` | +| Distinct subscript keys (top 20) | `_toggle_command_palette, app_debug_info, args, blocked_reason, bloom, cache_creation_input_tokens, cache_read_input_tokens, command, comment, content, crt, delete_context_preset, depends_on, discussion, discussions, end_line, fps, frame_time_ms_avg, full_path, get_app_debug_info` | `git grep -hoE "\[[ ]*'[a-z_]+'[ ]*\]"` | +| TypeAlias chain | `Metadata` is the root; `CommsLogEntry`, `HistoryMessage`, `FileItem`, `ToolDefinition`, `ToolCall` are all aliases to `Metadata` | `src/type_aliases.py` | + +### Why this matters + +The combinatoric explosion (`4.01e22`) is **not from nil-checks** (per the SSDL post-mortem at 
`docs/reports/SSDL_CAMPAIGN_ABORTED_20260624.md`): + +> "The 4.01e22 is **not from nil-checks**. It's from `Metadata: TypeAlias = dict[str, Any]`. Every consumer function that does `entry.get('key', default)` is a runtime type-dispatch branch. The combinatoric explosion is from the unknown type, not from missing sentinels." + +The 3 SSDL techniques the user mentioned (redundant nil-checks, preemptive dependency resolution, no-op nil types) are **half the fix** — they reduce the branching AROUND the type-dispatch but not the type-dispatch itself. **The actual primary fix is type promotion:** + +```python +# BEFORE (runtime type-dispatch per access): +entry.get('key', default_value) # 3 branches: is None, getattr, default +if 'key' in entry: ... # 1 branch +entry['key'] # 1 branch + potential KeyError + +# AFTER (direct field access): +entry.field_name # 0 branches +if entry.field_name is not None: ... # only if nullable +entry.field_name # direct, no KeyError +``` + +Across the 213 access sites, at ~2 branches each, this removes roughly 426 branches. Because the metric sums `2^N` per function, the terms for the highest-branch-count functions drop by orders of magnitude. 
+ +## Goals + +| ID | Goal | Acceptance | +|---|---|---| +| G1 | Promote `Metadata` to `@dataclass(frozen=True, slots=True)` with explicit fields | `git show HEAD:src/type_aliases.py` shows `@dataclass(frozen=True, slots=True)` directly above `class Metadata:`, and `git grep "^Metadata: TypeAlias" HEAD -- src/type_aliases.py` returns no hits (the `dict[str, Any]` alias is gone) | +| G2 | Migrate all 213 access sites (107 `.get` + 106 `['key']`) to direct field access | `git grep -E "\.get\('[a-z_]+'," HEAD -- 'src/*.py'` returns 0 hits in promoted files; `git grep -E "\[[ ]*'[a-z_]+'[ ]*\]" HEAD -- 'src/*.py'` returns only allowed-pattern hits | +| G3 | All 5 sub-aggregates share the same dataclass (per type_aliases.py chain) | `CommsLogEntry`, `HistoryMessage`, `FileItem`, `ToolDefinition`, `ToolCall` all point to the same `Metadata` dataclass | +| G4 | Effective codepaths drops by ≥ 2 orders of magnitude | `compute_effective_codepaths` returns `< 1e+20` (was 4.014e+22) | +| G5 | All 7 audit gates pass `--strict` (no regression) | `weak_types`, `type_registry`, `main_thread_imports`, `no_models_config_io`, `code_path_audit_coverage`, `exception_handling`, `optional_in_3_files` all exit 0 | +| G6 | All existing tests pass (10/11 batched tiers — RAG flake acceptable) | `scripts/run_tests_batched.py` → 10/11 PASS | +| G7 | New regression-guard tests for the dataclass | `tests/test_metadata_dataclass.py` with 10+ tests for: field access, immutability, `__post_init__` validation, `to_dict()` for backward-compat with JSON serialization | + +## Non-Goals + +- Modifications to `src/code_path_audit*.py` (the audit infrastructure is correct; the migration is on the consumer side) +- The 4 NG1 + 7 NG2 audit violations (already addressed in phase 2 + dc397db7) +- The 4.01e22's nil-check component (per the post-mortem, this is a minor contributor; the type-dispatch is the dominant cause) +- The RAG test pre-existing flake (per `docs/reports/SSDL_CAMPAIGN_ABORTED_20260624.md` "Out of Scope") +- New `src/.py` files (per AGENTS.md hard rule; the dataclass goes in 
`src/type_aliases.py`) +- Polishing the 5 sub-aggregates with custom dataclasses each (overkill; one shared dataclass suffices) + +## Functional Requirements + +### FR1: Design the Metadata dataclass + +`Metadata` is a polymorphic dict shape used in 5 sub-aggregates: +- `CommsLogEntry` (app_controller's session log entries) +- `HistoryMessage` (ai_client's per-vendor history) +- `FileItem` (context composition's file items) +- `ToolDefinition` (mcp_client's tool schema) +- `ToolCall` (ai_client's tool call records) + +The distinct keys used across all 213 access sites are: +- **From `.get()`**: `ai, args, ast_elements, auto_start, burn_rate, call_count, comment, completed_tickets, conductor, content, context_presets, custom_slices, depends_on, description, dir, discussion, discussions, document, efficiency, files, ...` (top 20 keys shown; 107 access sites total) +- **From `[]`**: `_toggle_command_palette, app_debug_info, args, blocked_reason, bloom, cache_creation_input_tokens, cache_read_input_tokens, command, comment, content, crt, delete_context_preset, depends_on, discussion, discussions, end_line, fps, frame_time_ms_avg, full_path, get_app_debug_info, ...` (top 20 keys shown; 106 access sites total) + +After deduplication, the union has ~150-200 distinct keys. The dataclass will have all of them as `Optional[T]` fields (or `T` with a default for required ones). 
This is wider than ideal but: +- `@dataclass(frozen=True, slots=True)` keeps memory overhead low +- Direct attribute access (`entry.field_name`) compiles to a single C-level field read +- Removes ALL `dict.get()` and `dict['key']` runtime branches at the consumer level + +```python +# src/type_aliases.py +from __future__ import annotations +from dataclasses import dataclass, field +from typing import Any, Callable, NamedTuple, Optional, TypeAlias + + +@dataclass(frozen=True, slots=True) +class Metadata: + role: str = "" + content: Any = None + tool_calls: Any = None + tool_call_id: str = "" + name: str = "" + args: Any = None + source_tier: str = "main" + model: str = "unknown" + id: str = "" + ts: str = "" + role_: str = "" # For dicts that used 'role' as a key + description: str = "" + depends_on: tuple[str, ...] = () + status: str = "" + manual_block: bool = False + completed_tickets: int = 0 + auto_start: bool = False + command: str = "" + script: str = "" + output: Any = None + error: str = "" + tier: str = "" + path: str = "" + full_path: str = "" + filename: str = "" + mtime: float = 0.0 + size: int = 0 + # ... ~200 fields total, all Optional or with sensible defaults ... 
+ + +CommsLogEntry: TypeAlias = Metadata +CommsLog: TypeAlias = list[CommsLogEntry] +HistoryMessage: TypeAlias = Metadata +History: TypeAlias = list[HistoryMessage] +FileItem: TypeAlias = Metadata +FileItems: TypeAlias = list[FileItem] +ToolDefinition: TypeAlias = Metadata +ToolCall: TypeAlias = Metadata +CommsLogCallback: TypeAlias = Callable[[CommsLogEntry], None] +JsonPrimitive: TypeAlias = str | int | float | bool | None +JsonValue: TypeAlias = JsonPrimitive | list["JsonValue"] | dict[str, "JsonValue"] + + +class FileItemsDiff(NamedTuple): + refreshed: FileItems + changed: FileItems +``` + +**Migration helper**: the dataclass also has a `to_dict()` method for JSON serialization (used by `CommsLog` writer, session restoration, etc.): + +```python +@dataclass(frozen=True, slots=True) +class Metadata: + ...fields... + def to_dict(self) -> dict[str, Any]: + return {k: v for k, v in asdict(self).items() if v is not None or k in _NON_NULL_KEYS} +``` + +### FR2: Phase-by-phase migration (5 sub-aggregates) + +The 695 consumer functions distribute across the 5 sub-aggregates. To minimize blast radius, migrate sub-aggregate by sub-aggregate: + +| Phase | Sub-aggregate | Est. consumers | Primary files | +|---|---|---:|---| +| 1 | `CommsLogEntry` | ~150 | `app_controller.py`, `multi_agent_conductor.py`, `session_logger.py` | +| 2 | `HistoryMessage` | ~80 | `ai_client.py` (per-vendor history) | +| 3 | `FileItem` | ~200 | `aggregate.py`, `gui_2.py`, `app_controller.py` | +| 4 | `ToolDefinition` + `ToolCall` | ~150 | `mcp_client.py`, `ai_client.py` | +| 5 | Other (`Metadata` direct usage) | ~115 | `gui_2.py` (general), `models.py`, `paths.py`, etc. | + +Each phase: +1. Add the new field to the `Metadata` dataclass (if not already present) +2. Update consumers in that sub-aggregate's primary files: `entry.get('key', default)` → `entry.key or default` (or similar) +3. Update consumers: `entry['key']` → `entry.key` +4. 
Add regression-guard tests for the migrated access pattern +5. Re-measure effective codepaths after the phase + +### FR3: Migration patterns (canonical) + +```python +# BEFORE: +x = entry.get('model', 'unknown') +y = entry.get('input_tokens', 0) or 0 +z = entry.get('source_tier', 'main') +if entry.get('manual_block', False): + ... +role = entry['role'] +if 'depends_on' in entry: + deps = entry['depends_on'] + +# AFTER (with Metadata dataclass): +x = entry.model or 'unknown' +y = entry.input_tokens or 0 +z = entry.source_tier or 'main' +if entry.manual_block: + ... +role = entry.role +if entry.depends_on: + deps = entry.depends_on +``` + +The migration is mechanical but requires care: +- For `Optional[T]` fields: use `entry.field or default_value` +- For required fields: use `entry.field` directly +- For polymorphic keys (some entries have the key, some don't): the dataclass default handles this (all fields have defaults) +- For `['key']` (subscript) where the key is dynamic: rare; keep as `dict[str, Any]` (e.g., `entry.to_dict()['dynamic_key']`) + +### FR4: Edge cases + +**Polymorphic constructors**: many sites do `entry = {'role': 'user', 'content': 'hi'}`. After migration: `entry = Metadata(role='user', content='hi')`. The dataclass has all the fields as `Optional` or with defaults, so this works. + +**Dynamic dict construction**: `for k, v in raw.items(): entry[k] = v`. After migration: `entry = Metadata(**raw)`. The `**` syntax requires that all keys in `raw` are valid field names; if `raw` has unknown keys, this fails. Solution: use a `from_dict` classmethod that filters out unknown keys: + +```python +@classmethod +def from_dict(cls, raw: dict[str, Any]) -> 'Metadata': + valid_fields = {f.name for f in fields(cls)} + return cls(**{k: v for k, v in raw.items() if k in valid_fields}) +``` + +**JSON serialization**: `json.dumps(entry)` fails on dataclass. Solution: `json.dumps(entry.to_dict())`. 
+ +**Pickle**: `pickle.dumps(entry)` works (for a `frozen=True, slots=True` dataclass, `dataclasses` generates `__getstate__`/`__setstate__`, so the default pickle protocol round-trips it). + +**Equality**: `entry1 == entry2` still compares by value (the dataclass generates a field-wise `__eq__`, matching the content-based `==` that dicts already had). Note that a `Metadata` instance never compares equal to a plain `dict`, so comparisons against dict literals must be migrated too. + +### FR5: Re-measurement + +After each phase, re-measure: + +```bash +uv run python -c " +import sys +sys.path.insert(0, 'scripts/code_path_audit') +sys.path.insert(0, 'src') +from code_path_audit import build_pcg +from code_path_audit_ssdl import count_branches_in_function +pcg = build_pcg('src').data +metadata_consumers = pcg.consumers.get('Metadata', []) +total = sum(2 ** count_branches_in_function(f, 'src') for f in metadata_consumers) +print(f'Effective codepaths: {total:.3e}') +print(f'Consumers: {len(metadata_consumers)}') +" +``` + +Expected: drops from 4.014e+22 to < 1e+20 after Phase 1 (just CommsLogEntry); further drops after each subsequent phase. + +## Non-Functional Requirements + +- NFR1: 1-space indentation (per `conductor/workflow.md`) +- NFR2: CRLF line endings on Windows +- NFR3: No comments in source code +- NFR4: Per-task atomic commits with git notes +- NFR5: No new pip dependencies (dataclass is stdlib) +- NFR6: `Result[T]` returns for fallible fns (per `error_handling.md`) +- NFR7: No new `src/.py` files (per AGENTS.md hard rule; the dataclass goes in `src/type_aliases.py`) + +## Architecture Reference + +- `conductor/code_styleguides/data_oriented_design.md` — the "Prefer Fewer Types" principle (the canonical rationale) +- `conductor/code_styleguides/error_handling.md` — the `Result[T]` convention +- `conductor/code_styleguides/type_aliases.md` — the 10 TypeAliases convention (per the data_structure_strengthening_20260606 track) +- `src/type_aliases.py` — the current Metadata definition (line 5) +- `scripts/code_path_audit/code_path_audit.py` — the consumer detection (3-pass AST) +- `scripts/code_path_audit/code_path_audit_ssdl.py` — the effective codepaths metric +- 
`docs/reports/SSDL_CAMPAIGN_ABORTED_20260624.md` — the post-mortem explaining why this is a type-dispatch problem, not a nil-check problem +- `conductor/tracks/any_type_componentization_20260621/plan.md` — the grandparent track (48/89 sites promoted, then reverted at `751b94d4`) +- `conductor/tracks/code_path_audit_20260607/spec_v2.md` — the audit that established the 4.01e22 baseline +- `docs/reports/code_path_audit/2026-06-22/AUDIT_REPORT.md` — the original 6797-line audit report + +## Out of Scope + +- Modifications to `src/code_path_audit*.py` (the audit infrastructure is correct) +- The 4 NG1 + 7 NG2 audit violations (already addressed) +- The RAG test pre-existing flake (per SSDL post-mortem) +- The 5 sub-aggregates (`CommsLogEntry`, `HistoryMessage`, `FileItem`, `ToolDefinition`, `ToolCall`) becoming separate dataclasses (overkill; they share the same `Metadata` base) +- New `src/.py` files (per AGENTS.md hard rule) +- Backward-compat support for `dict[str, Any]` (the migration is a hard break; any code that does `Metadata(...).__class__ is dict` will break, but no such code exists per the audit) + +## Verification Criteria (Definition of Done) + +| # | Criterion | Verification command | +|---|---|---| +| VC1 | `Metadata` is a `@dataclass(frozen=True, slots=True)`, not `dict[str, Any]` | `git show HEAD:src/type_aliases.py \| head -10` shows `@dataclass(frozen=True, slots=True) class Metadata:` | +| VC2 | All 107 `.get('key', ...)` sites on Metadata consumers replaced | `git grep -E "\.get\('[a-z_]+'," HEAD -- 'src/*.py' \| wc -l` returns 0 (or only legitimate non-Metadata uses like `.get('mtime', 0)` on file paths) | +| VC3 | All 106 `['key']` subscript sites on Metadata consumers replaced | `git grep -E "\[[ ]*'[a-z_]+'[ ]*\]" HEAD -- 'src/*.py' \| wc -l` returns 0 (or only legitimate non-Metadata uses) | +| VC4 | `Metadata(...)` constructor works for all common patterns | `tests/test_metadata_dataclass.py` passes 10+ tests (constructor, field access, 
`to_dict()`, `from_dict()`, frozen, slots, equality) | +| VC5 | All 5 sub-aggregate TypeAliases point to the new `Metadata` | `git grep "TypeAlias = " HEAD:src/type_aliases.py` shows `CommsLogEntry: TypeAlias = Metadata` etc. | +| VC6 | Effective codepaths drops by ≥ 2 orders of magnitude | `compute_effective_codepaths` returns `< 1e+20` (was 4.014e+22) | +| VC7 | All 7 audit gates pass `--strict` (no regression) | `weak_types` 102 ≤ 112; `type_registry` 22 files; `main_thread_imports` 17; `no_models_config_io` 0; `code_path_audit_coverage` 0; `exception_handling` 0; `optional_in_3_files` 0 | +| VC8 | 10/11 batched test tiers PASS (RAG flake acceptable) | `scripts/run_tests_batched.py` → 10/11 | +| VC9 | End-of-track report written | `docs/reports/TRACK_COMPLETION_metadata_promotion_20260624.md` exists with the new effective-codepaths number | +| VC10 | New regression-guard test file | `tests/test_metadata_dataclass.py` exists with 10+ tests passing | + +## Risks + +| # | Risk | Likelihood | Mitigation | +|---|---|---|---| +| R1 | The 213 access sites have polymorphic keys that don't fit cleanly into a single dataclass | medium | Use `Optional[T]` for all fields; use `from_dict` classmethod that filters unknown keys; use `to_dict()` for JSON serialization | +| R2 | Some sites do `entry['key']` where `key` is dynamic (e.g., `entry[variable_name]`) | low | These are rare; keep as `dict[str, Any]` access for dynamic keys; the static field access handles the common case | +| R3 | The `to_dict()` round-trip loses information (e.g., nested dicts) | low | Implement `to_dict()` carefully; nested dicts pass through as `dict[str, Any]` (not recursively converted) | +| R4 | Some sites mutate `entry` (e.g., `entry['key'] = value`); dataclass is frozen | medium | These sites are rare; audit them; if found, replace with `dataclasses.replace(entry, field_name=value)` | +| R5 | Migration breaks the regression-guard tests for `test_provider_state_migration.py` (post-phase 3) | low 
| The migration is on Metadata, not provider_state; orthogonal changes; per-phase regression-guard test runs | +| R6 | The 695 consumer functions are too many for one track | high | Break into 5 phases (FR2); each phase is a sub-aggregate; the dataclass is added once and all 5 sub-aggregates reuse it | +| R7 | The dict-shape is used for JSON-serialized payloads (e.g., comms.log); the dataclass breaks the JSON layer | medium | The dataclass has `to_dict()` + `from_dict()`; the JSON layer converts via these. Verify the comms.log reader/writer (session_logger.py) uses these methods | + +## See also + +- `docs/reports/SSDL_CAMPAIGN_ABORTED_20260624.md` — the post-mortem explaining why this is a type-dispatch problem +- `conductor/tracks/any_type_componentization_20260621/plan.md` — the grandparent plan +- `conductor/tracks/code_path_audit_20260607/spec_v2.md` — the audit that established the 4.01e22 baseline +- `docs/reports/code_path_audit/2026-06-22/AUDIT_REPORT.md` — the original 6797-line audit report +- `src/type_aliases.py` — the current Metadata definition +- `scripts/code_path_audit/code_path_audit.py` — the consumer detection +- `scripts/code_path_audit/code_path_audit_ssdl.py` — the effective codepaths metric +- `conductor/code_styleguides/data_oriented_design.md` — the "Prefer Fewer Types" principle diff --git a/conductor/tracks/metadata_promotion_20260624/state.toml b/conductor/tracks/metadata_promotion_20260624/state.toml new file mode 100644 index 00000000..e71e6874 --- /dev/null +++ b/conductor/tracks/metadata_promotion_20260624/state.toml @@ -0,0 +1,57 @@ +# Track state for metadata_promotion_20260624 +# Updated by Tier 2 Tech Lead as tasks complete + +[meta] +track_id = "metadata_promotion_20260624" +name = "Metadata Promotion: dict[str, Any] -> @dataclass(frozen=True, slots=True)" +status = "active" +current_phase = 0 +last_updated = "2026-06-25" + +[blocked_by] +code_path_audit_phase_3_provider_state_20260624 = "pending (not started yet; recommended 
prerequisite to run in parallel with this track)" + +[blocks] + +[phases] +phase_0 = { status = "pending", checkpointsha = "", name = "Design the dataclass + add regression-guard test" } +phase_1 = { status = "pending", checkpointsha = "", name = "Migrate CommsLogEntry consumers (3 commits, ~150 sites)" } +phase_2 = { status = "pending", checkpointsha = "", name = "Migrate HistoryMessage consumers (1 commit, ~80 sites)" } +phase_3 = { status = "pending", checkpointsha = "", name = "Migrate FileItem consumers (3 commits, ~200 sites)" } +phase_4 = { status = "pending", checkpointsha = "", name = "Migrate ToolDefinition + ToolCall consumers (2 commits, ~150 sites)" } +phase_5 = { status = "pending", checkpointsha = "", name = "Migrate remaining Metadata direct usage (N commits, ~115 sites)" } +phase_6 = { status = "pending", checkpointsha = "", name = "Verification + end-of-track report" } + +[tasks] +t0_1 = { status = "pending", commit_sha = "", description = "Design the Metadata @dataclass(frozen=True, slots=True) in src/type_aliases.py" } +t0_2 = { status = "pending", commit_sha = "", description = "Create tests/test_metadata_dataclass.py with 12+ tests" } +t1_1 = { status = "pending", commit_sha = "", description = "Migrate src/session_logger.py (~30 access sites)" } +t1_2 = { status = "pending", commit_sha = "", description = "Migrate src/multi_agent_conductor.py (~70 access sites)" } +t1_3 = { status = "pending", commit_sha = "", description = "Migrate src/app_controller.py CommsLogEntry section (~50 access sites)" } +t1_4 = { status = "pending", commit_sha = "", description = "Re-measure effective codepaths after Phase 1; document in metadata_promotion_progress.md" } +t2_1 = { status = "pending", commit_sha = "", description = "Migrate src/ai_client.py HistoryMessage section (~80 access sites)" } +t2_2 = { status = "pending", commit_sha = "", description = "Re-measure after Phase 2; document" } +t3_1 = { status = "pending", commit_sha = "", description = 
"Migrate src/aggregate.py FileItem section (~50 access sites)" } +t3_2 = { status = "pending", commit_sha = "", description = "Migrate src/app_controller.py FileItem section (~50 access sites)" } +t3_3 = { status = "pending", commit_sha = "", description = "Migrate src/gui_2.py FileItem section (~100 access sites)" } +t3_4 = { status = "pending", commit_sha = "", description = "Re-measure after Phase 3; document" } +t4_1 = { status = "pending", commit_sha = "", description = "Migrate src/mcp_client.py ToolDefinition + ToolCall section (~94 access sites)" } +t4_2 = { status = "pending", commit_sha = "", description = "Migrate src/ai_client.py tool loop section (~56 access sites)" } +t4_3 = { status = "pending", commit_sha = "", description = "Re-measure after Phase 4; document" } +t5_1 = { status = "pending", commit_sha = "", description = "Audit remaining Metadata direct-usage sites (~115 across 5-8 files)" } +t5_2_5_N = { status = "pending", commit_sha = "", description = "Migrate per file (1 commit per file, decreasing order of access site count)" } +t6_1 = { status = "pending", commit_sha = "", description = "Run all 10 VCs; write TRACK_COMPLETION; update state.toml + tracks.md" } + +[verification] +phase_0_complete = false +phase_1_complete = false +phase_2_complete = false +phase_3_complete = false +phase_4_complete = false +phase_5_complete = false +phase_6_complete = false + +[track_specific] +metric_targets = { baseline_effective_codepaths = "4.014e+22", target_effective_codepaths = "< 1e+20", expected_phase_1_drop = "~4e+19 (CommsLogEntry has the most consumers)", expected_final_drop = ">= 2 orders of magnitude" } +access_site_targets = { baseline_get_sites = 107, baseline_subscript_sites = 106, target_post_track = "< 20 each (only legitimate non-Metadata uses)" } +phased_migration_consumer_distribution = { "CommsLogEntry" = 150, "HistoryMessage" = 80, "FileItem" = 200, "ToolDefinition+ToolCall" = 150, "Metadata direct" = 115 }