From aba35f9f4aadf29d3d780b8f238d8bd7baab75b2 Mon Sep 17 00:00:00 2001 From: Ed_ Date: Sat, 6 Jun 2026 18:06:34 -0400 Subject: [PATCH] conductor(spec): Add type registry to data_structure_strengthening track Per user feedback (2026-06-06): instead of a follow-up 'TypedDict Migration' track, add a NEW deliverable: an auto-generated type registry in docs/type_registry/ that captures the field information in docs form. New files: - scripts/generate_type_registry.py (NEW): AST-based tool that reads src/ and writes per-source-file .md files with the fields of every @dataclass, NamedTuple, TypeAlias, TypedDict. Has --check (CI mode, exits 1 if registry would change) and --diff (dry run) modes. - docs/type_registry/ (NEW, generated): index.md + per-source-file references (type_aliases.md, ai_client.md, models.md, etc.). - tests/test_generate_type_registry.py (NEW): verify the generator. Architecture updates: - Section 3.6 (NEW): Type Registry architecture with example output. - Section 3.7 (NEW): Why per-source-file docs (locality of reference). - Section 1.1 (NEW): 'Why docs over TypedDict' analysis (3 reasons: lower upfront cost, better fit for AI workflow, auto-maintained). - Goals table: registry added as a C (innovation) goal. - Module layout: docs/type_registry/ and scripts/generate_type_registry.py added to the new files list. - Migration: Phase 2 now includes the registry generator + initial docs. - Out of scope: TypedDict migration REMOVED; 'auto-typing the field shape' added with the docs as the chosen approach. - See Also: TypedDict follow-up REPLACED with 'Registry Maintenance & CI Integration' (smaller scope, just wires the generator into CI). The 'cost we eat' is the LLM reading 200-500 lines of markdown per query. This is bounded and proportional to actual information need. The upfront cost of designing TypedDict schemas for every type is unbounded. Tradeoffs favor the docs approach for v1; TypedDict can come later as a future track if desired. 
--- .../metadata.json | 40 +++++- .../spec.md | 129 ++++++++++++++++-- .../state.toml | 28 ++-- 3 files changed, 172 insertions(+), 25 deletions(-) diff --git a/conductor/tracks/data_structure_strengthening_20260606/metadata.json b/conductor/tracks/data_structure_strengthening_20260606/metadata.json index b961c3da..b78d5715 100644 --- a/conductor/tracks/data_structure_strengthening_20260606/metadata.json +++ b/conductor/tracks/data_structure_strengthening_20260606/metadata.json @@ -11,6 +11,17 @@ "src/type_aliases.py", "tests/test_type_aliases.py", "tests/test_audit_weak_types.py", + "tests/test_generate_type_registry.py", + "scripts/generate_type_registry.py", + "docs/type_registry/index.md", + "docs/type_registry/type_aliases.md", + "docs/type_registry/ai_client.md", + "docs/type_registry/app_controller.md", + "docs/type_registry/models.md", + "docs/type_registry/api_hook_client.md", + "docs/type_registry/project_manager.md", + "docs/type_registry/aggregate.md", + "docs/type_registry/result_types.md", "conductor/code_styleguides/type_aliases.md" ], "modified_files": [ @@ -25,7 +36,7 @@ ] }, "blocked_by": [], - "blocks": ["typed_dict_migration_20260606" /* not yet created */], + "blocks": ["type_registry_ci_20260606" /* not yet created; the registry-CI-integration follow-up */], "estimated_phases": 2, "spec": "spec.md", "plan": "plan.md", @@ -89,10 +100,29 @@ "target_reduction": "430 -> ~60 (86% reduction in the 6 high-traffic files)" }, "ai_performance_analysis": { - "win": "A name is a one-time cost the AI pays to learn, then reuses forever. With 10 aliases covering 370+ usages, the AI's vocabulary cost is bounded while the readability win is unbounded.", - "cost": "10 new names for the AI to learn. Comparable to adding 10 new function names to a module - well within normal Python codebase scale.", - "caveat": "If we add too many aliases (50+), the cognitive cost exceeds the benefit. The proposed 10 is the sweet spot. 
Phase 2 will convert the most-used aliases to TypedDict, which gives the AI field-level hints, not just a name.", - "honest_assessment": "Net win. The current 0 aliases is the worst case; going to 10 is a strictly better state for AI readability." + "win": "A name is a one-time cost the AI pays to learn, then reuses forever. With 10 aliases covering 370+ usages, the AI's vocabulary cost is bounded while the readability win is unbounded. The auto-generated registry gives the AI field-level information on demand at the cost of a few hundred tokens of context per query.", + "cost": "10 new names for the AI to learn (same as adding 10 new function names to a module - well within normal Python codebase scale). Plus a small token cost when the AI reads a registry file: 200-500 lines of markdown per source file, read once and cached in context.", + "caveat": "If we add too many aliases (50+), the cognitive cost exceeds the benefit. The proposed 10 is the sweet spot. The docs-based registry approach is an alternative to TypedDict migration: docs are advisory but auto-maintained, whereas TypedDict would enforce but cost more upfront.", + "honest_assessment": "Net win. The current 0 aliases is the worst case; going to 10 is a strictly better state for AI readability. Adding auto-generated docs is a further improvement at modest token cost." 
+ }, + + "type_registry": { + "directory": "docs/type_registry/", + "files": [ + "index.md (top-level TOCs)", + "type_aliases.md (the 10 TypeAliases from src/type_aliases.py)", + "result_types.md (the Result/ErrorInfo from data_oriented_error_handling_20260606)", + "" + ], + "script": "scripts/generate_type_registry.py", + "script_modes": { + "default": "Generate / regenerate the registry", + "--check": "CI mode; exits 1 if the registry would change", + "--diff": "Dry run; print what would change without writing" + }, + "agent_workflow": "The coding agent runs the generator before marking a track complete, and includes the registry diff in the commit. CI runs --check on every PR.", + "ai_token_cost": "200-500 lines of markdown per source file. The LLM reads it once and caches the schema in context. Subsequent references to the same types don't re-fetch.", + "rationale": "Trade upfront cost (TypedDict schema design for every type) for token cost (LLM reads docs at query time). Docs are auto-maintained; TypedDict schemas would need to be hand-maintained. For a codebase where the priority is 'name the shapes first, give them structure later', docs are the right v1 approach." }, "coexistence_with_data_oriented_track": { "Result_T": "The data_oriented_error_handling_20260606 track introduces Result[T] as a control-level wrapper. 
The aliases introduced by THIS track are value-level types (what's inside the T).", diff --git a/conductor/tracks/data_structure_strengthening_20260606/spec.md b/conductor/tracks/data_structure_strengthening_20260606/spec.md index ccfc09bd..adf83fdc 100644 --- a/conductor/tracks/data_structure_strengthening_20260606/spec.md +++ b/conductor/tracks/data_structure_strengthening_20260606/spec.md @@ -11,11 +11,23 @@ This track introduces a small, focused set of `TypeAlias` definitions in a new `src/type_aliases.py` module and replaces 370+ anonymous `dict[str, Any]` / `list[dict[...]]` usages across 6 high-traffic files (`src/ai_client.py`, `src/app_controller.py`, `src/models.py`, `src/api_hook_client.py`, `src/project_manager.py`, `src/aggregate.py`). It also converts 2-3 tuple returns to `NamedTuple`s for self-documenting struct semantics. +**In addition**, the track introduces a new `docs/type_registry/` directory that contains **auto-generated** documentation describing the fields of every `TypeAlias`, `NamedTuple`, `@dataclass`, and `TypedDict` in `src/`. A new script `scripts/generate_type_registry.py` reads `src/` via AST and writes the docs. The coding agent runs this script as part of track completion (and CI runs it as a `--check` to detect drift). + The track is **data-grounded**: a new AST-based audit script (`scripts/audit_weak_types.py`, committed in `84fd9ac9`) found 430 weak type sites across 29 of 61 files. After whitespace normalization, only **26 unique type strings** exist; the top 4 (`list[dict[str, Any]]`, `dict[str, Any]`, `Dict[str, Any]`, `List[Dict[str, Any]]`) account for 86% of findings. A small set of well-named aliases eliminates the vast majority. **The current codebase has ZERO strong type aliases** (no `TypeAlias`, no `NamedTuple`, no `pydantic.BaseModel` for these shapes). This is the worst case for AI readability — an LLM reading the code has zero schema hints and must guess the shape from usage at every call site. 
-**Scope is deliberately bounded.** The track adds **6 type aliases** and converts **2-3 tuple returns** to NamedTuples. It does NOT migrate to `TypedDict` or `@dataclass` schemas (that's a much larger Phase 2, planned as a separate follow-up). It does NOT touch the 23 lower-impact files; they remain as `dict[str, Any]` until a future track migrates them. +**Scope is deliberately bounded.** The track adds **6 type aliases**, converts **2-3 tuple returns** to NamedTuples, and introduces the **type registry generator + initial generated docs**. It does NOT migrate to `TypedDict` or `@dataclass` schemas (the registry generator captures the field information in docs form, with much lower upfront cost). It does NOT touch the 23 lower-impact files; they remain as `dict[str, Any]` until a future track migrates them. + +### 1.1 Why docs over TypedDict + +The original draft of this spec proposed a follow-up track "TypedDict / dataclass Migration" that would convert every `Metadata` alias into a `TypedDict` with explicit fields. After user feedback, this was replaced with the type-registry approach for three reasons: + +1. **Lower upfront cost.** `TypedDict` requires designing the schema for every type. The registry generator reads what already exists in code and writes it to docs. No schema design needed. +2. **Better fit for AI workflow.** An LLM that needs to know the fields of `CommsLogEntry` can `cat docs/type_registry/ai_client.md` once, then use the field info. The cost is one registry file of context (typically 200-500 lines of markdown; see §3.7), paid only when the LLM needs the schema. +3. **Auto-maintained.** The script runs as part of track completion and as a CI `--check`. Drift cannot accumulate silently: if code changes without a regeneration, `--check` fails and the agent regenerates the docs. + +The "cost we eat" is the LLM reading the docs at query time. This is bounded (one registry file, typically 200-500 lines of markdown, per query) and proportional to the actual information need. ## 2. 
Goals (Priority Order) @@ -26,12 +38,13 @@ The track is **data-grounded**: a new AST-based audit script (`scripts/audit_wea | **B (architectural)** | The new aliases are the **canonical** names going forward. New code MUST use the aliases. Old code is migrated opportunistically (this track + future tracks). | One source of truth. The audit script (`scripts/audit_weak_types.py`) becomes a permanent CI gate that fails when new weak types are introduced. | | **B (architectural)** | Audit script exits 0 with significantly fewer findings after the refactor. Re-running `--json` should show the count drop from 430 to ~60 (only the 23 lower-impact files remain). | Measurable success criterion. The audit script is the ground truth. | | **C (optimization)** | Convert 2-3 tuple returns to `NamedTuple`s. Specifically: `_reread_file_items()` returns `Tuple[refreshed, changed]` becomes a `FileItemsDiff` NamedTuple. Other 1-occurrence tuples (screen coords, etc.) are converted opportunistically. | The tuple return pattern is rarer than the dict pattern (4 sites vs 430), but each conversion is high-value for self-documentation. | -| **C (documentation)** | Add a short "Data Structure Conventions" section to `conductor/product-guidelines.md` and a "Type Aliases" subsection in `conductor/code_styleguides/error_handling.md` (or a new `code_styleguides/type_aliases.md`). | The convention is visible in the project-level guidance. Future plans reference it. | -| **D (forward-looking)** | Plan a Phase-2 follow-up track: "TypedDict / dataclass Migration" that converts the most-used aliases (`CommsLogEntry`, `FileItem`) to `TypedDict` or `@dataclass(frozen=True)` so the FIELDS are visible to LLMs, not just the name. NOT in this track; documented in §12.1. | Honest about what's missing. Phase 2 is a separate effort. 
| +| **C (documentation)** | Add a short "Data Structure Conventions" section to `conductor/product-guidelines.md` and a new `conductor/code_styleguides/type_aliases.md` reference. | The convention is visible in the project-level guidance. Future plans reference it. | +| **C (innovation)** | New `docs/type_registry/` directory with **auto-generated** documentation describing the fields of every `TypeAlias`, `NamedTuple`, `@dataclass`, and `TypedDict` in `src/`. New script `scripts/generate_type_registry.py` reads `src/` via AST and writes the docs. The script has a `--check` mode for CI: exits 1 if the registry would change. The coding agent runs the script as part of track completion. | The "docs over TypedDict" tradeoff: pay a small token cost at AI-query time (the LLM `cat`s the docs) instead of a large upfront cost (designing `TypedDict` schemas for every type). See §1.1. | +| **D (forward-looking)** | Plan a future "Registry Maintenance" track that promotes the type-registry generation to a CI gate (fail if `--check` reports drift). The registry becomes part of every track's commit workflow. NOT in this track; documented in §12.1. | The track ships the registry; the future track wires it into CI / track-completion workflows. | ### 2.1 Non-Goals (this track) -- **Not** converting `dict[str, Any]` to `TypedDict` or `@dataclass`. That's Phase 2 of a future track. This track stops at NAMING the shapes; it does not give them structure. +- **Not** converting `dict[str, Any]` to `TypedDict` or `@dataclass` directly in code. The type registry (added in Phase 2) captures the field information in docs form; a future track may convert the most-used aliases to `TypedDict` (giving schema hints via type hints instead of via docs), but that is a separate decision. - **Not** touching the 23 lower-impact files. They stay as `dict[str, Any]` until a future incremental track migrates them. The audit script makes their weakness VISIBLE so the cost of ignoring them is documented. 
- **Not** changing the `Result[T]` pattern from the `data_oriented_error_handling_20260606` track. The aliases complement `Result`; they don't replace it. (`ErrorInfo` is a `@dataclass`, not a `TypeAlias`; it's already structured.) - **Not** adding pydantic models. The project doesn't currently use pydantic for these shapes; introducing it would be a much larger architectural decision. @@ -136,16 +149,30 @@ src/ aggregate.py # MODIFIED: import aliases; replace ~17 weak sites mcp_client.py # UNCHANGED (only 9 weak sites; below the threshold) +docs/ + type_registry/ + index.md # NEW (generated): top-level TOCs + type_aliases.md # NEW (generated): the 10 TypeAliases + 1 NamedTuple + ai_client.md # NEW (generated): per-source-file reference + app_controller.md # NEW (generated) + models.md # NEW (generated) + api_hook_client.md # NEW (generated) + project_manager.md # NEW (generated) + aggregate.md # NEW (generated) + result_types.md # NEW (generated): from data_oriented_error_handling_20260606 + conductor/ product-guidelines.md # MODIFIED: new "Data Structure Conventions" section code_styleguides/ - type_aliases.md # NEW: the canonical reference (or co-located in error_handling.md) + type_aliases.md # NEW: the canonical reference scripts/ audit_weak_types.py # already committed in 84fd9ac9; runs as CI gate + generate_type_registry.py # NEW: AST-based registry generator tests/ test_type_aliases.py # NEW: verify the aliases import and resolve to the right types + test_generate_type_registry.py # NEW: verify the generator's regex/AST patterns and output format (existing test files): # MODIFIED: update the 6 files; existing tests should pass unchanged ``` @@ -164,6 +191,88 @@ Result[FileItems] # a Result wrapping a list of file items This is consistent: `Result` is a generic that wraps any data type. Naming the data types (via `TypeAlias`) makes the generic concrete without changing the `Result` pattern. 
+### 3.6 Type Registry (Auto-Generated Docs) + +`scripts/generate_type_registry.py` is a new AST-based tool that reads `src/` and writes `docs/type_registry/`. It runs as part of track completion (manually by the coding agent) and as a CI `--check` (automated). + +**Output structure:** + +``` +docs/type_registry/ + index.md # top-level: full table of contents + summary + type_aliases.md # the 10 TypeAliases from src/type_aliases.py + ai_client.md # per-source-file: all dataclasses, NamedTuples, TypeAliases defined or used here + app_controller.md + models.md + api_hook_client.md + project_manager.md + aggregate.md + ... + (one .md per source file that has structs) +``` + +**Script behavior:** + +```bash +# Generate / regenerate the registry (default mode) +python scripts/generate_type_registry.py + +# Verify the registry is up-to-date (CI mode; exits 1 if drift) +python scripts/generate_type_registry.py --check + +# Dry run: print what would change without writing +python scripts/generate_type_registry.py --diff +``` + +**For each `@dataclass` in `src/`, the script writes a section like:** + +```markdown +## `src/models.py::Ticket` + +**Kind:** `@dataclass` +**Fields:** +- `id: str` — unique ticket identifier +- `title: str` — human-readable title +- `status: str = "todo"` — current status +- `priority: int = 0` — priority for queue ordering +- `created_at: datetime.datetime` — when created +- `dependencies: list[str] = field(default_factory=list)` — ticket IDs this depends on +- `metadata: Metadata` — opaque key-value metadata (see type_aliases.md) +``` + +(Note: docstrings on fields are extracted from the source to provide the "—" descriptions. Fields without docstrings are documented with their name only.) + +**For each `TypeAlias`, the script writes a section like:** + +```markdown +## `src/type_aliases.py::CommsLogEntry` + +**Kind:** `TypeAlias` +**Resolves to:** `Metadata` +**Used by:** `_comms_log`, `_append_comms`, `get_comms_log`, `comms_log_callback`, ... 
+ +**Note:** `CommsLogEntry` is a semantic alias for `Metadata`. For the canonical field semantics, see [`Metadata`](#metadata) (which is itself a generic `dict[str, Any]` until a future track converts it to a `TypedDict`). +``` + +**For each `NamedTuple`, the script writes a section like:** + +```markdown +## `src/type_aliases.py::FileItemsDiff` + +**Kind:** `NamedTuple` +**Fields:** +- `refreshed: FileItems` — items whose mtime was checked and content re-read +- `changed: FileItems` — items whose content actually changed (subset of refreshed) +``` + +**For each function that returns a structured type, the script documents the return type signature** (using `ast.unparse` on the return annotation). + +### 3.7 Why Per-Source-File Docs (not one giant file) + +A per-source-file layout matches the project's per-source-file guide structure (`docs/guide_ai_client.md`, `docs/guide_mcp_client.md`, etc.). The coding agent reads `docs/type_registry/ai_client.md` when working in `src/ai_client.py` — locality of reference. The `index.md` provides the cross-cutting view. + +**The "token cost we eat" per LLM query is bounded:** a typical source file's registry is 200-500 lines of markdown. The LLM reads it once and caches the schema in context. Subsequent references to the same types don't re-fetch. + ## 4. Per-File Refactor Plan ### 4.1 `src/ai_client.py` (139 sites — largest offender) @@ -258,7 +367,7 @@ The aliases live in `src/type_aliases.py` (pure stdlib `typing.TypeAlias`). | Phase | What | Risk | |---|---|---| | **Phase 1 — Aliases + 6-file replacement + audit baseline** | Add `src/type_aliases.py`. Add `tests/test_type_aliases.py`. Mechanical replacement in 6 files. Add `--strict` mode to the audit script. Generate the new baseline. | Medium. ~345 sites of mechanical replacement. Mitigated by existing test coverage. | -| **Phase 2 — NamedTuples + docs + archive** | Convert 2-3 tuple returns to NamedTuples. 
Update `conductor/product-guidelines.md` and `code_styleguides/`. Manual smoke test. Archive the track. | Low. ~3-4 sites of tuple conversion. Docs-only changes. | +| **Phase 2 — NamedTuples + type registry generator + initial docs + archive** | Convert 2-3 tuple returns to NamedTuples. Add `scripts/generate_type_registry.py` + the initial generated registry in `docs/type_registry/`. Add tests for the generator. Add `conductor/code_styleguides/type_aliases.md` and update `product-guidelines.md`. Manual smoke test. Archive the track. | Low. ~3-4 sites of tuple conversion. Generator is a self-contained AST tool. All other changes are docs-only. | Each phase has its own checkpoint commit and git note. @@ -275,7 +384,7 @@ Each phase has its own checkpoint commit and git note. ## 10. Out of Scope (Explicit) -- **TypedDict / @dataclass migration** of the `Metadata` family. Deferred to a Phase 2 of a future track. This track adds NAMES; the next track adds STRUCTURE. +- **TypedDict / @dataclass migration** of the `Metadata` family. The type registry (added in Phase 2) captures the field information in docs form, with much lower upfront cost than `TypedDict` migration. A future track MAY convert the most-used aliases to `TypedDict` (giving the AI schema hints via type hints instead of via docs); this is a separate decision. - **The 23 lower-impact files** (those with 1-9 weak sites each). Deferred; will be addressed opportunistically or in a future incremental track. - **Adding pydantic models.** Not requested; would be a much larger architectural decision. - **Changing function signatures at the runtime level.** The aliases are TYPE-LEVEL; runtime behavior is identical. @@ -292,7 +401,11 @@ Each phase has its own checkpoint commit and git note. 
### 12.1 Follow-up Track (planned; not in this spec) -**"TypedDict / dataclass Migration"** (`typed_dict_migration_20260606` or similar) — converts the most-used aliases (`CommsLogEntry`, `FileItem`, `ToolDefinition`, `HistoryMessage`) to `TypedDict` (Python 3.8+) or `@dataclass(frozen=True)` so the FIELDS are visible to LLMs, not just the name. This is the natural Phase 2 of this track. Prerequisites: this track (so the field-level schema has a stable name to attach to). +**"Registry Maintenance & CI Integration"** (`type_registry_ci_20260606` or similar) — promotes the type-registry generator from a manual track-completion step to a CI gate. The track: +- Wires `python scripts/generate_type_registry.py --check` into CI; the PR fails if the registry is stale. +- Adds the registry to the per-track commit workflow: the coding agent runs the generator before marking a track complete, and includes the registry diff in the commit. +- Optionally adds a pre-commit hook that runs the generator and stages the diff. +- Is the natural follow-up to `data_structure_strengthening_20260606`. Prerequisites: `data_structure_strengthening_20260606` (so the generator exists and is tested). 
### 12.2 Project References diff --git a/conductor/tracks/data_structure_strengthening_20260606/state.toml b/conductor/tracks/data_structure_strengthening_20260606/state.toml index 7ade3957..a500ff11 100644 --- a/conductor/tracks/data_structure_strengthening_20260606/state.toml +++ b/conductor/tracks/data_structure_strengthening_20260606/state.toml @@ -10,7 +10,7 @@ last_updated = "2026-06-06" [phases] phase_1 = { status = "pending", checkpointsha = "", name = "Aliases + 6-file replacement + audit baseline" } -phase_2 = { status = "pending", checkpointsha = "", name = "NamedTuples + docs + archive" } +phase_2 = { status = "pending", checkpointsha = "", name = "NamedTuples + type registry generator + initial docs + archive" } [tasks] # Phase 1: Aliases + 6-file replacement @@ -28,16 +28,20 @@ t1_11 = { status = "pending", commit_sha = "", description = "Red: tests/test_au t1_12 = { status = "pending", commit_sha = "", description = "Run full test suite; confirm no regressions in 6 refactored files" } t1_13 = { status = "pending", commit_sha = "", description = "Run audit; confirm count dropped from 430 to ~60; commit the new baseline" } t1_14 = { status = "pending", commit_sha = "", description = "Phase 1 checkpoint commit + git note" } -# Phase 2: NamedTuples + docs + archive +# Phase 2: NamedTuples + type registry generator + initial docs + archive t2_1 = { status = "pending", commit_sha = "", description = "Convert src/ai_client.py:_reread_file_items to return FileItemsDiff NamedTuple (replaces Tuple[List[FileItem], List[FileItem]]); update ~3-4 call sites" } t2_2 = { status = "pending", commit_sha = "", description = "Opportunistic NamedTuple conversions for 1-2 more tuple returns (screen coords, etc.)" } -t2_3 = { status = "pending", commit_sha = "", description = "Create conductor/code_styleguides/type_aliases.md (canonical reference for the alias convention; 5 patterns + decision tree + examples)" } -t2_4 = { status = "pending", commit_sha = "", description = 
"Add 'Data Structure Conventions' section to conductor/product-guidelines.md (referencing the new styleguide)" } -t2_5 = { status = "pending", commit_sha = "", description = "Manual smoke test: launch GUI; verify type aliases don't break anything; verify audit --strict mode" } -t2_6 = { status = "pending", commit_sha = "", description = "Phase 2 checkpoint commit + git note (TRACK COMPLETE)" } -t2_7 = { status = "pending", commit_sha = "", description = "git mv conductor/tracks/data_structure_strengthening_20260606 to conductor/tracks/archive/" } -t2_8 = { status = "pending", commit_sha = "", description = "Update conductor/tracks.md: move entry to Recently Completed" } -t2_9 = { status = "pending", commit_sha = "", description = "Final state.toml update: mark all phases completed; add follow-up track typed_dict_migration_20260606 placeholder" } +t2_3 = { status = "pending", commit_sha = "", description = "Red: tests/test_generate_type_registry.py (verify AST extraction of @dataclass, NamedTuple, TypeAlias; verify output markdown structure)" } +t2_4 = { status = "pending", commit_sha = "", description = "Green: implement scripts/generate_type_registry.py (3 modes: default, --check, --diff)" } +t2_5 = { status = "pending", commit_sha = "", description = "Run the generator; commit the initial docs/type_registry/ (index.md + per-source-file .md files)" } +t2_6 = { status = "pending", commit_sha = "", description = "Verify --check mode: introduce a fake change in src/type_aliases.py, run --check, confirm exit 1" } +t2_7 = { status = "pending", commit_sha = "", description = "Create conductor/code_styleguides/type_aliases.md (canonical reference for the alias convention; 5 patterns + decision tree + examples)" } +t2_8 = { status = "pending", commit_sha = "", description = "Add 'Data Structure Conventions' section to conductor/product-guidelines.md (referencing the new styleguide)" } +t2_9 = { status = "pending", commit_sha = "", description = "Manual smoke test: launch 
GUI; verify type aliases don't break anything; verify audit --strict mode; verify generator --check mode" } +t2_10 = { status = "pending", commit_sha = "", description = "Phase 2 checkpoint commit + git note (TRACK COMPLETE)" } +t2_11 = { status = "pending", commit_sha = "", description = "git mv conductor/tracks/data_structure_strengthening_20260606 to conductor/tracks/archive/" } +t2_12 = { status = "pending", commit_sha = "", description = "Update conductor/tracks.md: move entry to Recently Completed" } +t2_13 = { status = "pending", commit_sha = "", description = "Final state.toml update: mark all phases completed; add follow-up track type_registry_ci_20260606 placeholder" } [verification] # Filled as phases complete @@ -81,10 +85,10 @@ project_manager = { weak_sites_before = 20, weak_sites_after = 0, status = "pend aggregate = { weak_sites_before = 17, weak_sites_after = 0, status = "pending" } [typed_dict_migration_followup] -track_id = "typed_dict_migration_20260606" +track_id = "type_registry_ci_20260606" status = "planned_in_data_structure_strengthening_20260606" -converts = ["CommsLogEntry", "FileItem", "ToolDefinition", "HistoryMessage"] -to = "TypedDict or @dataclass(frozen=True)" +goal = "Promote the type-registry generator from a manual track-completion step to a CI gate. Add --check to CI; wire pre-commit hook; document the per-track commit workflow." +note = "This follow-up REPLACES the earlier 'typed_dict_migration' follow-up. Per user feedback (2026-06-06), the registry approach (docs) is preferred over TypedDict migration (code) for the foreseeable future." [public_api_migration_followup] # From the data_oriented_error_handling track