Private
Public Access
0
0
Files
manual_slop/conductor/tracks/data_structure_strengthening_20260606/metadata.json
T
ed ed42a97a9b conductor(track): Initialize data_structure_strengthening_20260606
Track + metadata + state + tracks.md registration for the type-aliases
refactor that follows the audit_weak_types.py findings (430 weak sites
across 29 of 61 files; 80% concentrated in 6 high-traffic files).

Key design decisions (per user approval):
- 10 TypeAlias definitions in src/type_aliases.py (Metadata, CommsLogEntry,
  CommsLog, HistoryMessage, History, FileItem, FileItems, ToolDefinition,
  ToolCall, CommsLogCallback).
- 1 NamedTuple (FileItemsDiff) for the _reread_file_items return.
- Mechanical replacement of 345 weak sites across 6 files (NOT 430; the
  remaining 85 are in 23 lower-impact files deferred to future tracks).
- scripts/audit_weak_types.py gains a --strict mode and a baseline file
  (scripts/audit_weak_types.baseline.json) so the count is enforced.
- 2 phases: aliases + 6-file replacement + audit baseline; NamedTuples
  + docs + archive.
- Honest about what's missing: TypedDict / @dataclass migration is a
  follow-up track (typed_dict_migration_20260606), not this one.
- Coexistence with the data_oriented_error_handling_20260606 track's
  Result[T] / ErrorInfo: the aliases are value-level (data types), Result
  is control-level (wrapper). They compose (Result[FileItems] is valid).
  No conflict.

Audit baseline:
- Pre-track: 430 weak sites, 0 strong patterns
- Target after Phase 1: 85 weak sites (430 - 345; only the 23 lower-impact files)
- Top 4 unique type strings account for 86% of findings (4-6 aliases
  eliminate the bulk of the noise).

Not blocked by anything; can be executed independently of the other
pending tracks. Blocks typed_dict_migration_20260606 (the follow-up track).
2026-06-06 17:49:22 -04:00

147 lines
7.9 KiB
JSON

{
"track_id": "data_structure_strengthening_20260606",
"name": "Data Structure Strengthening (Type Aliases + NamedTuples)",
"initialized": "2026-06-06",
"owner": "tier2-tech-lead",
"priority": "medium",
"status": "active",
"type": "refactor + ai-readability + documentation",
"scope": {
"new_files": [
"src/type_aliases.py",
"tests/test_type_aliases.py",
"tests/test_audit_weak_types.py",
"conductor/code_styleguides/type_aliases.md"
],
"modified_files": [
"src/ai_client.py",
"src/app_controller.py",
"src/models.py",
"src/api_hook_client.py",
"src/project_manager.py",
"src/aggregate.py",
"conductor/product-guidelines.md",
"scripts/audit_weak_types.py"
]
},
"blocked_by": [],
"blocks": ["typed_dict_migration_20260606"],
"blocks_note": "typed_dict_migration_20260606 is not yet created",
"estimated_phases": 2,
"spec": "spec.md",
"plan": "plan.md",
"priority_order": "A (6 aliases + 6-file replacement) > B (canonical names + audit CI gate) > C (NamedTuples + docs) > D (plan follow-up)",
"audit_data": {
"total_weak_findings_baseline": 430,
"files_scanned": 61,
"files_with_findings_baseline": 29,
"positive_patterns_baseline": 0,
"unique_type_strings_baseline": 26,
"top_4_unique_types_account_for_pct": 86,
"top_offender": "src/ai_client.py (139 findings, 32.3%)"
},
"type_aliases": {
"Metadata": "dict[str, Any] - the root alias; any key-value record",
"CommsLogEntry": "Metadata - a single entry in the AI comms log",
"CommsLog": "list[CommsLogEntry] - the comms log ring buffer",
"HistoryMessage": "Metadata - a single message in the AI provider history",
"History": "list[HistoryMessage] - the conversation history",
"FileItem": "Metadata - a single file in the context (path, content, is_image, etc.)",
"FileItems": "list[FileItem] - the most common weak pattern in the codebase",
"ToolDefinition": "Metadata - a single tool definition (function name, description, parameters)",
"ToolCall": "Metadata - a single tool call from the model (id, type, function)",
"CommsLogCallback": "Callable[[CommsLogEntry], None] - the callback signature"
},
"named_tuples": {
"FileItemsDiff": "NamedTuple with fields (refreshed: FileItems, changed: FileItems) - the return of _reread_file_items"
},
"refactor_targets": {
"src/ai_client.py": {
"weak_sites": 139,
"replacement_strategy": "79 dict_str_any -> Metadata/CommsLogEntry/HistoryMessage/FileItem/ToolDefinition/ToolCall; 56 list_of_dict -> CommsLog/History/FileItems/list[ToolDefinition]; 2 Optional[List[Dict[...]]] -> Optional[FileItems]; 2 assign_tuple_literal -> ToolCall"
},
"src/app_controller.py": {
"weak_sites": 86,
"replacement_strategy": "62 dict_str_any -> Metadata; 20 list_of_dict -> list[Metadata]; 4 optional_dict -> Optional[Metadata]"
},
"src/models.py": {
"weak_sites": 51,
"replacement_strategy": "48 dict_str_any -> Metadata; 3 list_of_dict -> list[Metadata]"
},
"src/api_hook_client.py": {
"weak_sites": 32,
"replacement_strategy": "30 dict_str_any -> Metadata; 2 list_of_dict -> list[Metadata]"
},
"src/project_manager.py": {
"weak_sites": 20,
"replacement_strategy": "16 dict_str_any -> Metadata; 3 list_of_dict -> list[Metadata]; 1 optional_dict -> Optional[Metadata]"
},
"src/aggregate.py": {
"weak_sites": 17,
"replacement_strategy": "10 dict_str_any -> Metadata; 7 list_of_dict -> list[Metadata]"
}
},
"audit_ci_gate": {
"script": "scripts/audit_weak_types.py",
"current_mode": "informational (exit 0 always)",
"new_mode": "strict (exit 1 if new findings introduced vs baseline)",
"baseline_file": "scripts/audit_weak_types.baseline.json",
"baseline_after_phase_1": "85 findings (430 - 345; only the 23 lower-impact files remain)",
"target_reduction": "430 -> 85 (80% overall reduction; the 6 high-traffic files drop to 0)"
},
"ai_performance_analysis": {
"win": "A name is a one-time cost the AI pays to learn, then reuses forever. With 10 aliases covering 370+ usages, the AI's vocabulary cost is bounded while the readability win is unbounded.",
"cost": "10 new names for the AI to learn. Comparable to adding 10 new function names to a module - well within normal Python codebase scale.",
"caveat": "If we add too many aliases (50+), the cognitive cost exceeds the benefit. The proposed 10 is the sweet spot. The follow-up track (typed_dict_migration_20260606) will convert the most-used aliases to TypedDict, which gives the AI field-level hints, not just a name.",
"honest_assessment": "Net win. The current 0 aliases is the worst case; going to 10 is a strictly better state for AI readability."
},
"coexistence_with_data_oriented_track": {
"Result_T": "The data_oriented_error_handling_20260606 track introduces Result[T] as a control-level wrapper. The aliases introduced by THIS track are value-level types (what's inside the T).",
"ErrorInfo": "Already a @dataclass from the data_oriented track; no change.",
"Result_composition": "Result[FileItems] is valid - the aliases name the T, not the Result itself."
},
"architectural_invariant": "The 10 type aliases are the CANONICAL names for the metadata family. New code MUST use them. Old code is migrated opportunistically. The audit script enforces this via the --strict mode (exits 1 if new weak sites are introduced).",
"threading_constraint": "No change. TypeAlias is type-level only; runtime behavior is identical to the underlying types. The aliases are thread-safe because dict / list / Callable are thread-safe for the operations performed.",
"verification_criteria": [
"src/type_aliases.py exists with 10 TypeAliases and 1 NamedTuple",
"All 10 aliases import successfully (tests/test_type_aliases.py)",
"Result[FileItems] is a valid generic (verified by importing)",
"scripts/audit_weak_types.py reports 345 fewer findings after Phase 1 (85 total)",
"scripts/audit_weak_types.py --strict mode exits 1 when a new weak site is added",
"scripts/audit_weak_types.baseline.json is committed with the post-Phase-1 count",
"src/ai_client.py: 139 weak sites -> 0 weak sites (all replaced with aliases)",
"src/app_controller.py: 86 -> 0",
"src/models.py: 51 -> 0",
"src/api_hook_client.py: 32 -> 0",
"src/project_manager.py: 20 -> 0",
"src/aggregate.py: 17 -> 0",
"Phase 2: _reread_file_items returns FileItemsDiff (NamedTuple); all call sites updated",
"Phase 2: 1-2 more tuple returns converted to NamedTuples opportunistically",
"tests/test_type_aliases.py: 8+ tests pass",
"tests/test_audit_weak_types.py: 6+ tests pass",
"tests/test_ai_client.py (existing): no regressions",
"tests/test_app_controller.py (existing): no regressions",
"tests/test_models.py (existing): no regressions",
"tests/test_api_hook_client.py (existing): no regressions",
"tests/test_project_manager.py (existing): no regressions",
"tests/test_aggregate.py (existing): no regressions",
"conductor/product-guidelines.md: new 'Data Structure Conventions' section added",
"conductor/code_styleguides/type_aliases.md: the canonical reference",
"No new threading.Thread calls in src/",
"No new Optional[X] introduced by the refactor (the aliases compose with Optional, but no NEW Optional types are added)",
"No runtime behavior changes (aliases are type-level only)"
],
"links": {
"backlog_entry": "conductor/tracks.md (to be added)",
"audit_script": "scripts/audit_weak_types.py",
"code_styleguide": "conductor/code_styleguides/type_aliases.md (to be created in Phase 2)",
"testing_guide": "docs/guide_testing.md",
"audit_baseline": "scripts/audit_weak_types.baseline.json (to be created in Phase 1)",
"related_tracks": [
"conductor/tracks/startup_speedup_20260606/",
"conductor/tracks/test_batching_refactor_20260606/",
"conductor/tracks/qwen_llama_grok_integration_20260606/",
"conductor/tracks/data_oriented_error_handling_20260606/"
]
}
}