manual_slop/conductor/tracks/data_structure_strengthening_20260606/metadata.json

{
  "track_id": "data_structure_strengthening_20260606",
  "name": "Data Structure Strengthening (Type Aliases + NamedTuples)",
  "initialized": "2026-06-06",
  "owner": "tier2-tech-lead",
  "priority": "medium",
  "status": "active",
  "type": "refactor + ai-readability + documentation",
  "scope": {
    "new_files": [
      "src/type_aliases.py",
      "tests/test_type_aliases.py",
      "tests/test_audit_weak_types.py",
      "tests/test_generate_type_registry.py",
      "scripts/generate_type_registry.py",
      "docs/type_registry/index.md",
      "docs/type_registry/type_aliases.md",
      "docs/type_registry/ai_client.md",
      "docs/type_registry/app_controller.md",
      "docs/type_registry/models.md",
      "docs/type_registry/api_hook_client.md",
      "docs/type_registry/project_manager.md",
      "docs/type_registry/aggregate.md",
      "docs/type_registry/result_types.md",
      "conductor/code_styleguides/type_aliases.md"
    ],
    "modified_files": [
      "src/ai_client.py",
      "src/app_controller.py",
      "src/models.py",
      "src/api_hook_client.py",
      "src/project_manager.py",
      "src/aggregate.py",
      "conductor/product-guidelines.md",
      "scripts/audit_weak_types.py"
    ]
  },
  "blocked_by": [],
  "blocks": ["type_registry_ci_20260606" /* not yet created; the registry-CI-integration follow-up */],
  "estimated_phases": 2,
  "spec": "spec.md",
  "plan": "plan.md",
  "priority_order": "A (6 aliases + 6-file replacement) > B (canonical names + audit CI gate) > C (NamedTuples + docs) > D (plan follow-up)",
  "audit_data": {
    "total_weak_findings_baseline": 430,
    "files_scanned": 61,
    "files_with_findings_baseline": 29,
    "positive_patterns_baseline": 0,
    "unique_type_strings_baseline": 26,
    "top_4_unique_types_account_for_pct": 86,
    "top_offender": "src/ai_client.py (139 findings, 32.3%)"
  },
  "type_aliases": {
    "Metadata": "dict[str, Any] - the root alias; any key-value record",
    "CommsLogEntry": "Metadata - a single entry in the AI comms log",
    "CommsLog": "list[CommsLogEntry] - the comms log ring buffer",
    "HistoryMessage": "Metadata - a single message in the AI provider history",
    "History": "list[HistoryMessage] - the conversation history",
    "FileItem": "Metadata - a single file in the context (path, content, is_image, etc.)",
    "FileItems": "list[FileItem] - the most common weak pattern in the codebase",
    "ToolDefinition": "Metadata - a single tool definition (function name, description, parameters)",
    "ToolCall": "Metadata - a single tool call from the model (id, type, function)",
    "CommsLogCallback": "Callable[[CommsLogEntry], None] - the callback signature"
  },
  "named_tuples": {
    "FileItemsDiff": "NamedTuple with fields (refreshed: FileItems, changed: FileItems) - the return of _reread_file_items"
  },
  "refactor_targets": {
    "src/ai_client.py": {
      "weak_sites": 139,
      "replacement_strategy": "79 dict_str_any -> Metadata/CommsLogEntry/HistoryMessage/FileItem/ToolDefinition/ToolCall; 56 list_of_dict -> CommsLog/History/FileItems/ToolDefinitions; 2 Optional[List[Dict[...]]] -> Optional[FileItems]; 2 assign_tuple_literal -> ToolCall"
    },
    "src/app_controller.py": {
      "weak_sites": 86,
      "replacement_strategy": "62 dict_str_any -> Metadata; 20 list_of_dict -> list[Metadata]; 4 optional_dict -> Optional[Metadata]"
    },
    "src/models.py": {
      "weak_sites": 51,
      "replacement_strategy": "48 dict_str_any -> Optional[Metadata]; 3 list_of_dict -> list[Metadata]"
    },
    "src/api_hook_client.py": {
      "weak_sites": 32,
      "replacement_strategy": "30 dict_str_any -> Metadata; 2 list_of_dict -> list[Metadata]"
    },
    "src/project_manager.py": {
      "weak_sites": 20,
      "replacement_strategy": "16 dict_str_any -> Metadata; 3 list_of_dict -> list[Metadata]; 1 optional_dict -> Optional[Metadata]"
    },
    "src/aggregate.py": {
      "weak_sites": 17,
      "replacement_strategy": "10 dict_str_any -> Metadata; 7 list_of_dict -> list[Metadata]"
    }
  },
  "audit_ci_gate": {
    "script": "scripts/audit_weak_types.py",
    "current_mode": "informational (exit 0 always)",
    "new_mode": "strict (exit 1 if new findings introduced vs baseline)",
    "baseline_file": "scripts/audit_weak_types.baseline.json",
    "baseline_after_phase_1": "~60 findings (only the 23 lower-impact files remain)",
    "target_reduction": "430 -> ~60 (86% reduction in the 6 high-traffic files)"
  },
  "ai_performance_analysis": {
    "win": "A name is a one-time cost the AI pays to learn, then reuses forever. With 10 aliases covering 370+ usages, the AI's vocabulary cost is bounded while the readability win is unbounded. The auto-generated registry gives the AI field-level information on demand at the cost of a few hundred tokens of context per query.",
    "cost": "10 new names for the AI to learn (same as adding 10 new function names to a module - well within normal Python codebase scale). Plus a small token cost when the AI reads a registry file: 200-500 lines of markdown per source file, read once and cached in context.",
    "caveat": "If we add too many aliases (50+), the cognitive cost exceeds the benefit. The proposed 10 is the sweet spot. The docs-based registry approach is an alternative to TypedDict migration: docs are advisory but auto-maintained, whereas TypedDict would enforce but cost more upfront.",
    "honest_assessment": "Net win. The current 0 aliases is the worst case; going to 10 is a strictly better state for AI readability. Adding auto-generated docs is a further improvement at modest token cost."
  },

  "type_registry": {
    "directory": "docs/type_registry/",
    "files": [
      "index.md (top-level TOCs)",
      "type_aliases.md (the 10 TypeAliases from src/type_aliases.py)",
      "result_types.md (the Result/ErrorInfo from data_oriented_error_handling_20260606)",
      "<one .md per source file that has structs>"
    ],
    "script": "scripts/generate_type_registry.py",
    "script_modes": {
      "default": "Generate / regenerate the registry",
      "--check": "CI mode; exits 1 if the registry would change",
      "--diff": "Dry run; print what would change without writing"
    },
    "agent_workflow": "The coding agent runs the generator before marking a track complete, and includes the registry diff in the commit. CI runs --check on every PR.",
    "ai_token_cost": "200-500 lines of markdown per source file. The LLM reads it once and caches the schema in context. Subsequent references to the same types don't re-fetch.",
    "rationale": "Trade upfront cost (TypedDict schema design for every type) for token cost (LLM reads docs at query time). Docs are auto-maintained; TypedDict schemas would need to be hand-maintained. For a codebase where the priority is 'name the shapes first, give them structure later', docs are the right v1 approach."
  },
  "coexistence_with_data_oriented_track": {
    "Result_T": "The data_oriented_error_handling_20260606 track introduces Result[T] as a control-level wrapper. The aliases introduced by THIS track are value-level types (what's inside the T).",
    "ErrorInfo": "Already a @dataclass from the data_oriented track; no change.",
    "Result_composition": "Result[FileItems] is valid - the aliases name the T, not the Result itself."
  },
  "architectural_invariant": "The 6 type aliases are the CANONICAL names for the metadata family. New code MUST use them. Old code is migrated opportunistically. The audit script enforces this via the --strict mode (exits 1 if new weak sites are introduced).",
  "threading_constraint": "No change. TypeAlias is type-level only; runtime behavior is identical to the underlying types. The aliases are thread-safe because dict / list / Callable are thread-safe for the operations performed.",
  "verification_criteria": [
    "src/type_aliases.py exists with 10 TypeAliases and 1 NamedTuple",
    "All 10 aliases import successfully (tests/test_type_aliases.py)",
    "Result[FileItems] is a valid generic (verified by importing)",
    "scripts/audit_weak_types.py reports 370+ fewer findings after Phase 1 (~60 total)",
    "scripts/audit_weak_types.py --strict mode exits 1 when a new weak site is added",
    "scripts/audit_weak_types.baseline.json is committed with the post-Phase-1 count",
    "src/ai_client.py: 139 weak sites -> 0 weak sites (all replaced with aliases)",
    "src/app_controller.py: 86 -> 0",
    "src/models.py: 51 -> 0",
    "src/api_hook_client.py: 32 -> 0",
    "src/project_manager.py: 20 -> 0",
    "src/aggregate.py: 17 -> 0",
    "Phase 2: _reread_file_items returns FileItemsDiff (NamedTuple); all call sites updated",
    "Phase 2: 1-2 more tuple returns converted to NamedTuples opportunistically",
    "tests/test_type_aliases.py: 8+ tests pass",
    "tests/test_audit_weak_types.py: 6+ tests pass",
    "tests/test_ai_client.py (existing): no regressions",
    "tests/test_app_controller.py (existing): no regressions",
    "tests/test_models.py (existing): no regressions",
    "tests/test_api_hook_client.py (existing): no regressions",
    "tests/test_project_manager.py (existing): no regressions",
    "tests/test_aggregate.py (existing): no regressions",
    "conductor/product-guidelines.md: new 'Data Structure Conventions' section added",
    "conductor/code_styleguides/type_aliases.md: the canonical reference",
    "No new threading.Thread calls in src/",
    "No new Optional[X] introduced by the refactor (the aliases compose with Optional, but no NEW Optional types are added)",
    "No runtime behavior changes (aliases are type-level only)"
  ],
  "links": {
    "backlog_entry": "conductor/tracks.md (to be added)",
    "audit_script": "scripts/audit_weak_types.py",
    "code_styleguide": "conductor/code_styleguides/type_aliases.md (to be created in Phase 2)",
    "testing_guide": "docs/guide_testing.md",
    "audit_baseline": "scripts/audit_weak_types.baseline.json (to be created in Phase 1)",
    "related_tracks": [
      "conductor/tracks/startup_speedup_20260606/",
      "conductor/tracks/test_batching_refactor_20260606/",
      "conductor/tracks/qwen_llama_grok_integration_20260606/",
      "conductor/tracks/data_oriented_error_handling_20260606/"
    ]
  }
}