manual_slop/conductor/tracks/test_infrastructure_hardening_20260609/metadata.json

{
  "track_id": "test_infrastructure_hardening_20260609",
  "name": "Test Infrastructure Hardening (2026-06-09)",
  "created_at": "2026-06-09",
  "status": "spec",
  "priority": "A",
  "blocked_by": [],
  "blocks": [
    "qwen_llama_grok_integration_20260606",
    "data_oriented_error_handling_20260606",
    "data_structure_strengthening_20260606",
    "mcp_architecture_refactor_20260606",
    "code_path_audit_20260607"
  ],
  "inherits_from": [
    "docs/reports/test_infra_hardening_foundation_20260608.md",
    "docs/reports/batch_resilience_plan_20260608.md",
    "docs/reports/rag_test_batch_failure_status_20260609_pm3.md",
    "docs/reports/rag_work_final_20260609_pm.md"
  ],
  "supersedes": [
    "test_harness_hardening_20260310",
    "test_patch_fixes_20260513",
    "test_batching_post_refactor_polish_20260607",
    "fix_remaining_tests_20260513",
    "manual_ux_validation_20260608_PLACEHOLDER (per FR5 clean_baseline)",
    "regression_fixes_20260605 (residual live_gui work)"
  ],
  "domain": "Meta-Tooling (test infrastructure; not the Application's GUI)",
  "scope_summary": "Fix 3 root causes of test regression churn (subprocess state pollution, hardcoded filesystem workspace paths, io_pool race) plus 2 related items (a set_value hook that does not mutate controller state, an optional clean_baseline marker) so the 4 upcoming tracks start from a clean test bed.",
  "estimated_effort": "6.5 days (Phases 1-8)",
  "phases": 8,
  "verification_criteria": [
    "FR1: Autouse _check_live_gui_health fixture in place; 3 tests in tests/test_live_gui_respawn.py pass",
    "FR2: 6 test files no longer hardcode Path('tests/artifacts/live_gui_workspace'); live_gui_workspace fixture in place; 3 tests in tests/test_live_gui_workspace_fixture.py pass",
    "FR3: _sync_rag_engine uses token + dirty flag; 3 tests in tests/test_sync_rag_engine_coalescing.py pass",
    "FR4: set_value('ai_input', ...) actually mutates controller state; tests/test_gui2_set_value_hook_works.py passes in batch",
    "FR5: clean_baseline marker in place; 2 tests in tests/test_clean_baseline_marker.py pass",
    "FR6: docs/reports/test_bed_health_20260609.md written and committed with pass/fail counts",
    "Audit: 4 audit files committed in conductor/tracks/test_infrastructure_hardening_20260609/audit/",
    "Audit: scripts/check_test_toml_paths.py extended to flag hardcoded workspace paths",
    "Docs: docs/guide_testing.md updated with new fixtures (FR1, FR2, FR5)",
    "All tier-1 + tier-2 tests pass in batch (no regression)",
    "At least 3 previously-failing tests now pass in batch (the RAG test, the set_value test, the RAG stress test)"
  ],
  "out_of_scope": [
    "Per-file live_gui fixture scope (Solution A from batch_resilience_plan)",
    "MMA pipeline tests that don't reach 'tracks' state (3 tests, separate code path)",
    "Negative-flows tests (3 tests, separate code path)",
    "test_auto_switch_sim (separate code path)",
    "code_path_audit_20260607 (post-4-tracks)",
    "chunkification_optimization_20260608_PLACEHOLDER (not yet approved)",
    "CI infrastructure (no CI in repo)"
  ],
  "risks": [
    {
      "risk": "Per-test respawn adds >200ms per test (NFR1 violation)",
      "mitigation": "Measure with the 49 tests in batch; if exceeded, fall back to per-batch respawn"
    },
    {
      "risk": "tmp_path_factory refactor breaks on-disk chroma DB persistence",
      "mitigation": "Clear .slop_cache/ dirs at session start; alternatively, add a live_gui_workspace_persist opt-in"
    },
    {
      "risk": "conftest.py corruption (previous attempt was reverted)",
      "mitigation": "git stash before each edit; use manual-slop_set_file_slice; Tier 2 supervises"
    },
    {
      "risk": "set_value fix changes behavior for existing tests that assert on the OLD broken behavior",
      "mitigation": "Run full tier-3 batch in Phase 5 and verify no regressions"
    }
  ],
  "tier_2_supervision_required_for": [
    "Phase 1 (audit review)",
    "Phase 3 (conftest refactor)",
    "Phase 4 (io_pool race fix)"
  ]
}