{ "track_id": "test_infrastructure_hardening_20260609", "name": "Test Infrastructure Hardening (2026-06-09)", "created_at": "2026-06-09", "status": "shipped", "priority": "A", "blocked_by": [], "blocks": [ "qwen_llama_grok_integration_20260606", "data_oriented_error_handling_20260606", "data_structure_strengthening_20260606", "mcp_architecture_refactor_20260606", "code_path_audit_20260607" ], "inherits_from": [ "docs/reports/test_infra_hardening_foundation_20260608.md", "docs/reports/batch_resilience_plan_20260608.md", "docs/reports/rag_test_batch_failure_status_20260609_pm3.md", "docs/reports/rag_work_final_20260609_pm.md" ], "supersedes": [ "test_harness_hardening_20260310", "test_patch_fixes_20260513", "test_batching_post_refactor_polish_20260607", "fix_remaining_tests_20260513", "manual_ux_validation_20260608_PLACEHOLDER (per FR5 clean_baseline)", "regression_fixes_20260605 (residual live_gui work)" ], "domain": "Meta-Tooling (test infrastructure; not the Application's GUI)", "scope_summary": "Fix 3 root causes of test regression churn (subprocess state pollution, filesystem path hygiene, io_pool race) + 2 related bugs (set_value hook, optional clean-baseline) so the 4 upcoming tracks start from a clean test bed.", "estimated_effort": "6.5 days (Phases 1-8)", "phases": 8, "verification_criteria": [ "FR1: Autouse _check_live_gui_health fixture in place; 3 tests in tests/test_live_gui_respawn.py pass", "FR2: 6 test files no longer hardcode Path('tests/artifacts/live_gui_workspace'); live_gui_workspace fixture in place; 3 tests in tests/test_live_gui_workspace_fixture.py pass", "FR3: _sync_rag_engine uses token + dirty flag; 3 tests in tests/test_sync_rag_engine_coalescing.py pass", "FR4: set_value('ai_input', ...) actually mutates controller state; tests/test_gui2_set_value_hook_works.py passes in batch", "FR5: clean_baseline marker in place; 2 tests in tests/test_clean_baseline_marker.py pass", "FR6: docs/reports/test_bed_health_20260609.md written and committed with pass/fail counts", "Audit: 4 audit files committed in conductor/tracks/test_infrastructure_hardening_20260609/audit/", "Audit: scripts/check_test_toml_paths.py extended to flag hardcoded workspace paths", "Docs: docs/guide_testing.md updated with new fixtures (FR1, FR2, FR5)", "All tier-1 + tier-2 tests pass in batch (no regression)", "At least 3 previously-failing tests now pass in batch (the RAG test, the set_value test, the RAG stress test)" ], "out_of_scope": [ "Per-file live_gui fixture scope (Solution A from batch_resilience_plan)", "MMA pipeline tests that don't reach 'tracks' state (3 tests, separate code path)", "Negative-flows tests (3 tests, separate code path)", "test_auto_switch_sim (separate code path)", "code_path_audit_20260607 (post-4-tracks)", "chunkification_optimization_20260608_PLACEHOLDER (not yet approved)", "CI infrastructure (no CI in repo)" ], "risks": [ { "risk": "Per-test respawn adds >200ms per test (NFR1 violation)", "mitigation": "Measure with the 49 tests in batch; if exceeded, fall back to per-batch respawn" }, { "risk": "tmp_path_factory refactor breaks on-disk chroma DB persistence", "mitigation": "Clear .slop_cache/ dirs at session start; OR add a live_gui_workspace_persist opt-in" }, { "risk": "conftest.py corruption (previous attempt was reverted)", "mitigation": "git stash before each edit; use manual-slop_set_file_slice; Tier 2 supervises" }, { "risk": "set_value fix changes behavior for existing tests that assert on the OLD broken behavior", "mitigation": "Run full tier-3 batch in Phase 5 and verify no regressions" } ], "tier_2_supervision_required_for": [ "Phase 1 (audit review)", "Phase 3 (conftest refactor)", "Phase 4 (io_pool race fix)" ] }