{
  "track_id": "test_infrastructure_hardening_20260609",
  "name": "Test Infrastructure Hardening (2026-06-09)",
  "created_at": "2026-06-09",
  "status": "spec",
  "priority": "A",
  "blocked_by": [],
  "blocks": [
    "qwen_llama_grok_integration_20260606",
    "data_oriented_error_handling_20260606",
    "data_structure_strengthening_20260606",
    "mcp_architecture_refactor_20260606",
    "code_path_audit_20260607"
  ],
  "inherits_from": [
    "docs/reports/test_infra_hardening_foundation_20260608.md",
    "docs/reports/batch_resilience_plan_20260608.md",
    "docs/reports/rag_test_batch_failure_status_20260609_pm3.md",
    "docs/reports/rag_work_final_20260609_pm.md"
  ],
  "supersedes": [
    "test_harness_hardening_20260310",
    "test_patch_fixes_20260513",
    "test_batching_post_refactor_polish_20260607",
    "fix_remaining_tests_20260513",
    "manual_ux_validation_20260608_PLACEHOLDER (per FR5 clean_baseline)",
    "regression_fixes_20260605 (residual live_gui work)"
  ],
  "domain": "Meta-Tooling (test infrastructure; not the Application's GUI)",
  "scope_summary": "Fix 3 root causes of test regression churn (subprocess state pollution, filesystem path hygiene, io_pool race) + 2 related bugs (set_value hook, optional clean-baseline) so the 4 upcoming tracks start from a clean test bed.",
  "estimated_effort": "6.5 days (Phases 1-8)",
  "phases": 8,
  "verification_criteria": [
    "FR1: Autouse _check_live_gui_health fixture in place; 3 tests in tests/test_live_gui_respawn.py pass",
    "FR2: 6 test files no longer hardcode Path('tests/artifacts/live_gui_workspace'); live_gui_workspace fixture in place; 3 tests in tests/test_live_gui_workspace_fixture.py pass",
    "FR3: _sync_rag_engine uses token + dirty flag; 3 tests in tests/test_sync_rag_engine_coalescing.py pass",
    "FR4: set_value('ai_input', ...) actually mutates controller state; tests/test_gui2_set_value_hook_works.py passes in batch",
    "FR5: clean_baseline marker in place; 2 tests in tests/test_clean_baseline_marker.py pass",
    "FR6: docs/reports/test_bed_health_20260609.md written and committed with pass/fail counts",
    "Audit: 4 audit files committed in conductor/tracks/test_infrastructure_hardening_20260609/audit/",
    "Audit: scripts/check_test_toml_paths.py extended to flag hardcoded workspace paths",
    "Docs: docs/guide_testing.md updated with new fixtures (FR1, FR2, FR5)",
    "All tier-1 + tier-2 tests pass in batch (no regression)",
    "At least 3 previously-failing tests now pass in batch (the RAG test, the set_value test, the RAG stress test)"
  ],
  "out_of_scope": [
    "Per-file live_gui fixture scope (Solution A from batch_resilience_plan)",
    "MMA pipeline tests that don't reach 'tracks' state (3 tests, separate code path)",
    "Negative-flows tests (3 tests, separate code path)",
    "test_auto_switch_sim (separate code path)",
    "code_path_audit_20260607 (post-4-tracks)",
    "chunkification_optimization_20260608_PLACEHOLDER (not yet approved)",
    "CI infrastructure (no CI in repo)"
  ],
  "risks": [
    {
      "risk": "Per-test respawn adds >200ms per test (NFR1 violation)",
      "mitigation": "Measure with the 49 tests in batch; if exceeded, fall back to per-batch respawn"
    },
    {
      "risk": "tmp_path_factory refactor breaks on-disk chroma DB persistence",
      "mitigation": "Clear .slop_cache/ dirs at session start; OR add a live_gui_workspace_persist opt-in"
    },
    {
      "risk": "conftest.py corruption (previous attempt was reverted)",
      "mitigation": "git stash before each edit; use manual-slop_set_file_slice; Tier 2 supervises"
    },
    {
      "risk": "set_value fix changes behavior for existing tests that assert on the OLD broken behavior",
      "mitigation": "Run full tier-3 batch in Phase 5 and verify no regressions"
    }
  ],
  "tier_2_supervision_required_for": [
    "Phase 1 (audit review)",
    "Phase 3 (conftest refactor)",
    "Phase 4 (io_pool race fix)"
  ]
}