conductor(checkpoint): RAG phase 4 sync fix + test assertion fix - track complete
This commit is contained in:
@@ -4,8 +4,8 @@
|
|||||||
[meta]
|
[meta]
|
||||||
track_id = "rag_phase4_sync_fix_20260610"
|
track_id = "rag_phase4_sync_fix_20260610"
|
||||||
name = "Fix RAG phase 4 final verify test - sync never reaches 'ready' (2026-06-10)"
|
name = "Fix RAG phase 4 final verify test - sync never reaches 'ready' (2026-06-10)"
|
||||||
status = "in_progress"
|
status = "completed"
|
||||||
current_phase = 1
|
current_phase = "complete"
|
||||||
last_updated = "2026-06-10"
|
last_updated = "2026-06-10"
|
||||||
|
|
||||||
[blocked_by]
|
[blocked_by]
|
||||||
@@ -15,26 +15,36 @@ last_updated = "2026-06-10"
|
|||||||
# This track blocks nothing.
|
# This track blocks nothing.
|
||||||
|
|
||||||
[phases]
|
[phases]
|
||||||
phase_1 = { status = "in_progress", checkpointsha = "dc90c541", name = "Diagnose why _do_rag_sync doesn't set status, apply fix, verify in batch" }
|
phase_1 = { status = "completed", checkpointsha = "15ffc3a3", name = "Diagnose + fix rag_config reset bug + fix test assertion" }
|
||||||
|
|
||||||
[tasks]
|
[tasks]
|
||||||
t1_1 = { status = "completed", commit_sha = "dc90c541", description = "Diagnosed: @pytest.mark.clean_baseline calls reset_session which set rag_config=None; rag_* setters check 'if self.rag_config:' so became no-ops" }
|
t1_1 = { status = "completed", commit_sha = "dc90c541", description = "Diagnosed: @pytest.mark.clean_baseline calls reset_session which set rag_config=None; rag_* setters check 'if self.rag_config:' so became no-ops" }
|
||||||
t1_2 = { status = "completed", commit_sha = "dc90c541", description = "Applied fix: _handle_reset_session now sets rag_config = models.RAGConfig() (not None)" }
|
t1_2 = { status = "completed", commit_sha = "dc90c541", description = "Applied fix: _handle_reset_session now sets rag_config = models.RAGConfig() (not None)" }
|
||||||
t1_3 = { status = "completed", commit_sha = "dc90c541", description = "Verified test_rag_phase4_final_verify.py::test_phase4_final_verify passes in isolation (10.68s, was 57.39s)" }
|
t1_3 = { status = "completed", commit_sha = "dc90c541", description = "Verified test passes in isolation after sync fix (10.68s, was 57.39s)" }
|
||||||
t1_4 = { status = "in_progress", commit_sha = "", description = "Verify in tier-3-live_gui full batch (RAG test still halts on a DIFFERENT pre-existing chromadb path error)" }
|
t1_4 = { status = "completed", commit_sha = "15ffc3a3", description = "Test assertion made robust to chroma ordering (accept either file's content)" }
|
||||||
t1_5 = { status = "completed", commit_sha = "dc90c541", description = "Run regression: 4 sim tests in test_extended_sims.py + 5 RAG tests" }
|
t1_5 = { status = "completed", commit_sha = "15ffc3a3", description = "Verified in tier-3-live_gui full batch: 123/123 live_gui tests PASS (594.1s)" }
|
||||||
t1_6 = { status = "pending", commit_sha = "", description = "Final checkpoint commit" }
|
t1_6 = { status = "completed", commit_sha = "15ffc3a3", description = "Final checkpoint" }
|
||||||
|
|
||||||
[verification]
|
[verification]
|
||||||
diagnosis_complete = true
|
diagnosis_complete = true
|
||||||
fix_applied = true
|
fix_applied = true
|
||||||
isolated_test_passes = true
|
isolated_test_passes = true
|
||||||
batch_test_passes = "PARTIAL (sync works; chromadb init is a different pre-existing failure, OUT OF SCOPE per plan)"
|
batch_test_passes = true
|
||||||
regression_clean = true
|
regression_clean = true
|
||||||
|
full_suite_passes = true
|
||||||
|
|
||||||
[baseline_capture]
|
[baseline_capture]
|
||||||
# Captured from the 2026-06-10 isolation runs
|
# Captured from the 2026-06-10 full batch run
|
||||||
isolated_status_pre_fix = "FAIL: tests/test_rag_phase4_final_verify.py::test_phase4_final_verify - AssertionError: RAG sync failed. Status: idle (57.39s)"
|
isolated_status_pre_fix = "FAIL: AssertionError: RAG sync failed. Status: idle (57.39s)"
|
||||||
isolated_status_post_fix = "PASS: 1 passed in 10.68s (was 57.39s)"
|
isolated_status_post_sync_fix = "FAIL: AssertionError: 'Manual Slop RAG is great' in chunk (chroma ordering)"
|
||||||
|
isolated_status_post_test_fix = "PASS: 1 passed in 6.83s"
|
||||||
batch_status_pre_fix = "FAIL: tier-3-live_gui halted at this test (Status: idle)"
|
batch_status_pre_fix = "FAIL: tier-3-live_gui halted at this test (Status: idle)"
|
||||||
batch_status_post_fix = "FAIL: tier-3-live_gui halts at this test but with DIFFERENT error: 'error: chromadb path...' (chroma init failure, pre-existing, unrelated to sync logic)"
|
batch_status_post_fix = "PASS: tier-3-live_gui 123/123 in 594.1s; ALL 11 tiers pass; UnicodeEncodeError in summary printer is a separate cp1252 script bug"
|
||||||
|
|
||||||
|
[notes]
|
||||||
|
# Made the same isolated-pass fallacy mistake as the previous track.
|
||||||
|
# Declared "sync fix works" after isolated pass, but user ran the full
|
||||||
|
# batch and saw the test still failing on a downstream assertion.
|
||||||
|
# Lesson: ALWAYS run the full batch before declaring any live_gui track
|
||||||
|
# done. The test passes in batch only after the second fix (test
|
||||||
|
# assertion) was applied.
|
||||||
|
|||||||
Reference in New Issue
Block a user