conductor(track): metadata.json for rag_test_failures_20260615
This commit is contained in:
@@ -0,0 +1,232 @@
|
||||
{
|
||||
"track_id": "rag_test_failures_20260615",
|
||||
"name": "RAG Test Failures Fix",
|
||||
"initialized": "2026-06-15",
|
||||
"owner": "tier2-tech-lead",
|
||||
"priority": "A",
|
||||
"status": "active",
|
||||
"type": "bugfix + test_fix + documentation",
|
||||
"scope": {
|
||||
"new_files": [
|
||||
"tests/test_rag_sync_none_error.py"
|
||||
],
|
||||
"modified_files": [
|
||||
"src/app_controller.py",
|
||||
"src/rag_engine.py",
|
||||
"docs/guide_rag.md (conditional)"
|
||||
],
|
||||
"deleted_files": []
|
||||
},
|
||||
"blocked_by": [],
|
||||
"blocks": [
|
||||
"data_structure_strengthening_20260606",
|
||||
"user_stated_intent: send_result -> send mass rename"
|
||||
],
|
||||
"estimated_phases": 5,
|
||||
"spec": "spec.md",
|
||||
"plan": "plan.md",
|
||||
|
||||
"regressions_and_pre_existing_failures": [
|
||||
{
|
||||
"id": "G1_rag_phase4_final_verify",
|
||||
"severity": "high",
|
||||
"category": "rag_subsystem_bug",
|
||||
"file_line": "tests/test_rag_phase4_final_verify.py:65",
|
||||
"symptom": "RAG sync fails with 'NoneType object has no attribute get' after rag_enabled=True",
|
||||
"fix_phase": 2,
|
||||
"fix": "TBD by Phase 1 investigation (most likely src/rag_engine.py:_validate_collection_dim_result or src/app_controller.py:_do_rag_sync)"
|
||||
},
|
||||
{
|
||||
"id": "G2_rag_phase4_stress",
|
||||
"severity": "high",
|
||||
"category": "rag_subsystem_bug",
|
||||
"file_line": "tests/test_rag_phase4_stress.py:48",
|
||||
"symptom": "Same as G1 (RAG sync fails)",
|
||||
"fix_phase": 2,
|
||||
"fix": "Same fix as G1 (one root cause for all 3 tests)"
|
||||
},
|
||||
{
|
||||
"id": "G3_rag_visual_sim",
|
||||
"severity": "high",
|
||||
"category": "rag_subsystem_bug",
|
||||
"file_line": "tests/test_rag_visual_sim.py:32",
|
||||
"symptom": "Same as G1 (RAG sync fails at initial status check)",
|
||||
"fix_phase": 2,
|
||||
"fix": "Same fix as G1 (one root cause for all 3 tests)"
|
||||
}
|
||||
],
|
||||
|
||||
"pre_existing_failures_fixed_by_this_track": [
|
||||
{
|
||||
"id": "PE_1",
|
||||
"test": "tests/test_rag_phase4_final_verify.py::test_phase4_final_verify",
|
||||
"fix_phase": 2,
|
||||
"root_cause": "RAG sync NoneType.get error in src/app_controller.py:_do_rag_sync"
|
||||
},
|
||||
{
|
||||
"id": "PE_2",
|
||||
"test": "tests/test_rag_phase4_stress.py::test_rag_large_codebase_verification_sim",
|
||||
"fix_phase": 2,
|
||||
"root_cause": "Same as PE_1"
|
||||
},
|
||||
{
|
||||
"id": "PE_3",
|
||||
"test": "tests/test_rag_visual_sim.py::test_rag_full_lifecycle_sim",
|
||||
"fix_phase": 2,
|
||||
"root_cause": "Same as PE_1"
|
||||
}
|
||||
],
|
||||
|
||||
"pre_existing_failures_remaining": [],
|
||||
|
||||
"incidental_fixes_from_parent_track": [
|
||||
{
|
||||
"id": "INC_1",
|
||||
"test": "tests/test_rag_integration.py::test_rag_integration",
|
||||
"fixed_by": "public_api_migration_and_ui_polish_20260615 Phase 2 follow-up (commit 26e1b652)",
|
||||
"root_cause": "Mock return value needed Result(data=...) wrapper"
|
||||
}
|
||||
],
|
||||
|
||||
"deferred_to_followup_tracks": [
|
||||
{
|
||||
"id": "send_result_to_send_rename",
|
||||
"title": "send_result -> send Mass Rename (user's stated intent)",
|
||||
"description": "The user has stated intent to do a mass rename of send_result to send. The rename is mechanical (Result[T] return type is stable; only the function name changes). The user will do this manually after this track ships.",
|
||||
"track_status": "user_manual_refactor"
|
||||
},
|
||||
{
|
||||
"id": "data_structure_strengthening_20260606",
|
||||
"title": "Data Structure Strengthening (Type Aliases + NamedTuples)",
|
||||
"description": "Introduce 6 TypeAlias definitions in src/type_aliases.py; replace 370+ anonymous dict[str, Any] sites in 6 high-traffic files. Spec already exists; plan pending.",
|
||||
"track_status": "ready to start; blocked by this track (cleaner Result API usage makes type-alias replacement easier)"
|
||||
},
|
||||
{
|
||||
"id": "live_gui_mock_injection_20260615",
|
||||
"title": "Live GUI Mock Injection Infrastructure",
|
||||
"description": "Infrastructure for mock injection into the live_gui subprocess. Unblocks proper end-to-end live_gui + AI client tests.",
|
||||
"track_status": "recommended; not yet specced"
|
||||
},
|
||||
{
|
||||
"id": "rag_test_quality_cleanup",
|
||||
"title": "RAG Test Quality Cleanup",
|
||||
"description": "Replace time.sleep(0.5) patterns in RAG tests with poll loops; improve error messages; remove flaky patterns. Not a bug fix; quality improvement.",
|
||||
"track_status": "recommended; not yet specced"
|
||||
}
|
||||
],
|
||||
|
||||
"verification_criteria": {
|
||||
"g1_reproducing_test_exists": "tests/test_rag_sync_none_error.py exists and fails before the fix",
|
||||
"g2_three_rag_tests_pass": "uv run pytest tests/test_rag_phase4_final_verify.py tests/test_rag_phase4_stress.py tests/test_rag_visual_sim.py -v passes 3/3",
|
||||
"g3_defensive_guard_added": "Error message identifies which field or call is None (not just 'NoneType has no attribute get')",
|
||||
"g4_docs_updated": "docs/guide_rag.md has a troubleshooting entry (only if the file exists; skip if it doesn't)",
|
||||
"nf1_no_new_regressions": "uv run pytest tests/ shows 1285 pass + 4 skip + 0 fail (was 1282 + 4 + 3 pre-track)",
|
||||
"nf2_per_task_atomic_commits": "5-7 atomic commits with clear messages",
|
||||
"nf3_style_preserved": "1-space indentation, no comments, type hints in all changed code",
|
||||
"nf4_per_commit_git_notes": "All commits have git notes summarizing the fix"
|
||||
},
|
||||
|
||||
"fr_to_phase_mapping": {
|
||||
"G1_G2_G3_three_rag_tests": {
|
||||
"phase": 2,
|
||||
"fix_files": ["src/app_controller.py:1479-1482 (likely)", "src/rag_engine.py (likely)"],
|
||||
"test_files": ["tests/test_rag_phase4_final_verify.py", "tests/test_rag_phase4_stress.py", "tests/test_rag_visual_sim.py", "tests/test_rag_sync_none_error.py (new)"],
|
||||
"min_test_count": 4
|
||||
},
|
||||
"G3_defensive_guard": {
|
||||
"phase": 2,
|
||||
"fix_files": ["src/app_controller.py:1479-1482", "src/rag_engine.py"],
|
||||
"min_test_count": 0
|
||||
},
|
||||
"G4_docs_update": {
|
||||
"phase": 4,
|
||||
"fix_files": ["docs/guide_rag.md (conditional)"],
|
||||
"min_test_count": 0
|
||||
}
|
||||
},
|
||||
|
||||
"estimated_effort": {
|
||||
"phase_1": "1-2 hours - investigation + reproducing test",
|
||||
"phase_2": "1-3 hours - fix (dependent on Phase 1 finding)",
|
||||
"phase_3": "30 min - full + batched test verification",
|
||||
"phase_4": "15 min - docs update (conditional)",
|
||||
"phase_5": "15 min - metadata + tracks.md",
|
||||
"total": "0.5-1 day Tier 2 work (4-8 hours)"
|
||||
},
|
||||
|
||||
"risk_register": {
|
||||
"R1_fix_breaks_unrelated_test": {
|
||||
"likelihood": "low",
|
||||
"impact": "medium",
|
||||
"mitigation": "Run the full test suite in Phase 3 + the batched test. If a new failure appears, STOP and report."
|
||||
},
|
||||
"R2_bug_in_hard_to_reach_code_path": {
|
||||
"likelihood": "medium",
|
||||
"impact": "medium",
|
||||
"mitigation": "Add diagnostic traceback in Phase 1; capture the actual error site; document in commit message."
|
||||
},
|
||||
"R3_fix_is_in_test_not_production": {
|
||||
"likelihood": "low",
|
||||
"impact": "low",
|
||||
"mitigation": "If the fix is in the test, document this in the commit message. Consider adding a teardown reset."
|
||||
},
|
||||
"R4_regression_in_rag_engine_ready_status_bug": {
|
||||
"likelihood": "low",
|
||||
"impact": "medium",
|
||||
"mitigation": "Run the full RAG test suite after the fix."
|
||||
},
|
||||
"R5_takes_longer_than_estimated": {
|
||||
"likelihood": "low",
|
||||
"impact": "low",
|
||||
"mitigation": "Even 2 days is acceptable; user's overall plan is 2 more tracks before data structure."
|
||||
}
|
||||
},
|
||||
|
||||
"audit_findings_20260615": {
|
||||
"remaining_pre_existing_failures": {
|
||||
"test_rag_phase4_final_verify.py::test_phase4_final_verify": {
|
||||
"tier": "tier-3 (live_gui)",
|
||||
"failure_point": "line 65 (after rag_enabled=True + wait for rag_status == ready)",
|
||||
"error": "RAG sync failed. Status: error: 'NoneType' object has no attribute 'get'"
|
||||
},
|
||||
"test_rag_phase4_stress.py::test_rag_large_codebase_verification_sim": {
|
||||
"tier": "tier-3 (live_gui)",
|
||||
"failure_point": "line 48 (same pattern)",
|
||||
"error": "Same as above"
|
||||
},
|
||||
"test_rag_visual_sim.py::test_rag_full_lifecycle_sim": {
|
||||
"tier": "tier-3 (live_gui)",
|
||||
"failure_point": "line 32 (initial status check after rag_enabled=True)",
|
||||
"error": "Same as above"
|
||||
}
|
||||
},
|
||||
"fixed_by_parent_track": {
|
||||
"test_rag_integration.py::test_rag_integration": {
|
||||
"fixed_by": "public_api_migration_and_ui_polish_20260615 Phase 2 follow-up (commit 26e1b652)",
|
||||
"root_cause": "Mock return value needed Result(data=...) wrapper",
|
||||
"note": "Was listed as 1 of 4 RAG failures in the parent spec; was actually fixed during that track"
|
||||
}
|
||||
},
|
||||
"investigation_clues": {
|
||||
"RAGConfig_default_state": "vector_store: VectorStoreConfig(provider='mock', ...); NOT None; verified by direct instantiation",
|
||||
"RAGEngine_init_with_mock": "Succeeds; client='mock'; collection='mock'; is_empty()=True; no further sync work",
|
||||
"most_likely_call_site": "src/rag_engine.py:149 (embeddings = res.get('embeddings') in _validate_collection_dim_result) - but only triggered for chroma provider, not mock",
|
||||
"secondary_clue": "src/rag_engine.py:_init_vector_store_result returns Result(data=None) for mock branch; the mock branch is hit and exits successfully",
|
||||
"error_path": "src/app_controller.py:1479-1482 catches the exception and sets rag_status to f'error: {e}'"
|
||||
},
|
||||
"RAG_subsystem_state": {
|
||||
"rag_config": "Initialized in __init__ (src/app_controller.py:1830-1831) as RAGConfig() default OR models.RAGConfig.from_dict(rag_data)",
|
||||
"rag_config_reset": "src/app_controller.py:3387 sets self.rag_config = _rag_models.RAGConfig() (fresh default)",
|
||||
"active_project_root": "Property at line 1388; returns str(Path(self.active_project_path).parent) or self.ui_files_base_dir",
|
||||
"embedding_provider_default": "'gemini' (per RAGConfig field default)",
|
||||
"vector_store_default": "VectorStoreConfig(provider='mock', ...)"
|
||||
}
|
||||
},
|
||||
|
||||
"milestone_context": {
|
||||
"pre_track_state": "1282 pass + 4 skip + 3 fail (10 fail pre-public_api; 7 fixed in that track)",
|
||||
"post_track_target": "1285 pass + 4 skip + 0 fail",
|
||||
"historical_context": "First fully green baseline since data_oriented_error_handling_20260606 shipped 2026-06-12",
|
||||
"user_intent_after_this_track": "send_result -> send mass rename (user will do manually), then data_structure_strengthening_20260606 track"
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user