diff --git a/conductor/tracks/rag_test_failures_20260615/metadata.json b/conductor/tracks/rag_test_failures_20260615/metadata.json new file mode 100644 index 00000000..af026998 --- /dev/null +++ b/conductor/tracks/rag_test_failures_20260615/metadata.json @@ -0,0 +1,232 @@ +{ + "track_id": "rag_test_failures_20260615", + "name": "RAG Test Failures Fix", + "initialized": "2026-06-15", + "owner": "tier2-tech-lead", + "priority": "A", + "status": "active", + "type": "bugfix + test_fix + documentation", + "scope": { + "new_files": [ + "tests/test_rag_sync_none_error.py" + ], + "modified_files": [ + "src/app_controller.py", + "src/rag_engine.py", + "docs/guide_rag.md (conditional)" + ], + "deleted_files": [] + }, + "blocked_by": [], + "blocks": [ + "data_structure_strengthening_20260606", + "user_stated_intent: send_result -> send mass rename" + ], + "estimated_phases": 5, + "spec": "spec.md", + "plan": "plan.md", + + "regressions_and_pre_existing_failures": [ + { + "id": "G1_rag_phase4_final_verify", + "severity": "high", + "category": "rag_subsystem_bug", + "file_line": "tests/test_rag_phase4_final_verify.py:65", + "symptom": "RAG sync fails with 'NoneType object has no attribute get' after rag_enabled=True", + "fix_phase": 2, + "fix": "TBD by Phase 1 investigation (most likely src/rag_engine.py:_validate_collection_dim_result or src/app_controller.py:_do_rag_sync)" + }, + { + "id": "G2_rag_phase4_stress", + "severity": "high", + "category": "rag_subsystem_bug", + "file_line": "tests/test_rag_phase4_stress.py:48", + "symptom": "Same as G1 (RAG sync fails)", + "fix_phase": 2, + "fix": "Same fix as G1 (one root cause for all 3 tests)" + }, + { + "id": "G3_rag_visual_sim", + "severity": "high", + "category": "rag_subsystem_bug", + "file_line": "tests/test_rag_visual_sim.py:32", + "symptom": "Same as G1 (RAG sync fails at initial status check)", + "fix_phase": 2, + "fix": "Same fix as G1 (one root cause for all 3 tests)" + } + ], + + "pre_existing_failures_fixed_by_this_track": [ + { + "id": "PE_1", + "test": "tests/test_rag_phase4_final_verify.py::test_phase4_final_verify", + "fix_phase": 2, + "root_cause": "RAG sync NoneType.get error in src/app_controller.py:_do_rag_sync" + }, + { + "id": "PE_2", + "test": "tests/test_rag_phase4_stress.py::test_rag_large_codebase_verification_sim", + "fix_phase": 2, + "root_cause": "Same as PE_1" + }, + { + "id": "PE_3", + "test": "tests/test_rag_visual_sim.py::test_rag_full_lifecycle_sim", + "fix_phase": 2, + "root_cause": "Same as PE_1" + } + ], + + "pre_existing_failures_remaining": [], + + "incidental_fixes_from_parent_track": [ + { + "id": "INC_1", + "test": "tests/test_rag_integration.py::test_rag_integration", + "fixed_by": "public_api_migration_and_ui_polish_20260615 Phase 2 follow-up (commit 26e1b652)", + "root_cause": "Mock return value needed Result(data=...) wrapper" + } + ], + + "deferred_to_followup_tracks": [ + { + "id": "send_result_to_send_rename", + "title": "send_result -> send Mass Rename (user's stated intent)", + "description": "The user has stated intent to do a mass rename of send_result to send. The rename is mechanical (Result[T] return type is stable; only the function name changes). The user will do this manually after this track ships.", + "track_status": "user_manual_refactor" + }, + { + "id": "data_structure_strengthening_20260606", + "title": "Data Structure Strengthening (Type Aliases + NamedTuples)", + "description": "Introduce 6 TypeAlias definitions in src/type_aliases.py; replace 370+ anonymous dict[str, Any] sites in 6 high-traffic files. Spec already exists; plan pending.", + "track_status": "ready to start; blocked by this track (cleaner Result API usage makes type-alias replacement easier)" + }, + { + "id": "live_gui_mock_injection_20260615", + "title": "Live GUI Mock Injection Infrastructure", + "description": "Infrastructure for mock injection into the live_gui subprocess. Unblocks proper end-to-end live_gui + AI client tests.", + "track_status": "recommended; not yet specced" + }, + { + "id": "rag_test_quality_cleanup", + "title": "RAG Test Quality Cleanup", + "description": "Replace time.sleep(0.5) patterns in RAG tests with poll loops; improve error messages; remove flaky patterns. Not a bug fix; quality improvement.", + "track_status": "recommended; not yet specced" + } + ], + + "verification_criteria": { + "g1_reproducing_test_exists": "tests/test_rag_sync_none_error.py exists and fails before the fix", + "g2_three_rag_tests_pass": "uv run pytest tests/test_rag_phase4_final_verify.py tests/test_rag_phase4_stress.py tests/test_rag_visual_sim.py -v passes 3/3", + "g3_defensive_guard_added": "Error message identifies which field or call is None (not just 'NoneType has no attribute get')", + "g4_docs_updated": "docs/guide_rag.md has a troubleshooting entry (only if the file exists; skip if it doesn't)", + "nf1_no_new_regressions": "uv run pytest tests/ shows 1285 pass + 4 skip + 0 fail (was 1282 + 4 + 3 pre-track)", + "nf2_per_task_atomic_commits": "5-7 atomic commits with clear messages", + "nf3_style_preserved": "1-space indentation, no comments, type hints in all changed code", + "nf4_per_commit_git_notes": "All commits have git notes summarizing the fix" + }, + + "fr_to_phase_mapping": { + "G1_G2_G3_three_rag_tests": { + "phase": 2, + "fix_files": ["src/app_controller.py:1479-1482 (likely)", "src/rag_engine.py (likely)"], + "test_files": ["tests/test_rag_phase4_final_verify.py", "tests/test_rag_phase4_stress.py", "tests/test_rag_visual_sim.py", "tests/test_rag_sync_none_error.py (new)"], + "min_test_count": 4 + }, + "G3_defensive_guard": { + "phase": 2, + "fix_files": ["src/app_controller.py:1479-1482", "src/rag_engine.py"], + "min_test_count": 0 + }, + "G4_docs_update": { + "phase": 4, + "fix_files": ["docs/guide_rag.md (conditional)"], + "min_test_count": 0 + } + }, + + "estimated_effort": { + "phase_1": "1-2 hours - investigation + reproducing test", + "phase_2": "1-3 hours - fix (dependent on Phase 1 finding)", + "phase_3": "30 min - full + batched test verification", + "phase_4": "15 min - docs update (conditional)", + "phase_5": "15 min - metadata + tracks.md", + "total": "0.5-1 day Tier 2 work (4-8 hours)" + }, + + "risk_register": { + "R1_fix_breaks_unrelated_test": { + "likelihood": "low", + "impact": "medium", + "mitigation": "Run the full test suite in Phase 3 + the batched test. If a new failure appears, STOP and report." + }, + "R2_bug_in_hard_to_reach_code_path": { + "likelihood": "medium", + "impact": "medium", + "mitigation": "Add diagnostic traceback in Phase 1; capture the actual error site; document in commit message." + }, + "R3_fix_is_in_test_not_production": { + "likelihood": "low", + "impact": "low", + "mitigation": "If the fix is in the test, document this in the commit message. Consider adding a teardown reset." + }, + "R4_regression_in_rag_engine_ready_status_bug": { + "likelihood": "low", + "impact": "medium", + "mitigation": "Run the full RAG test suite after the fix." + }, + "R5_takes_longer_than_estimated": { + "likelihood": "low", + "impact": "low", + "mitigation": "Even 2 days is acceptable; user's overall plan is 2 more tracks before data structure." + } + }, + + "audit_findings_20260615": { + "remaining_pre_existing_failures": { + "test_rag_phase4_final_verify.py::test_phase4_final_verify": { + "tier": "tier-3 (live_gui)", + "failure_point": "line 65 (after rag_enabled=True + wait for rag_status == ready)", + "error": "RAG sync failed. Status: error: 'NoneType' object has no attribute 'get'" + }, + "test_rag_phase4_stress.py::test_rag_large_codebase_verification_sim": { + "tier": "tier-3 (live_gui)", + "failure_point": "line 48 (same pattern)", + "error": "Same as above" + }, + "test_rag_visual_sim.py::test_rag_full_lifecycle_sim": { + "tier": "tier-3 (live_gui)", + "failure_point": "line 32 (initial status check after rag_enabled=True)", + "error": "Same as above" + } + }, + "fixed_by_parent_track": { + "test_rag_integration.py::test_rag_integration": { + "fixed_by": "public_api_migration_and_ui_polish_20260615 Phase 2 follow-up (commit 26e1b652)", + "root_cause": "Mock return value needed Result(data=...) wrapper", + "note": "Was listed as 1 of 4 RAG failures in the parent spec; was actually fixed during that track" + } + }, + "investigation_clues": { + "RAGConfig_default_state": "vector_store: VectorStoreConfig(provider='mock', ...); NOT None; verified by direct instantiation", + "RAGEngine_init_with_mock": "Succeeds; client='mock'; collection='mock'; is_empty()=True; no further sync work", + "most_likely_call_site": "src/rag_engine.py:149 (embeddings = res.get('embeddings') in _validate_collection_dim_result) - but only triggered for chroma provider, not mock", + "secondary_clue": "src/rag_engine.py:_init_vector_store_result returns Result(data=None) for mock branch; the mock branch is hit and exits successfully", + "error_path": "src/app_controller.py:1479-1482 catches the exception and sets rag_status to f'error: {e}'" + }, + "RAG_subsystem_state": { + "rag_config": "Initialized in __init__ (src/app_controller.py:1830-1831) as RAGConfig() default OR models.RAGConfig.from_dict(rag_data)", + "rag_config_reset": "src/app_controller.py:3387 sets self.rag_config = _rag_models.RAGConfig() (fresh default)", + "active_project_root": "Property at line 1388; returns str(Path(self.active_project_path).parent) or self.ui_files_base_dir", + "embedding_provider_default": "'gemini' (per RAGConfig field default)", + "vector_store_default": "VectorStoreConfig(provider='mock', ...)" + } + }, + + "milestone_context": { + "pre_track_state": "1282 pass + 4 skip + 3 fail (10 fail pre-public_api; 7 fixed in that track)", + "post_track_target": "1285 pass + 4 skip + 0 fail", + "historical_context": "First fully green baseline since data_oriented_error_handling_20260606 shipped 2026-06-12", + "user_intent_after_this_track": "send_result -> send mass rename (user will do manually), then data_structure_strengthening_20260606 track" + } +}