diff --git a/conductor/tests/diag_subagent.py b/conductor/tests/diag_subagent.py deleted file mode 100644 index e4b290aa..00000000 --- a/conductor/tests/diag_subagent.py +++ /dev/null @@ -1,23 +0,0 @@ -import subprocess -import sys - -def run_diag(role: str, prompt: str) -> str: - print(f"--- Running Diag for {role} ---") - cmd = [sys.executable, "scripts/mma_exec.py", "--role", role, prompt] - try: - result = subprocess.run(cmd, capture_output=True, text=True, encoding='utf-8') - print("STDOUT:") - print(result.stdout) - print("STDERR:") - print(result.stderr) - return result.stdout - except Exception as e: - print(f"FAILED: {e}") - return str(e) - -if __name__ == "__main__": -# Test 1: Simple read - print("TEST 1: read_file") - run_diag("tier3-worker", "Read the file 'pyproject.toml' and tell me the version of the project. ONLY the version string.") - print("\nTEST 2: run_shell_command") - run_diag("tier3-worker", "Use run_shell_command to execute 'echo HELLO_SUBAGENT' and return the output. ONLY the output.") diff --git a/conductor/tests/test_gui_markdown_table_width.py b/conductor/tests/test_gui_markdown_table_width.py deleted file mode 100644 index 8a458d71..00000000 --- a/conductor/tests/test_gui_markdown_table_width.py +++ /dev/null @@ -1,64 +0,0 @@ -import unittest -from unittest.mock import MagicMock, patch -import sys -import os - -# Ensure project root is in path so we can import src.gui_2 -project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) -if project_root not in sys.path: - sys.path.insert(0, project_root) - -class TestMarkdownTableWidth(unittest.TestCase): - def test_render_discussion_entry_full_width(self): - """ - Verify that render_discussion_entry calls imgui.dummy with the full available width. - """ - # Mock all dependencies to avoid side effects and complex setup during import/execution - with patch('src.gui_2.imgui') as mock_imgui, \ - patch('src.gui_2.imscope') as mock_imscope, \ - patch('src.gui_2.theme') as mock_theme, \ - patch('src.gui_2.project_manager') as mock_pm, \ - patch('src.gui_2.render_thinking_trace') as mock_rtt, \ - patch('src.gui_2.render_discussion_entry_read_mode') as mock_rderm: - - # 1. Setup available width and coordinates - expected_width = 850.0 - mock_avail = MagicMock() - mock_avail.x = expected_width - mock_imgui.get_content_region_avail.return_value = mock_avail - - # Mock ImVec2 to return a simple tuple for easier assertion - mock_imgui.ImVec2.side_effect = lambda x, y: (x, y) - - # 3. Mock app and entry state - mock_app = MagicMock() - mock_app.disc_roles = ["User", "Assistant"] - - entry = { - "role": "User", - "content": "Hello world", - "collapsed": False, - "read_mode": False - } - - # Mock interactive elements - mock_imgui.begin_combo.return_value = False - mock_imgui.button.return_value = False - mock_imgui.input_text_multiline.return_value = (False, entry["content"]) - - # 4. Import the function within the patch context - from src.gui_2 import render_discussion_entry - - # 5. Execute the function - render_discussion_entry(mock_app, entry, 0) - - # 6. Verification - # The function should call imgui.dummy(imgui.ImVec2(full_width, 0)) - mock_imgui.dummy.assert_any_call((expected_width, 0.0)) - - # CRITICAL: Verify newline or spacing is called to prevent squashing - # We expect this to fail currently - assert mock_imgui.new_line.called or mock_imgui.spacing.called - -if __name__ == '__main__': - unittest.main() diff --git a/conductor/tests/test_gui_monolithic_restoration.py b/conductor/tests/test_gui_monolithic_restoration.py deleted file mode 100644 index ef680f76..00000000 --- a/conductor/tests/test_gui_monolithic_restoration.py +++ /dev/null @@ -1,33 +0,0 @@ -import inspect -import sys -import os -import pytest - -# Ensure project root is in path -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) - -def test_gui_monolithic_symbols(): - try: - from src.gui_2 import App, render_discussion_entry, render_thinking_trace - import src.gui_2 - except ImportError as e: - pytest.fail(f"FAILURE: Could not import from src.gui_2: {e}") - - # Verify App is importable - assert App is not None - - # Verify render_discussion_entry is in src.gui_2 - assert hasattr(src.gui_2, 'render_discussion_entry'), "render_discussion_entry missing from src.gui_2" - - # Verify it's defined in src.gui_2, not imported - mod = inspect.getmodule(render_discussion_entry) - assert mod is not None, "Could not determine module for render_discussion_entry" - assert mod.__name__ == 'src.gui_2', f"render_discussion_entry expected in src.gui_2, but found in {mod.__name__}" - - # Verify render_thinking_trace is in src.gui_2 - assert hasattr(src.gui_2, 'render_thinking_trace'), "render_thinking_trace missing from src.gui_2" - - # Verify it's defined in src.gui_2, not imported - mod = inspect.getmodule(render_thinking_trace) - assert mod is not None, "Could not determine module for render_thinking_trace" - assert mod.__name__ == 'src.gui_2', f"render_thinking_trace expected in src.gui_2, but found in {mod.__name__}" diff --git a/conductor/tests/test_imgui_scopes_id_stability.py b/conductor/tests/test_imgui_scopes_id_stability.py deleted file mode 100644 index 5bae1bd8..00000000 --- a/conductor/tests/test_imgui_scopes_id_stability.py +++ /dev/null @@ -1,29 +0,0 @@ -import pytest -from unittest.mock import patch, MagicMock -from src.imgui_scopes import _ScopeId -import src.imgui_scopes as imgui_scopes - -def test_scope_id_string(): - with patch('src.imgui_scopes.imgui') as mock_imgui: - sid = _ScopeId("test_id") - with sid: - pass - mock_imgui.push_id.assert_called_once_with("test_id") - mock_imgui.pop_id.assert_called_once() - -def test_scope_id_int(): - with patch('src.imgui_scopes.imgui') as mock_imgui: - # Python type hint is str, but we test runtime resilience - sid = _ScopeId(1234) - with sid: - pass - # Verify it was converted to string to prevent low-level crashes - mock_imgui.push_id.assert_called_once_with("1234") - mock_imgui.pop_id.assert_called_once() - -def test_id_helper_function(): - with patch('src.imgui_scopes.imgui') as mock_imgui: - with imgui_scopes.id(42): - pass - mock_imgui.push_id.assert_called_once_with("42") - mock_imgui.pop_id.assert_called_once() diff --git a/conductor/tests/test_infrastructure.py b/conductor/tests/test_infrastructure.py deleted file mode 100644 index 9423d25d..00000000 --- a/conductor/tests/test_infrastructure.py +++ /dev/null @@ -1,60 +0,0 @@ -import subprocess -from unittest.mock import patch, MagicMock - -def run_ps_script(role: str, prompt: str) -> subprocess.CompletedProcess: - """Helper to run the run_subagent.ps1 script.""" - # Using -File is safer and handles arguments better - cmd = [ - "powershell", "-NoProfile", "-ExecutionPolicy", "Bypass", - "-File", "./scripts/run_subagent.ps1", - "-Role", role, - "-Prompt", prompt - ] - result = subprocess.run(cmd, capture_output=True, text=True) - if result.stdout: - print(f"\n[Sub-Agent {role} Output]:\n{result.stdout}") - if result.stderr: - print(f"\n[Sub-Agent {role} Error]:\n{result.stderr}") - return result - -@patch('subprocess.run') -def test_subagent_script_qa_live(mock_run) -> None: - """Verify that the QA role works and returns a compressed fix.""" - mock_run.return_value = MagicMock(returncode=0, stdout='Fix the division by zero error.', stderr='') - prompt = "Traceback (most recent call last): File 'test.py', line 1, in 1/0 ZeroDivisionError: division by zero" - result = run_ps_script("QA", prompt) - assert result.returncode == 0 - # Expected output should mention the fix for division by zero - assert "zero" in result.stdout.lower() - # It should be short (QA agents compress) - assert len(result.stdout.split()) < 40 - -@patch('subprocess.run') -def test_subagent_script_worker_live(mock_run) -> None: - """Verify that the Worker role works and returns code.""" - mock_run.return_value = MagicMock(returncode=0, stdout='def hello(): return "hello world"', stderr='') - prompt = "Write a python function that returns 'hello world'" - result = run_ps_script("Worker", prompt) - assert result.returncode == 0 - assert "def" in result.stdout.lower() - assert "hello" in result.stdout.lower() - -@patch('subprocess.run') -def test_subagent_script_utility_live(mock_run) -> None: - """Verify that the Utility role works.""" - mock_run.return_value = MagicMock(returncode=0, stdout='True', stderr='') - prompt = "Tell me 'True' if 1+1=2, otherwise 'False'" - result = run_ps_script("Utility", prompt) - assert result.returncode == 0 - assert "true" in result.stdout.lower() - -@patch('subprocess.run') -def test_subagent_isolation_live(mock_run) -> None: - """Verify that the sub-agent is stateless and does not see the parent's conversation context.""" - mock_run.return_value = MagicMock(returncode=0, stdout='UNKNOWN', stderr='') - # This prompt asks the sub-agent about a 'secret' mentioned only here, not in its prompt. - prompt = "What is the secret code I just told you? If I didn't tell you, say 'UNKNOWN'." - result = run_ps_script("Utility", prompt) - assert result.returncode == 0 - # A stateless agent should not know any previous context. - assert "unknown" in result.stdout.lower() diff --git a/conductor/tests/test_mma_exec.py b/conductor/tests/test_mma_exec.py deleted file mode 100644 index acb1b785..00000000 --- a/conductor/tests/test_mma_exec.py +++ /dev/null @@ -1,140 +0,0 @@ -import pytest -import os -from pathlib import Path -from unittest.mock import patch, MagicMock -from scripts.mma_exec import create_parser, get_role_documents, execute_agent, get_model_for_role, get_dependencies - -def test_parser_role_choices() -> None: - """Test that the parser accepts valid roles and the prompt argument.""" - parser = create_parser() - valid_roles = ['tier1', 'tier2', 'tier3', 'tier4'] - test_prompt = "Analyze the codebase for bottlenecks." - for role in valid_roles: - args = parser.parse_args(['--role', role, test_prompt]) - assert args.role == role - assert args.prompt == test_prompt - -def test_parser_invalid_role() -> None: - """Test that the parser rejects roles outside the specified choices.""" - parser = create_parser() - with pytest.raises(SystemExit): - parser.parse_args(['--role', 'tier5', 'Some prompt']) - -def test_parser_prompt_optional() -> None: - """Test that the prompt argument is optional if role is provided (or handled in main).""" - parser = create_parser() - # Prompt is now optional (nargs='?') - args = parser.parse_args(['--role', 'tier3']) - assert args.role == 'tier3' - assert args.prompt is None - -def test_parser_help() -> None: - """Test that the help flag works without raising errors (exits with 0).""" - parser = create_parser() - with pytest.raises(SystemExit) as excinfo: - parser.parse_args(['--help']) - assert excinfo.value.code == 0 - -def test_get_role_documents() -> None: - """Test that get_role_documents returns the correct documentation paths for each tier.""" - assert get_role_documents('tier1') == ['conductor/product.md', 'conductor/product-guidelines.md', 'docs/guide_architecture.md', 'docs/guide_mma.md'] - assert get_role_documents('tier2') == ['conductor/tech-stack.md', 'conductor/workflow.md', 'docs/guide_architecture.md', 'docs/guide_mma.md'] - assert get_role_documents('tier3') == ['docs/guide_architecture.md'] - assert get_role_documents('tier4') == ['docs/guide_architecture.md'] - -def test_get_model_for_role() -> None: - """Test that get_model_for_role returns the correct model for each role.""" - assert get_model_for_role('tier1-orchestrator') == 'gemini-3.1-pro-preview' - assert get_model_for_role('tier2-tech-lead') == 'gemini-3-flash-preview' - assert get_model_for_role('tier3-worker') == 'gemini-3-flash-preview' - assert get_model_for_role('tier4-qa') == 'gemini-2.5-flash-lite' - -def test_execute_agent() -> None: - """ - Test that execute_agent calls subprocess.run with powershell and the correct gemini CLI arguments - including the model specified for the role. - """ - role = "tier3-worker" - prompt = "Write a unit test." - docs = ["file1.py", "docs/spec.md"] - expected_model = "gemini-3-flash-preview" - mock_stdout = "Mocked AI Response" - with patch("subprocess.run") as mock_run: - mock_process = MagicMock() - mock_process.stdout = mock_stdout - mock_process.returncode = 0 - mock_run.return_value = mock_process - result = execute_agent(role, prompt, docs) - mock_run.assert_called_once() - args, kwargs = mock_run.call_args - cmd_list = args[0] - assert cmd_list[0] == "powershell.exe" - assert "-Command" in cmd_list - ps_cmd = cmd_list[cmd_list.index("-Command") + 1] - assert "gemini" in ps_cmd - assert f"--model {expected_model}" in ps_cmd - # Verify input contains the prompt and system directive - input_text = kwargs.get("input") - assert "STRICT SYSTEM DIRECTIVE" in input_text - assert "TASK: Write a unit test." in input_text - assert kwargs.get("capture_output") is True - assert kwargs.get("text") is True - assert result == mock_stdout - -def test_get_dependencies(tmp_path: Path) -> None: - content = ( - "import os\n" - "import sys\n" - "import file_cache\n" - "from mcp_client import something\n" - ) - filepath = tmp_path / "mock_script.py" - filepath.write_text(content) - dependencies = get_dependencies(str(filepath)) - assert dependencies == ['os', 'sys', 'file_cache', 'mcp_client'] - -import re - -def test_execute_agent_logging(tmp_path: Path) -> None: - log_file = tmp_path / "mma_delegation.log" - # mma_exec now uses logs/agents/ for individual logs and logs/mma_delegation.log for master - # We will patch LOG_FILE to point to our temp location - with patch("scripts.mma_exec.LOG_FILE", str(log_file)), \ - patch("subprocess.run") as mock_run: - mock_process = MagicMock() - mock_process.stdout = "" - mock_process.returncode = 0 - mock_run.return_value = mock_process - test_role = "tier1" - test_prompt = "Plan the next phase" - execute_agent(test_role, test_prompt, []) - assert log_file.exists() - log_content = log_file.read_text() - assert test_role in log_content - assert test_prompt in log_content # Master log should now have the summary prompt - assert re.search(r"\d{4}-\d{2}-\d{2}", log_content) - -def test_execute_agent_tier3_injection(tmp_path: Path) -> None: - main_content = "import dependency\n\ndef run():\n dependency.do_work()\n" - main_file = tmp_path / "main.py" - main_file.write_text(main_content) - dep_content = "def do_work():\n pass\n\ndef other_func():\n print('hello')\n" - dep_file = tmp_path / "dependency.py" - dep_file.write_text(dep_content) - # We need to ensure generate_skeleton is mockable or working - old_cwd = os.getcwd() - os.chdir(tmp_path) - try: - with patch("subprocess.run") as mock_run: - mock_process = MagicMock() - mock_process.stdout = "OK" - mock_process.returncode = 0 - mock_run.return_value = mock_process - execute_agent('tier3-worker', 'Modify main.py', ['main.py']) - assert mock_run.called - input_text = mock_run.call_args[1].get("input") - assert "DEPENDENCY SKELETON: dependency.py" in input_text - assert "def do_work():" in input_text - assert "Modify main.py" in input_text - finally: - os.chdir(old_cwd) diff --git a/conductor/tests/verify_phase_1.py b/conductor/tests/verify_phase_1.py deleted file mode 100644 index 74e1b623..00000000 --- a/conductor/tests/verify_phase_1.py +++ /dev/null @@ -1,40 +0,0 @@ -import sys -import os - -# Add src to path -sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))) - -from src.history import HistoryManager - -def verify_phase_1(): - print("Verifying Phase 1: History Core Logic...") - hm = HistoryManager(max_capacity=10) - - # Test push - hm.push({"test": 1}, "initial") - if not hm.can_undo: - print("Error: can_undo should be true after push") - sys.exit(1) - - # Test undo - entry = hm.undo({"test": 2}, "current") - if entry.state != {"test": 1}: - print(f"Error: expected state {{'test': 1}}, got {entry.state}") - sys.exit(1) - if entry.description != "initial": - print(f"Error: expected description 'initial', got {entry.description}") - sys.exit(1) - - # Test redo - entry = hm.redo({"test": 1}, "back") - if entry.state != {"test": 2}: - print(f"Error: expected state {{'test': 2}}, got {entry.state}") - sys.exit(1) - if entry.description != "current": - print(f"Error: expected description 'current', got {entry.description}") - sys.exit(1) - - print("Phase 1 verification PASSED.") - -if __name__ == "__main__": - verify_phase_1() diff --git a/conductor/tests/verify_phase_2.py b/conductor/tests/verify_phase_2.py deleted file mode 100644 index 03189fbe..00000000 --- a/conductor/tests/verify_phase_2.py +++ /dev/null @@ -1,24 +0,0 @@ -import subprocess -import sys -import os - -def verify_phase_2(): - print("Verifying Phase 2: Text Input & Control Undo/Redo...") - - # Run the simulation test - result = subprocess.run( - ["uv", "run", "pytest", "tests/test_undo_redo_sim.py"], - capture_output=True, - text=True - ) - - if result.returncode == 0: - print("Phase 2 verification PASSED.") - else: - print("Phase 2 verification FAILED.") - print(result.stdout) - print(result.stderr) - sys.exit(1) - -if __name__ == "__main__": - verify_phase_2() diff --git a/conductor/tests/verify_phase_3.py b/conductor/tests/verify_phase_3.py deleted file mode 100644 index 01dbfb45..00000000 --- a/conductor/tests/verify_phase_3.py +++ /dev/null @@ -1,24 +0,0 @@ -import subprocess -import sys - -def verify_phase_3(): - print("Verifying Phase 3: GUI Menu Integration...") - - # We rely on the existing simulation test to verify the callback logic, - # which underpins the GUI menu integration. - result = subprocess.run( - ["uv", "run", "pytest", "tests/test_workspace_profiles_sim.py"], - capture_output=True, - text=True - ) - - if result.returncode == 0: - print("Phase 3 verification PASSED.") - else: - print("Phase 3 verification FAILED.") - print(result.stdout) - print(result.stderr) - sys.exit(1) - -if __name__ == "__main__": - verify_phase_3() diff --git a/conductor/tests/verify_phase_3_rag.py b/conductor/tests/verify_phase_3_rag.py deleted file mode 100644 index d5463499..00000000 --- a/conductor/tests/verify_phase_3_rag.py +++ /dev/null @@ -1,54 +0,0 @@ -import sys -import os -import time - -sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) -sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "src"))) - -from src import api_hook_client - -def verify_phase_3(): - print("[VERIFY] Starting Phase 3 Automated Verification...") - client = api_hook_client.ApiHookClient() - if not client.wait_for_server(timeout=10): - print("[VERIFY] ERROR: Hook server not reachable.") - sys.exit(1) - - try: - # Check RAG status - status = client.get_value("rag_status") - print(f"[VERIFY] Current RAG status: {status}") - - # Check if RAG settings are accessible - enabled = client.get_value("rag_enabled") - source = client.get_value("rag_source") - print(f"[VERIFY] RAG Enabled: {enabled}, Source: {source}") - - # Verify status transitions (indexing) - print("[VERIFY] Triggering index rebuild...") - client.click("btn_rebuild_rag_index") - - time.sleep(0.5) - status = client.get_value("rag_status") - print(f"[VERIFY] Status during indexing: {status}") - - # Wait for completion - max_wait = 10 - start = time.time() - while time.time() - start < max_wait: - status = client.get_value("rag_status") - if status == "ready": - print("[VERIFY] RAG reached 'ready' status.") - break - time.sleep(1) - else: - print(f"[VERIFY] WARNING: RAG status timeout. Final: {status}") - - print("[VERIFY] Phase 3 verification COMPLETED successfully.") - - except Exception as e: - print(f"[VERIFY] ERROR during verification: {e}") - sys.exit(1) - -if __name__ == "__main__": - verify_phase_3() diff --git a/conductor/tests/verify_phase_4.py b/conductor/tests/verify_phase_4.py deleted file mode 100644 index 8dcc7a34..00000000 --- a/conductor/tests/verify_phase_4.py +++ /dev/null @@ -1,23 +0,0 @@ -import subprocess -import sys -import os - -def verify_phase_4(): - print("Verifying Phase 4: Contextual Auto-Switch...") - - result = subprocess.run( - ["uv", "run", "pytest", "tests/test_auto_switch_sim.py"], - capture_output=True, - text=True - ) - - if result.returncode == 0: - print("Phase 4 verification PASSED.") - else: - print("Phase 4 verification FAILED.") - print(result.stdout) - print(result.stderr) - sys.exit(1) - -if __name__ == "__main__": - verify_phase_4() diff --git a/conductor/tier2/agents/tier2-autonomous.md b/conductor/tier2/agents/tier2-autonomous.md index 741b701a..efda6989 100644 --- a/conductor/tier2/agents/tier2-autonomous.md +++ b/conductor/tier2/agents/tier2-autonomous.md @@ -21,6 +21,8 @@ permission: "git reset*": deny --- +Note: You may use superpowers skills to assist you (brainstorming, recieving code reviews, writing plans, writting skills, dispatching parallel agents) + STRICT SYSTEM DIRECTIVE: You are a Tier 2 Tech Lead in AUTONOMOUS mode, running in the **META-TOOLING** domain (per `docs/guide_meta_boundary.md`). This is NOT the manual-slop application's MMA engine — that's `src/multi_agent_conductor.py` in the APPLICATION domain. You are an AI agent orchestrating development of the manual_slop codebase. ## MANDATORY: Domain Distinction (added 2026-06-27) diff --git a/conductor/tracks/enforcement_gap_closure_20260627/metadata.json b/conductor/tracks/enforcement_gap_closure_20260627/metadata.json new file mode 100644 index 00000000..5bae87f4 --- /dev/null +++ b/conductor/tracks/enforcement_gap_closure_20260627/metadata.json @@ -0,0 +1,109 @@ +{ + "track_id": "enforcement_gap_closure_20260627", + "name": "Enforcement Gap Closure (Boundary-Layer Audit + Optional[T] Audit Widening)", + "status": "active", + "branch": "master", + "created": "2026-06-27", + "owner": "Tier 1 (initialized); implementation delegated to Tier 2/3.", + "blocked_by": [], + "blocks": [], + "scope": { + "new_files": [ + "scripts/audit_boundary_layer.py", + "scripts/boundary_layer_allowlist.toml", + "scripts/audit_optional_returns.py (renamed from audit_optional_in_3_files.py)", + "scripts/audit_optional_returns.baseline.json", + "tests/test_audit_boundary_layer.py", + "tests/test_audit_optional_returns.py", + "docs/reports/TRACK_COMPLETION_enforcement_gap_closure_20260627.md" + ], + "modified_files": [ + "conductor/code_styleguides/python.md (sections 17.7, 17.8, inventory table 449-456)", + "conductor/code_styleguides/error_handling.md (cross-reference sweep only)", + "docs/AGENTS.md (cross-reference sweep only)", + "conductor/tracks.md (active-track row + status)", + "conductor/chronology.md (prepend shipment row)" + ], + "deleted_files": [ + "scripts/audit_optional_in_3_files.py (renamed to audit_optional_returns.py via git mv)" + ] + }, + "estimated_effort": { + "method": "scope (per workflow.md Tier 1 Track Initialization Rules. NO day estimates.)", + "phase_1": "4 tasks: 1 test file (10 tests) + 1 audit script + 1 allowlist TOML + green-phase verification", + "phase_2": "3 tasks: 1 test file (5 tests) + 1 rename/edit + 1 baseline JSON + green-phase verification", + "phase_3": "2 tasks: 1 styleguide inventory edit + 1 cross-reference sweep", + "phase_4": "4 tasks: 7-audit verification + 1 end-of-track report + 1 state update + user sign-off" + }, + "verification_criteria": [ + "G1: scripts/audit_boundary_layer.py exists + AST-scans all src/*.py + exits 1 in --strict on un-allowlisted dict[str, Any] sites", + "G2: scripts/boundary_layer_allowlist.toml exists + lists ~14 boundary files with reasons + --show-allowlist prints them", + "G3: scripts/audit_optional_returns.py exists (renamed from audit_optional_in_3_files.py) + scans all src/*.py + 3 history.py residuals baselined in audit_optional_returns.baseline.json (strict stays green)", + "G4: conductor/code_styleguides/python.md sections 17.7, 17.8, and inventory table reflect post-track reality (audit_boundary_layer implemented; audit_optional_returns implemented; audit_imports implemented)", + "G5: cross-reference sweep complete (no enforcement-instruction references to audit_optional_in_3_files.py; historical references preserved)", + "G6: tests/test_audit_boundary_layer.py has >=10 tests; all pass", + "G7: tests/test_audit_optional_returns.py has >=5 tests; all pass", + "G8: docs/reports/TRACK_COMPLETION_enforcement_gap_closure_20260627.md exists; documents contradiction closure (C1, C2, C3-partial, C18-partial, C21) and remaining (C5, C6, C16, C17 - deferred per user directive)", + "VC_pre_commit_parallel_safe": "ZERO file overlap with the running tier2/post_module_taxonomy_de_cruft_20260627 branch (verified by Tier 1 against ddcec7b0 + TRACK_COMPLETION file-level changes)" + ], + "regressions_and_pre_existing_failures": [], + "pre_existing_failures_remaining": [], + "deferred_to_followup_tracks": [ + { + "title": "Optional[T] return migration in src/history.py", + "description": "3 RETURN_OPTIONAL sites in src/history.py baselined by this track; cruft_elimination_20260627 Phase 6 owns the migration to Result[T] + NIL_T.", + "track_status": "planned in cruft_elimination_20260627" + }, + { + "title": "dict[str, Any] migration in hot_reloader.py + startup_profiler.py", + "description": "2 un-allowlisted boundary violations baselined by this track; a future track promotes them to typed dataclasses (HotReloadSnapshot, ProfilerSnapshot).", + "track_status": "not yet initialized" + }, + { + "title": "Main-repo pre-commit hook wiring", + "description": "The 5 audit scripts strict mode (weak_types, boundary_layer, optional_returns, exception_handling, imports) is not wired into the main repo's .git/hooks/. Per contradictions report C4.", + "track_status": "not yet initialized" + }, + { + "title": "Docs-count drift in docs/Readme.md (C7, C8, C9) + styleguide drift (C16 python.md s10, C17 type_aliases.md line 19) + RAGChunk.id in guides (C6)", + "description": "Deferred per user directive 2026-06-27 until tier2 branch stabilizes; these describe code state that exists post-merge of the taxonomy branches.", + "track_status": "deferred; will bundle into a docs-sync track post-merge" + } + ], + "risk_register": [ + { + "id": "R1", + "description": "audit_optional_returns.baseline.json format mismatch with audit_weak_types.baseline.json contract", + "likelihood": "medium", + "impact": "the renamed --strict mode behaves inconsistently with the existing baseline pattern", + "mitigation": "Tier 3 reads scripts/audit_weak_types.py + its baseline JSON before implementing; mirror the exact contract" + }, + { + "id": "R2", + "description": "Cross-file rename race if Tier 2 branch touches scripts/audit_optional_in_3_files.py in parallel", + "likelihood": "low", + "impact": "the git mv conflicts with Tier 2 work", + "mitigation": "Tier 1 verified post_module_taxonomy_de_cruft TRACK_COMPLETION does not touch audit_optional_*; only scripts/audit_no_models_config_io.py" + }, + { + "id": "R3", + "description": "Boundary allowlist under-classifies a genuine violation as boundary (false negative)", + "likelihood": "medium", + "impact": "the audit misses a real dict[str, Any] escape hatch that future LLMs reach for", + "mitigation": "Tier 1's spec 'Current State Audit' manually classified the 14 legitimate boundary files + 2 genuine violators; the audit starts from that classification. Reviewer (user) inspects boundary_layer_allowlist.toml before merge." + }, + { + "id": "R4", + "description": "Over-classification: audit flags a genuine boundary function as a violation (false positive)", + "likelihood": "low", + "impact": "strict mode is red on a real boundary file; either the allowlist is amended (correct fix) or the violation is suppressed (wrong fix, masks drift)", + "mitigation": "Per spec FR1, allowlisting is the explicit 'declare your boundary' mechanism; the reviewer audits the allowlist at merge time. The audit's `--no-allowlist` mode exposes every site so reviewers can spot-check classifications." + } + ], + "contradictions_report_cross_reference": { + "source": "docs/reports/CONTRADICTIONS_REPORT_20260627.md", + "closes": ["C1", "C2", "C3_partial", "C18_partial", "C21"], + "defers": ["C5", "C6", "C7", "C8", "C9", "C11", "C12", "C13", "C14", "C15", "C16", "C17", "C19", "C20"], + "rationale": "C1+C2+C21 are about the Optional audit name+scope (closed by Phase 2 rename+widen). C3-partial is 'audit_imports.py planned but exists' (closed by Phase 3 inventory correction). C18-partial is the audit count (closed by Phase 3). The 14 deferred items are docs-sync (C5-C9, C16, C17) or status drift (C11-C15, C19, C20) that per user directive 2026-06-27 wait for the tier2 taxonomy branch to stabilize before touching master's docs." + } +} \ No newline at end of file diff --git a/conductor/tracks/enforcement_gap_closure_20260627/plan.md b/conductor/tracks/enforcement_gap_closure_20260627/plan.md new file mode 100644 index 00000000..48b87e84 --- /dev/null +++ b/conductor/tracks/enforcement_gap_closure_20260627/plan.md @@ -0,0 +1,172 @@ +# Plan: Enforcement Gap Closure (Boundary-Layer Audit + Optional[T] Audit Widening) + +Track: `enforcement_gap_closure_20260627` +Branch: master (parallel-safe against `tier2/post_module_taxonomy_de_cruft_20260627`) +Spec: `conductor/tracks/enforcement_gap_closure_20260627/spec.md` + +This plan is read by a Tier 3 Worker (or Tier 2). All Python edits MUST use 1-space indentation. No comments in body. CRLF preserved via `manual-slop_edit_file` MCP tool (never native `edit`). + +**Audit-then-specify verification done by Tier 1:** All file:line references below were verified against master at `77b70226` on 2026-06-27. + +--- + +## Phase 1: Boundary-Layer Audit Script + +Focus: Implement `scripts/audit_boundary_layer.py` + `scripts/boundary_layer_allowlist.toml` + tests, mirroring the `audit_imports.py` + `audit_imports_whitelist.toml` contract. + +- [ ] Task 1.1: Write failing tests for `scripts/audit_boundary_layer.py` + - **WHERE:** `tests/test_audit_boundary_layer.py` (NEW file) + - **WHAT:** 10 tests per spec FR5 (finder detects `dict[str, Any]` in return / param / local; allowlist suppression + WHITELISTED annotation; `--strict` exit 1 on un-allowlisted; `--strict` exit 0 on allowlisted; `--json` shape; missing-file handling; syntax-error handling; `--show-allowlist`). + - **HOW:** Use `tmp_path` (or `tests/artifacts/` per workspace_paths.md — see workflow.md "Test Sandbox Hardening") to create a synthetic `src/` tree the audit can scan via a `--src` flag (mirror `audit_weak_types.py --src`). Each test creates 1-2 small .py files with the pattern under test, invokes the audit via `subprocess.run(["python", "scripts/audit_boundary_layer.py", "--src", str(tmp_src), ...])`, asserts on stdout + exit code. Tests MUST fail before the script exists (Red phase). + - **SAFETY:** No `live_gui` fixture (these are unit tests of a script). No `unittest.mock.patch` of core code. Use `monkeypatch.setenv` for the `--src` path or pass via argv. + - **COMMIT:** `test(audit): add 10 failing tests for boundary-layer audit` + - **GIT NOTE:** Red-phase tests for `scripts/audit_boundary_layer.py`; cover finder + allowlist + strict + json + error-handling per spec FR1 + FR5. + +- [ ] Task 1.2: Implement `scripts/audit_boundary_layer.py` + - **WHERE:** `scripts/audit_boundary_layer.py` (NEW file) + - **WHAT:** Implement the audit per spec FR1. The structure mirrors `scripts/audit_imports.py` (309 lines): module docstring → argparse → `audit_file(path) -> list[Finding]` → main loop over `sorted(Path(src).glob("*.py"))` → exit code logic. + - **HOW:** Reuse the `audit_optional_in_3_files.py` AST detector pattern (it already has `_annotation_is_optional_arg` — copy the analogous `_is_dict_str_any` helper). Detection contract (FR1): + 1. Walk each `ast.FunctionDef` / `AsyncFunctionDef`: + - If `node.returns` is `dict[str, Any]` (Subscript with value Name "dict"|"Dict" and slice Tuple `[Name "str", Name "Any"]`) → emit `RETURN_DICT_ANY`. + - For each arg in `args.args + kwonlyargs + posonlyargs`: if `arg.annotation` is `dict[str, Any]` → emit `PARAM_DICT_ANY`. + 2. Walk each `ast.AnnAssign` inside a function body: if `target.annotation` is `dict[str, Any]` → emit `LOCAL_ANNOT_DICT_ANY`. + 3. Allowlist: load `scripts/boundary_layer_allowlist.toml` (use `tomllib.load`); for any file whose relative path is a key, suppress all findings for that file and emit a single `WHITELISTED` finding per file (matches `audit_imports.py` precedent). + 4. CLI flags: `--strict`, `--json`, `--show-allowlist`, `--no-allowlist`, `--src ` (default `"src"`). + 5. Default mode: print summary table (file, sites, allowlisted) + a list of violations; exit 0. + 6. `--strict`: same + exit 1 if there are un-allowlisted `RETURN_DICT_ANY` / `PARAM_DICT_ANY` / `LOCAL_ANNOT_DICT_ANY` findings. + 7. `--json`: print JSON `{files_scanned, files_with_findings, total_findings, by_kind, findings}` and exit 0. + 8. `--show-allowlist`: print the TOML contents + reasons; exit 0. + 9. `--no-allowlist`: do not read the TOML; audit all sites. + - **SAFETY:** Pure stdlib (`ast`, `argparse`, `json`, `sys`, `pathlib.Path`, `tomllib`). No subprocess to `src/` files. + - **COMMIT:** `feat(audit): implement audit_boundary_layer.py per FR1` + - **GIT NOTE:** Implements the §17.7 boundary-layer audit; mirrors audit_imports.py contract; allowlist-driven per-file suppression. + +- [ ] Task 1.3: Write `scripts/boundary_layer_allowlist.toml` + - **WHERE:** `scripts/boundary_layer_allowlist.toml` (NEW file) + - **WHAT:** Initial allowlist with the ~14 legitimate boundary files from spec "Current State Audit": `context_presets.py`, `events.py`, `openai_compatible.py`, `theme_models.py`, `log_registry.py`, `presets.py`, `tool_presets.py`, `personas.py`, `workspace_manager.py`, `paths.py`, `gemini_cli_adapter.py`, `mcp_client.py`, `type_aliases.py`, `session_logger.py`. + - **HOW:** Mirror `audit_imports_whitelist.toml` format: + - Header comment block (purpose + format). + - "Last reviewed: 2026-06-27" + - One `[allowlist.""]` entry per file with `reason = "..."` documenting why it's at the wire boundary (the reasons are documented in spec "Current State Audit" — e.g., context_presets = "project_dict is the wire TOML"; events.to_dict = "wire serialization for WS protocol"; etc.). + - **SAFETY:** Pure TOML; no code. + - **COMMIT:** `feat(audit): seed boundary_layer_allowlist.toml with 14 boundary files` + - **GIT NOTE:** Allowlist seeds the §17.7 legitimate boundary; per audit_imports_whitelist.toml precedent. + +- [ ] Task 1.4: Run tests for Phase 1 (Green phase) + - **WHAT:** Execute `uv run pytest tests/test_audit_boundary_layer.py -v` (batched-runner convention can also be used: `uv run python scripts/run_tests_batched.py --filter test_audit_boundary_layer`). All 10 tests must pass. If any fail, debug (≤2 retries per workflow.md "Deduction Loop" rule), then STOP and report if still failing. + - **COMMIT:** `conductor(state): mark Phase 1 task 1.4 verification` (or skip the commit if no code changes; just verify). + - **GIT NOTE:** Green-phase verification for boundary-layer audit + allowlist. + +--- + +## Phase 2: Optional[T] Audit Rename + Widening + +Focus: Rename `audit_optional_in_3_files.py` → `audit_optional_returns.py`, widen from 4 files to all `src/*.py`, baseline the 3 `history.py` residuals. + +- [ ] Task 2.1: Write failing tests for the renamed + widened audit + - **WHERE:** `tests/test_audit_optional_returns.py` (NEW file) + - **WHAT:** 5 tests per spec FR5: test_renamed_script_exists, test_scans_all_src_files, test_baseline_reading_keeps_strict_green, test_strict_exits_1_above_baseline, test_param_optional_is_warning_not_strict. + - **HOW:** For test_scans_all_src_files, use `monkeypatch` + `--src ` flag (the script may need a `--src` flag added in Task 2.2 if it doesn't already have one — current `audit_optional_in_3_files.py` hardcodes the 4-file path; Task 2.2 adds `--src`). Tests must fail against the OLD script (which still hardcodes 4 files). + - **SAFETY:** No `live_gui`. No core mocking. + - **COMMIT:** `test(audit): add 5 failing tests for audit_optional_returns widening` + - **GIT NOTE:** Red-phase tests for the rename + widening to all src/*.py per spec FR3 + FR5. + +- [ ] Task 2.2: Rename + widen `audit_optional_in_3_files.py` → `audit_optional_returns.py` + - **WHERE:** `git mv scripts/audit_optional_in_3_files.py scripts/audit_optional_returns.py` then edit the new file. + - **WHAT:** Per spec FR3: + 1. `git mv` the file (preserves history). + 2. Edit `scripts/audit_optional_returns.py`: + - Module docstring: drop "4 baseline files"; say "all `src/*.py` per §17 post-2026-06-27 widening (the successor to `audit_optional_in_3_files.py`, which was renamed + widened on 2026-06-27)." + - Replace `BASELINE_FILES: tuple[str, ...] = (...)` with `def _discover_src_files(src_dir: str = "src") -> list[Path]: return sorted(Path(src_dir).glob("*.py"))`. + - Update `main()` to iterate `_discover_src_files(args.src)` instead of the hardcoded tuple. + - Add `--src ` arg (default `"src"`) mirroring `audit_weak_types.py`. + - Update `--json` output's `"files_scanned"` field to reflect the glob count. + 3. Create `scripts/audit_optional_returns.baseline.json` recording the 3 `src/history.py` `RETURN_OPTIONAL` findings so `--strict` exits 0 on master (findings ≤ baseline). Format: same as `audit_weak_types.baseline.json` (a JSON object with a count or a list of `{file, line, function, kind}` entries that strict mode subtracts). The strict-mode logic: load baseline; subtract baseline findings from current findings; exit 1 if residuals > 0. (Mirror `audit_weak_types.py`'s `--strict` + baseline contract — read its source to confirm the exact subtraction mechanism.) + - **SAFETY:** No `src/` edits. No tests/ edits except the new test file from Task 2.1. + - **COMMIT:** `refactor(audit): rename audit_optional_in_3_files.py -> audit_optional_returns.py; widen to all src/*.py; baseline 3 history.py residuals` + - **GIT NOTE:** Closes contradictions C1+C21 (script name) + C2 (Optional ban scope ambiguity); script name + scope + baseline now honest per §17 post-2026-06-27. + +- [ ] Task 2.3: Run tests for Phase 2 (Green phase) + - **WHAT:** `uv run pytest tests/test_audit_optional_returns.py -v`. All 5 tests must pass. If failures, ≤2 debug retries; then STOP. + - **VERIFY:** Also run the existing audit_optional tests (if any reference the old name, update them — likely there are no callers other than `code_path_audit_20260607`'s historical references which don't run). + - **COMMIT:** `conductor(state): mark Phase 2 task 2.3 verification` (or skip if no code changes). + - **GIT NOTE:** Green-phase verification for the rename + widening. + +--- + +## Phase 3: Styleguide Doc Reconciliation + +Focus: Fix `python.md` §17 enforcement inventory + §17.8 section to match post-track reality. Close contradictions C3, C18 (audit_imports exists), C1+C21 (script renamed), C2 (scope clarified), C5 (Result notation — only if no branch-sensitivity; per spec OOS, this is C5 which is deferred — confirm during this phase). + +- [ ] Task 3.1: Fix `python.md` §17 inventory table (lines 449-456) + §17.8 enforcement section (lines 357-362) + - **WHERE:** `conductor/code_styleguides/python.md` + - **WHAT:** Per spec FR4: + 1. Inventory table (lines 449-456): update the rows: + - `dict[str, Any]` ban: ADD a row for `scripts/audit_boundary_layer.py --strict` (implemented this track; reads `boundary_layer_allowlist.toml`; `--no-allowlist` audits all). KEEP the existing `audit_weak_types.py --strict` row (they catch overlapping but distinct shapes — weak_types catches `Any` in any position; boundary_layer specifically targets `dict[str, Any]` in *signatures* outside the allowlisted boundary). + - `Optional[T]` returns: change the row from "audit_optional_in_3_files.py covering 4 baseline files" to "audit_optional_returns.py --strict covering all src/*.py; reads audit_optional_returns.baseline.json for the 3 history.py residuals until cruft_elimination Phase 6". Mark "✅ implemented". + - Local imports + `_PREFIX` aliasing + repeated `.from_dict()`: change `audit_imports.py` row to "✅ implemented" (was "⚠️ not yet built" — wrong; the script exists at `scripts/audit_imports.py`). + - Repeated `.from_dict()`: drop "(no script planned; relies on Tier 2 review)" — covered by `audit_imports.py`. + 2. §17.8 enforcement section (lines 357-362): rewrite the bullets per spec FR4: + - Bullet for `audit_optional_returns.py` → reflects rename + all-src scope. + - Bullet for `audit_imports.py` → drop the "(planned per §17.9a)" parenthetical; mark as implemented. + - Bullet for `audit_boundary_layer.py --strict` → replace the "boundary_layer audit (planned...)" bullet; describe the script + allowlist + `--no-allowlist` flag. + - The "Pre-commit: every commit MUST pass all four audits above" line → "five audits above" (weak_types, boundary_layer, optional_returns, exception_handling, imports). + - **HOW:** Use `manual-slop_edit_file` MCP tool. Verify exact line ranges via `manual-slop_get_file_slice` before editing (the line numbers above are approximate; the actual edit replaces a contiguous block). Preserve CRLF. + - **SAFETY:** Pure doc edit. No code. No `src/` changes. No tests changes. + - **COMMIT:** `docs(python.md): reconcile §17 inventory + §17.8 with post-track reality` + - **GIT NOTE:** Closes C3 (audit_imports.py was "planned" but exists), C18 (audit count), C1+C21 reflected in doc; C2 scope clarified. + +- [ ] Task 3.2: Cross-reference sweep for `audit_optional_in_3_files.py` references + - **WHAT:** Use `manual-slop_py_find_usages` / `rg` to find ALL references to the old script name across `conductor/` and `docs/`. Per the spec, references likely exist in `error_handling.md:885` + `docs/AGENTS.md §"Convention Enforcement"`. For each reference: + - If it's a historical/cross-reference note (e.g., "was `audit_optional_in_3_files.py`"), leave it. + - If it's an enforcement-instruction reference (e.g., "run `uv run python scripts/audit_optional_in_3_files.py --strict`"), update to `audit_optional_returns.py`. + - **COMMIT:** `docs: update audit_optional_in_3_files.py references to audit_optional_returns.py` + - **GIT NOTE:** Historical references preserved (the rename history is documented in python.md:359); enforcement instructions updated. + +--- + +## Phase 4: End-of-Track Report + State Update + +- [ ] Task 4.1: Run the full 7-audit strict suite (gate verification) + - **WHAT:** Execute all 7 audit scripts (now including the 2 new ones this track ships) in `--strict` mode: + ``` + uv run python scripts/audit_weak_types.py --strict + uv run python scripts/audit_boundary_layer.py --strict + uv run python scripts/audit_optional_returns.py --strict + uv run python scripts/audit_exception_handling.py --strict + uv run python scripts/audit_imports.py --strict + uv run python scripts/audit_main_thread_imports.py + uv run python scripts/audit_no_models_config_io.py + ``` + Expected: all pass (the boundary audit's 2 residuals `hot_reloader.py` + `startup_profiler.py` MUST be in the baseline JSON or the allowlist — verify before this step). The Optional audit's 3 `history.py` residuals are in `audit_optional_returns.baseline.json` (created in Phase 2). + - **VERIFY:** If any audit fails, fix the baseline OR the allowlist. Do NOT mask a real violation; document the residual in the end-of-track report instead. + - **COMMIT:** `test(audit): verify all 7 audit gates pass --strict post-track` + - **GIT NOTE:** The 7-audit strict suite green; the 2 boundary + 3 Optional residuals baselined per spec. + +- [ ] Task 4.2: Write end-of-track report + - **WHERE:** `docs/reports/TRACK_COMPLETION_enforcement_gap_closure_20260627.md` (NEW file) + - **WHAT:** Report following the precedent of `TRACK_COMPLETION_post_module_taxonomy_de_cruft_20260627.md`: + - TL;DR + - Phase summary (each phase + commits + status) + - Verification Criteria status (mapped to spec G1-G8) + - File-level changes (new + modified + renamed + new test files) + - Commits log (atomic, ordered) + - Audit gate status (all 7) + - Contradictions closed (C1, C2, C3-partial, C18-partial, C21) and remaining (C5, C6, C16, C17 — deferred per user directive; cite spec OOS) + - Known residuals: 2 boundary (`hot_reloader.py`, `startup_profiler.py`) + 3 Optional (`src/history.py`); these are baselined + owned by future tracks + - Next steps for the user (review + the recommended follow-up track) + - **COMMIT:** `docs(reports): TRACK_COMPLETION_enforcement_gap_closure_20260627` + - **GIT NOTE:** End-of-track report; documents contradiction closure + residual baselines. + +- [ ] Task 4.3: Update `conductor/tracks.md` + `conductor/chronology.md` + `conductor/tracks/enforcement_gap_closure_20260627/state.toml` + - **WHAT:** + 1. `state.toml`: mark all phases "completed" with their checkpoint SHA; set `status = "completed"` + `current_phase = "complete"`. + 2. `conductor/tracks.md`: add a row to the Active Tracks table for this track (status "shipped"); or per the convention of recent tracks, the row is added when the track is initiated and the status updated when shipped. + 3. `conductor/chronology.md`: prepend a row for `2026-06-27 | enforcement_gap_closure_20260627 | shipped | summary...` at the top of the table. + - **COMMIT:** `conductor(state): enforcement_gap_closure_20260627 SHIPPED + TRACK_COMPLETION` + - **GIT NOTE:** Track state + chronology + tracks.md closed out. + +- [ ] Task 4.4: Conductor - User Manual Verification (Protocol in workflow.md) + - **WHAT:** Per the workflow.md "Phase Completion Verification and Checkpointing Protocol", present the results to the user for confirmation. Present: the 7-audit strict pass result, the test count, the contradictions closed, and the residual baselines. PAUSE for user sign-off. + - **COMMIT:** (no commit; this is the user-confirmation gate) + - **GIT NOTE:** User sign-off record. \ No newline at end of file diff --git a/conductor/tracks/enforcement_gap_closure_20260627/spec.md b/conductor/tracks/enforcement_gap_closure_20260627/spec.md new file mode 100644 index 00000000..7b0b8f57 --- /dev/null +++ b/conductor/tracks/enforcement_gap_closure_20260627/spec.md @@ -0,0 +1,433 @@ +# Track Specification: Enforcement Gap Closure (Boundary-Layer Audit + Optional[T] Audit Widening) + +## Overview + +Close the two genuine enforcement gaps in the 7-banned-pattern mandate documented in +`conductor/code_styleguides/python.md` §17 (the LLM Default Anti-Patterns): + +1. **The boundary-layer audit** — the script that enforces "no `dict[str, Any]` + outside the 2-3 wire-parse functions per file" (`python.md` §17.7). Currently + marked "⚠️ not yet built" in the §17 enforcement inventory (`python.md:454`), + though the cruft_elimination_20260627 Phase 10 only produced a *report* + (`docs/reports/boundary_layer_20260628.md`) — never the *audit script*. This + is the one that prevents the next LLM from reaching for `dict[str, Any]` in + `app_controller.py` again. + +2. **The `audit_optional_in_3_files.py` rename + widening** — the script + currently named `audit_optional_in_3_files.py` actually checks 4 files + (the contradictions report C1+C21) and only enforces the `Optional[T]` ban + on those 4 baseline files. `python.md:359` already references a successor + `audit_optional_returns.py` (claimed "✅ implemented" in the inventory at + `python.md:452`) but the rename never happened and the script never widened + to all `src/*.py`. This track lands reality on both the script and the doc. + +Both pieces are parallel-safe against the running `post_module_taxonomy_de_cruft_20260627` +Tier 2 work: this track touches only `scripts/audit_*`, `scripts/*.toml` (allowlists), +`conductor/code_styleguides/python.md` (the inventory table), and new `tests/test_*` +files. Zero overlap with `src/models.py`, `tests/test_models*`, `src/api_hooks.py`, +`scripts/audit_no_models_config_io.py`, or anything else Tier 2 is modifying. + +## Current State Audit (as of master `77b70226`, branch `tier2/post_module_taxonomy_de_cruft_20260627` `ddcec7b0`) + +### Already Implemented (DO NOT re-implement) + +- `scripts/audit_weak_types.py` (388 lines) — flags `dict[str, Any]`, `Any`, + anonymous tuple returns; informational default + `--strict` CI gate; reads + `scripts/audit_weak_types.baseline.json`. **Implemented, working.** Covers + §17.1 (`dict[str, Any]` / `Any` ban) and §17.2 (anonymous tuples) globally. + +- `scripts/audit_exception_handling.py` (~500 lines) — classifies + `try/except/finally/raise` sites into 10 categories; informational default + + `--strict` CI gate. **Implemented, working.** Covers §17.3 (silent swallow / + broad catch) globally. + +- `scripts/audit_imports.py` (309 lines) — flags local imports (§17.9a), + `_PREFIX` aliasing (§17.9b), and repeated `.from_dict()` (§17.9c); + informational default + `--strict` CI gate; reads + `scripts/audit_imports_whitelist.toml` for vendor-SDK-warmup + hot-reload + per-file exemptions. **Implemented, working** (despite `python.md:455-456` + marking it "not yet built" — a doc drift this track fixes). Covers §17.9 + fully. + +- `scripts/audit_imports_whitelist.toml` (81 lines) — per-file whitelist with + `reason` field + "Last reviewed" header. **The precedent template** for the + new `boundary_layer_allowlist.toml` this track creates. + +- `scripts/audit_optional_in_3_files.py` (122 lines) — AST-scans 4 files + (`src/mcp_client.py`, `src/ai_client.py`, `src/rag_engine.py`, + `src/code_path_audit.py`); the `BASELINE_FILES` tuple at line 17-22 is the + only thing pinning it to those files; the audit logic is generic + (`_return_annotation_is_optional`, `_annotation_is_optional_arg`, + `audit_file`). **Implementation 100% reusable; only the file glob + + name + docs need to change.** + +### Gaps to Fill (This Track's Scope) + +- **GAP-1: No boundary-layer audit script exists.** `python.md:454` and + `python.md:361` mark it "planned / not yet built". The + `cruft_elimination_20260627` spec describes it at FR1 §72 ("Boundary Layer + is EXACTLY 2 places") and G14 ("boundary layer is documented as exactly 2 + places") but only ever delivered a *report* (`boundary_layer_20260628.md`), + never a *static audit*. Without this, the §17.7 contract ("2-3 boundary + functions per file, everything else must be typed") is policy-without-teeth. + +- **GAP-2: `audit_optional_in_3_files.py` name lies + scope is too narrow.** + - It actually checks 4 files (mcp_client, ai_client, rag_engine, + code_path_audit) but is named "_3_files". + - It only covers those 4 baseline files. The §17 mandate requires + `Optional[T]` return-types banned in *all* `src/*.py`. + - `python.md:359` + `python.md:452` already promise an + `audit_optional_returns.py` "covering all `src/*.py`" — but no such + script exists. The doc claims reality that the code doesn't match. + +- **GAP-3: `python.md` §17 inventory table is internally inconsistent.** + Lines 451-456 mark `audit_imports.py` as "not yet built" (false — it exists) + and `audit_optional_returns.py` as "implemented" (false — it doesn't exist; + only the `audit_optional_in_3_files.py` does). This track corrects both rows + to match post-track reality. + +### Verified `dict[str, Any]` Distribution on master (the blast-radius for GAP-1) + +Per the audit-style AST scan I ran on master at `77b70226` (full scan of all +`src/*.py`): + +| File | ret sites | param sites | has `from_dict` | calls tomllib/json.loads | +|------|-----------|-------------|------------------|--------------------------| +| src/theme_models.py | 2 | 2 | yes | yes | +| src/context_presets.py | 0 | 3 | no | no | +| src/log_registry.py | 2 | 1 | yes | yes | +| src/hot_reloader.py | 1 | 1 | no | no | +| src/mcp_client.py | 0 | 2 | yes | yes | +| src/personas.py | 1 | 1 | yes | yes | +| src/presets.py | 1 | 1 | no | yes | +| src/tool_presets.py | 1 | 1 | yes | yes | +| src/type_aliases.py | 1 | 1 | yes | no | +| src/workspace_manager.py | 1 | 1 | yes | yes | +| src/events.py | 1 | 0 | no | no | +| src/gemini_cli_adapter.py | 1 | 0 | no | yes | +| src/openai_compatible.py | 1 | 0 | no | no | +| src/paths.py | 1 | 0 | no | yes | +| src/session_logger.py | 0 | 1 | no | no | +| src/startup_profiler.py | 1 | 0 | no | no | +| ... 50 other `src/*.py` | 0 | 0 | (varies) | (varies) | + +Totals: **12 `dict[str, Any]` returns + 16 params across 16 files**; ~50 other +files have zero `dict[str, Any]` in signatures. + +Per-file manual classification (the same kind of classification the +`audit_imports_whitelist.toml` makes for hot-reload files): + +- **LEGITIMATE BOUNDARY** (audit must allow): `context_presets.py` + (`load_all/save_preset/delete_preset(project_dict: Dict[str, Any])` — + `project_dict` IS the wire TOML), `events.py` `to_dict()` (wire + serialization for the WS protocol), `openai_compatible.py` + `_to_dict_tool_call(tc: ToolCall) -> dict[str, Any]` (converts typed + `ToolCall` to vendor wire dict), `theme_models.py` (the schema is the wire + for `.ini` rendering), `log_registry.py` (JSON-L log shape), `presets.py`, + `tool_presets.py`, `personas.py`, `workspace_manager.py`, `paths.py`, + `gemini_cli_adapter.py`, `mcp_client.py` (the MCP wire-protocol parsers), + `type_aliases.py` (`from_dict(raw: dict[str, Any])` classmethods — the + literal definition of boundary), `session_logger.py` (writes JSONL). +- **GENUINE VIOLATIONS** (audit should flag, baseline captures them so + strict stays green until a migration track fixes): `hot_reloader.py` + (`capture_state`/`restore_state(app, ...) -> dict[str, Any]` — internal + state, could be a `HotReloadSnapshot` dataclass), `startup_profiler.py` + (`snapshot() -> dict[str, Any]` — could be a `ProfilerSnapshot` dataclass). + +So the audit must: +1. Find every `dict[str, Any]` in function signatures (param + return + + annotated assignment) in every `src/*.py`. +2. For each site, check whether its enclosing function is allowlisted in + `scripts/boundary_layer_allowlist.toml` (per-file + per-function entries + with a `reason` field, mirroring the `audit_imports_whitelist.toml` + contract). +3. Exit 1 in `--strict` mode on any *un*-allowlisted site. +4. Emit a `WHITELISTED` annotation per allowlisted file so the user sees the + audit considered it (mirrors the `audit_imports.py` precedent). +5. Ship an initial `boundary_layer_allowlist.toml` listing the ~14 legitimate + boundary files identified above, each with a `reason` field documenting + why it's at the wire. + +### Verified `Optional[T]` Return-Type Distribution on master (the blast-radius for GAP-2) + +Same AST scan, but counting `Optional[X]` return annotations: +- **Total `RETURN_OPTIONAL` violations: 3, in 1 file** (`src/history.py`) +- **Total `PARAM_OPTIONAL` (warning only, never blocks strict): 119 across many files** + — these are legal per `error_handling.md` ("argument types that may be + `None` describe a caller choice, not a runtime failure"). + +So widening the audit from 4 files → all `src/*.py` surfaces **3 new strict +violations** in `src/history.py`. The existing `audit_optional_in_3_files.py` +already covers the 4 baseline files (all clean). This track adds the 3 +`history.py` sites to a new `audit_optional_returns.baseline.json` so the +widened strict gate stays green until cruft_elimination Phase 6 (which owns +those 3 sites) actually migrates them. The 3 sites are documented in the +allowlist; they are NOT fixed by this track (out of scope; the fix belongs to +the cruft_elimination Phase 6 Optional[T]-migration work). + +## Goals + +- **G1.** A working `scripts/audit_boundary_layer.py` that AST-scans all + `src/*.py` for `dict[str, Any]` in function signatures (params, returns, + annotated locals) and exits 1 in `--strict` mode on any un-allowlisted site. + +- **G2.** A working `scripts/boundary_layer_allowlist.toml` that declares the + legitimate boundary functions per file, each with a `reason` field, modeled + on `audit_imports_whitelist.toml` (with `--show-allowlist` and + `--no-allowlist` flags mirroring the imports whitelist precedent). + +- **G3.** `audit_optional_in_3_files.py` renamed to + `audit_optional_returns.py`, `BASELINE_FILES` replaced with a `src/*.py` + glob, docstrings updated to drop the "3 files" fiction. The 3 `history.py` + violations baselined in `audit_optional_returns.baseline.json` so strict + stays green. Existing strict callers (`code_path_audit_20260607` referenced + the old name — update or alias accordingly). + +- **G4.** `python.md` §17 enforcement inventory (lines 449-456) corrected to + match post-track reality: `audit_boundary_layer.py` implemented, the renamed + `audit_optional_returns.py` "scans all `src/*.py`", `audit_imports.py` + marked implemented (it already is), and the inventory's "Pre-commit: every + commit MUST pass all four audits" line updated to "five audits" (or + whatever the actual post-track count is). + +- **G5.** `conductor/code_styleguides/error_handling.md` and + `conductor/code_styleguides/python.md` references to the renamed script + updated (any line saying `audit_optional_in_3_files.py` -> + `audit_optional_returns.py`, except the one legacy cross-reference note + in `python.md:359` documenting the rename history). + +- **G6.** New tests in `tests/test_audit_boundary_layer.py` (≥10 tests: + finder detects `dict[str, Any]` in return / param / local annotation; + allowlist suppresses findings + emits WHITELISTED; `--strict` exits 1 on + un-allowlisted site, exits 0 on allowlisted; `--json` output shape; missing + file handling; syntax error handling). + +- **G7.** New/updated tests in `tests/test_audit_optional_returns.py` + (or update existing test file if one references the old name): ≥5 tests + confirming the widened scope, the rename, baseline reading, and + `--strict` behavior. + +- **G8.** End-of-track report at + `docs/reports/TRACK_COMPLETION_enforcement_gap_closure_20260627.md` + documenting what shipped + the residual violation baselines + any + contradictions from `CONTRADICTIONS_REPORT_20260627.md` closed (C1, C2, + C3-partial, C18-partial, C21) and which remain (C5, C6, C16, C17 — those + are docs-sync items deferred until tier2 stabilizes, per user directive + 2026-06-27). + +## Functional Requirements + +### FR1: `scripts/audit_boundary_layer.py` + +- **CLI contract** mirrors `audit_exception_handling.py` + `audit_imports.py`: + - `uv run python scripts/audit_boundary_layer.py` — informational (exits 0) + - `uv run python scripts/audit_boundary_layer.py --strict` — exits 1 on + any un-allowlisted `dict[str, Any]` signature site + - `uv run python scripts/audit_boundary_layer.py --json` — JSON output + - `uv run python scripts/audit_boundary_layer.py --show-allowlist` — + prints the current allowlist + reasons, exits 0 + - `uv run python scripts/audit_boundary_layer.py --no-allowlist` — + audits all sites regardless of allowlist (for one-off audits) +- **Detection contract** — finds `dict[str, Any]` in: + - function return annotations (`def f(...) -> dict[str, Any]`) + - function parameter annotations (`def f(x: dict[str, Any])`) + - annotated assignments to locals at function scope + (`acc: dict[str, dict[str, Any]] = {}` — common pattern in vendor adapters) +- **Allowlist contract** — reads `scripts/boundary_layer_allowlist.toml`. + Per-file entries: `[allowlist.""] reason = "..."`. Within + an allowlisted file, ALL `dict[str, Any]` sites are suppressed with a + single `WHITELISTED` annotation per file (mirrors `audit_imports.py` + precedent; per-line entries would be brittle because the same file has + multiple boundary functions). Use `--no-allowlist` to ignore the allowlist. +- **Coverage:** all `src/*.py`. The audit does NOT traverse `tests/`, + `scripts/`, `simulation/` — those aren't subject to §17.7. +- **Defaults:** informational mode prints a summary table (file, sites, + allowlisted?) + a list of violations. `--strict` prints the same and + exits 1 if there are un-allowlisted sites. +- **Source:** 1-space indent, no comments in body, type-hinted, docstrings + where the contract is non-obvious. Module docstring explains the §17.7 + contract + the allowlist pattern. + +### FR2: `scripts/boundary_layer_allowlist.toml` + +- TOML file modeled on `audit_imports_whitelist.toml`: + - Header comment block explaining the purpose + the format. + - "Last reviewed: 2026-06-27" + - `[allowlist.""]` entries for each legitimate boundary + file with a `reason` field documenting why it's at the wire boundary. +- **Initial contents:** the ~14 legitimate boundary files identified in the + Current State Audit (`context_presets.py`, `events.py`, + `openai_compatible.py`, `theme_models.py`, `log_registry.py`, `presets.py`, + `tool_presets.py`, `personas.py`, `workspace_manager.py`, `paths.py`, + `gemini_cli_adapter.py`, `mcp_client.py`, `type_aliases.py`, + `session_logger.py`). The two genuine violators (`hot_reloader.py`, + `startup_profiler.py`) are NOT in the allowlist — the audit will flag them + on master, but `audit_boundary_layer.baseline.json` will record them so + `--strict` stays green until a future track migrates them. + +### FR3: Rename + widen `audit_optional_in_3_files.py` → `audit_optional_returns.py` + +- **Rename:** `git mv scripts/audit_optional_in_3_files.py + scripts/audit_optional_returns.py` (preserves git history). +- **Code changes:** + - Module docstring: drop "4 baseline files"; say "all `src/*.py` per + §17 post-2026-06-27 widening". + - `BASELINE_FILES: tuple[str, ...] = (...)` → `def _discover_src_files() -> + list[Path]: return sorted(Path("src").glob("*.py"))` (the precedent is + `audit_exception_handling.py`'s glob approach). + - `audit_file()` is already generic — no logic change. + - Output: the summary line says "scanned N files" with N = the count. +- **Baseline file:** create `scripts/audit_optional_returns.baseline.json` + recording the 3 `src/history.py` `RETURN_OPTIONAL` violations so + `--strict` stays green. The strict-mode behavior: exit 1 if findings > + baseline, exit 0 otherwise. (Mirrors `audit_weak_types.py`'s baseline + + `--strict` contract — see `audit_weak_types.baseline.json`.) +- **Backward-compat:** The old name `audit_optional_in_3_files.py` is gone. + Any external references to the old name must be updated. (Per the + pre-flight grep, references exist in `python.md:359`, `python.md:452`, + and possibly `error_handling.md` — those are doc edits in G5. The + `code_path_audit_20260607` track's plan referenced the old name as a + cross-reference contract — that's historical; not updated.) + +### FR4: `python.md` §17 enforcement inventory + §17.8 enforcement section + +- **§17 inventory table (lines 449-456)** corrected: + - Row for `dict[str, Any]` ban: `audit_weak_types.py` (implemented) + + `audit_boundary_layer.py --strict` (implemented this track) — BOTH + listed, with the boundary audit's note: "uses + `scripts/boundary_layer_allowlist.toml`; use `--no-allowlist` to audit + all `src/*.py` without suppression." + - Row for `Optional[T]` returns: `audit_optional_returns.py` (renamed + + widened to all `src/*.py` this track; reads + `audit_optional_returns.baseline.json` for the 3 `history.py` residuals + until cruft_elimination Phase 6). + - Row for local imports + aliasing + repeated `from_dict()`: + `audit_imports.py` — marked "✅ implemented" (CORRECTED from current + "⚠️ not yet built"). + - Row for repeated `.from_dict()`: same as above (covered by + `audit_imports.py`). +- **§17.8 enforcement section (lines 357-362)** updated: + - Bullet for `audit_optional_returns.py` → reflects rename + widening. + - Bullet for `audit_imports.py` → marked implemented (drop the parenthetical + "planned in §17.9a"). + - Bullet for "boundary_layer audit (planned...)" → replaced with bullet + for `audit_boundary_layer.py --strict` (implemented, references + `boundary_layer_allowlist.toml`). + - The "Pre-commit: every commit MUST pass all four audits above" line → + "five audits" (weak_types, boundary_layer, optional_returns, + exception_handling, imports). + +### FR5: Test files + +- **`tests/test_audit_boundary_layer.py`** (NEW) — ≥10 tests: + - `test_finder_detects_dict_return_annotation` — synthetic .py with a + `def f() -> dict[str, Any]: ...` → finding emitted. + - `test_finder_detects_dict_param_annotation` — `def f(x: dict[str, Any])` + → finding emitted. + - `test_finder_detects_dict_local_assignment` — `acc: dict[str, Any] = {}` + inside a function → finding emitted. + - `test_finder_ignores_non_dict_any` — `def f() -> dict[str, int]` → no + finding. + - `test_allowlist_suppresses_findings` — file in allowlist → findings + suppressed, `WHITELISTED` annotation emitted instead. + - `test_strict_exits_1_on_violation` — un-allowlisted violation → exit 1. + - `test_strict_exits_0_when_allowlisted` — allowlisted file → exit 0. + - `test_json_output_shape` — `--json` output has the expected top-level + keys (`files_scanned`, `files_with_findings`, `total_findings`, + `by_kind`, `findings`). + - `test_missing_file_handling` — referenced file absent → graceful + `MISSING_FILE` finding, not a crash. + - `test_syntax_error_handling` — malformed .py → graceful `SYNTAX_ERROR` + finding, not a crash. + - `test_show_allowlist_flag` — `--show-allowlist` prints entries, exits 0. +- **`tests/test_audit_optional_returns.py`** (NEW) — ≥5 tests: + - `test_renamed_script_exists` — `scripts/audit_optional_returns.py` + exists; `scripts/audit_optional_in_3_files.py` does NOT. + - `test_scans_all_src_files` — audit finds a synthetic `Optional[X]` + return in a new file under `src/` that wasn't in the old 4-file + baseline. (Use `monkeypatch` to point at a `tmp_path` src/ tree.) + - `test_baseline_reading_keeps_strict_green` — with 3 known `history.py` + sites baselined, `--strict` exits 0. + - `test_strict_exits_1_above_baseline` — add 1 new `Optional[X]` return + not in baseline → exit 1. + - `test_param_optional_is_warning_not_strict` — `PARAM_OPTIONAL` + findings never cause `--strict` to exit 1. + +## Non-Functional Requirements + +- **1-space indentation** for all Python code (hard rule per workflow.md). +- **No comments in body** per AGENTS.md "No comments to source code". +- **CRLF line endings** preserved on Windows (use `manual-slop_edit_file` + MCP tool, not native `edit`, to preserve formatting per workflow.md). +- **Atomic per-task commits** — never batch; one task = one commit + one + plan/state update commit. +- **No diagnostic noise** — no `sys.stderr.write("[FOO] ...")` lines in + the audit scripts. +- **`--json` mode** produces machine-readable output for CI integration. +- **Default mode** is informational (exit 0) per the precedent of every + other audit script; `--strict` is the CI gate. +- **Performance** — the audit scans all `src/*.py` (~66 files); AST parse + + walk should complete in well under 1 second wall-clock (the existing + `audit_weak_types.py` does the same scale and is sub-second). + +## Architecture Reference + +- **`docs/guide_meta_boundary.md`** — the domain-distinction rule; the + boundary layer is an Application concept, not a meta-tooling one. +- **`docs/reports/boundary_layer_20260628.md`** — the *report* this audit + *implements*. Lists every legitimate `Metadata` usage and explains why + each is at the wire boundary. +- **`conductor/code_styleguides/python.md` §17.7** — the §17.7 contract: + "the ONLY place these patterns are allowed is at the literal wire + boundary — the function that calls `tomllib.load()`, `json.loads()`, or + a vendor SDK's response parser. The boundary is 2-3 functions per file." +- **`conductor/code_styleguides/data_oriented_design.md` §8.5** — the + Python Type Promotion Mandate (the canonical rule this audit enforces). +- **`conductor/code_styleguides/error_handling.md`** — the `Optional[T]` + ban (and the `Result[T]` + `NIL_T` replacement pattern). +- **`scripts/audit_imports.py` + `scripts/audit_imports_whitelist.toml`** — + the precedent template: AST scan + per-file allowlist + `--strict` CI gate + + `--json` / `--show-whitelist` / `--no-whitelist` flags. The new + `audit_boundary_layer.py` should match this contract closely. +- **`scripts/audit_weak_types.py` + `scripts/audit_weak_types.baseline.json`** — + the precedent for the `--strict` baseline-JSOא contract (baseline of known + violations; `--strict` exits 1 if current findings exceed baseline). The + renamed `audit_optional_returns.py` reuses this pattern for the 3 + `history.py` residuals. +- **`docs/reports/CONTRADICTIONS_REPORT_20260627.md`** — the source of the + contradictions this track closes: C1 (audit name vs behavior), C2 + (Optional ban scope ambiguity), C3 (audit_imports "planned" but actually + built), C18 (2/7 vs actually 4/7 patterns audited), C21 (script name). +- **`docs/reports/TRACK_COMPLETION_post_module_taxonomy_de_cruft_20260627.md`** + — current state of the running parallel track; confirms zero file-overlap. + +## Out of Scope + +- **Fixing the 3 `src/history.py` `Optional[T]` returns.** Those belong to + `cruft_elimination_20260627` Phase 6 (the deferred Optional[T]-returns + migration work). This track only *baselines* them so the widened strict + gate stays green; the actual migration is the future track's job. +- **Fixing the 2 `hot_reloader.py` + `startup_profiler.py` `dict[str, Any]` + violations.** Same logic: baseline only; a future track migrates them to + typed dataclasses (`HotReloadSnapshot`, `ProfilerSnapshot`). +- **Docs-count drift in `docs/Readme.md`** (providers 5→8, tests 322→251, + commands 50+→33). Per user directive 2026-06-27: wait for tier2 branch + to stabilize before touching `docs/Readme.md`. +- **Styleguide §10 Anti-OOP self-contradiction (C16)** and + **`type_aliases.md` line 19 table (C17)** — both deferred per user + directive (they describe code state that only exists post-merge of the + tier2 taxonomy branches; fixing them now would make master's docs + describe code master doesn't have). +- **`RAGChunk.id` field in `guide_rag.md` (C6)** — same branch-sensitivity + reason; deferred. +- **Building the "repeated `.from_dict()` in same expression" enforcement.** + `audit_imports.py` already covers it per §17.9c. No new script needed. +- **Building `scripts/audit_optional_returns.py` baseline migration path.** + The 3 `history.py` sites are simply added to the initial baseline JSON; + no migration script is needed. +- **Wire `--strict` mode of `audit_boundary_layer.py` into actual pre-commit + hooks in the main repo's `.git/hooks/`.** Per C4 in the contradictions + report, pre-commit enforcement is sandbox-only for now; main-repo wiring + is a separate track. +- **Touching any `src/*.py` source.** This track is pure audit + + styleguide + tests. Zero `src/` edits. \ No newline at end of file diff --git a/conductor/tracks/enforcement_gap_closure_20260627/state.toml b/conductor/tracks/enforcement_gap_closure_20260627/state.toml new file mode 100644 index 00000000..da4a6c15 --- /dev/null +++ b/conductor/tracks/enforcement_gap_closure_20260627/state.toml @@ -0,0 +1,64 @@ +# Track state for enforcement_gap_closure_20260627 +# Initialized by Tier 1 Orchestrator on 2026-06-27. +# Implementation delegated to Tier 2 (autonomous) or Tier 3 worker dispatch. + +[meta] +track_id = "enforcement_gap_closure_20260627" +name = "Enforcement Gap Closure (Boundary-Layer Audit + Optional[T] Audit Widening)" +status = "active" +current_phase = 0 # 0 = pre-Phase 1; bump to 1 when implementation starts +last_updated = "2026-06-27" + +[blocked_by] +# None. This track is parallel-safe against the running +# tier2/post_module_taxonomy_de_cruft_20260627 branch (zero file overlap +# verified by Tier 1 against ddcec7b0 + TRACK_COMPLETION file-level changes). + +[blocks] +# None. Follow-up tracks (history.py Optional migration, hot_reloader/ +# startup_profiler dict migration) are documented in metadata.json but not +# formally tracked here. + +[phases] +# All 4 phases per plan.md. checkpointsha filled when the phase checkpoint +# commit is made by the implementing Tier 2/Tier 3. +phase_1 = { status = "pending", checkpointsha = "", name = "Boundary-Layer Audit Script (script + allowlist + 10 tests)" } +phase_2 = { status = "pending", checkpointsha = "", name = "Optional[T] Audit Rename + Widening (rename + 5 tests + baseline JSON)" } +phase_3 = { status = "pending", checkpointsha = "", name = "Styleguide Doc Reconciliation (python.md s17 + cross-ref sweep)" } +phase_4 = { status = "pending", checkpointsha = "", name = "End-of-Track Report + State Update + User Sign-off" } + +[tasks] +# Phase 1: boundary-layer audit script + allowlist + tests +t1_1 = { status = "pending", commit_sha = "", description = "Write 10 failing tests in tests/test_audit_boundary_layer.py (Red phase)" } +t1_2 = { status = "pending", commit_sha = "", description = "Implement scripts/audit_boundary_layer.py per spec FR1 (finder + allowlist + strict + json + --show-allowlist + --no-allowlist + --src)" } +t1_3 = { status = "pending", commit_sha = "", description = "Write scripts/boundary_layer_allowlist.toml with ~14 boundary files + reasons" } +t1_4 = { status = "pending", commit_sha = "", description = "Run tests/test_audit_boundary_layer.py -v (Green phase); verify all 10 pass" } +# Phase 2: Optional audit rename + widening +t2_1 = { status = "pending", commit_sha = "", description = "Write 5 failing tests in tests/test_audit_optional_returns.py (Red phase)" } +t2_2 = { status = "pending", commit_sha = "", description = "git mv audit_optional_in_3_files.py -> audit_optional_returns.py + widen glob to all src/*.py + add --src flag + create audit_optional_returns.baseline.json with 3 history.py residuals" } +t2_3 = { status = "pending", commit_sha = "", description = "Run tests/test_audit_optional_returns.py -v (Green phase); verify all 5 pass" } +# Phase 3: styleguide doc reconciliation +t3_1 = { status = "pending", commit_sha = "", description = "Edit conductor/code_styleguides/python.md s17 inventory table (lines 449-456) + s17.8 enforcement section (lines 357-362) per spec FR4" } +t3_2 = { status = "pending", commit_sha = "", description = "Cross-reference sweep for audit_optional_in_3_files.py in conductor/ + docs/ (update enforcement references; preserve historical)" } +# Phase 4: end-of-track +t4_1 = { status = "pending", commit_sha = "", description = "Run the 7-audit strict suite (verify all pass; the 2 boundary + 3 Optional residuals baselined)" } +t4_2 = { status = "pending", commit_sha = "", description = "Write docs/reports/TRACK_COMPLETION_enforcement_gap_closure_20260627.md per spec G8" } +t4_3 = { status = "pending", commit_sha = "", description = "Update conductor/tracks.md + conductor/chronology.md + state.toml -> status='completed'" } +t4_4 = { status = "pending", commit_sha = "", description = "Conductor - User Manual Verification (PAUSE for user sign-off)" } + +[verification] +# Filled as phases complete. +phase_1_complete = false +phase_2_complete = false +phase_3_complete = false +phase_4_complete = false +all_7_audit_gates_strict_pass = false +contradictions_closed_c1_c2_c3_partial_c18_partial_c21 = false + +[scope_summary] +# Populated by Tier 1; static scope summary for re-warm after compaction. +new_files_count = 7 +modified_files_count = 5 +deleted_files_count = 1 # via git mv (audit_optional_in_3_files.py -> audit_optional_returns.py) +parallel_safe_against_post_module_taxonomy_de_cruft = true +parallel_safety_evidence = "Tier 1 verified zero file overlap against ddcec7b0 + TRACK_COMPLETION_post_module_taxonomy_de_cruft_20260627.md file-level changes table on 2026-06-27" \ No newline at end of file diff --git a/conductor/tracks/test_engine_integration_20260627/metadata.json b/conductor/tracks/test_engine_integration_20260627/metadata.json new file mode 100644 index 00000000..20145d51 --- /dev/null +++ b/conductor/tracks/test_engine_integration_20260627/metadata.json @@ -0,0 +1,107 @@ +{ + "track_id": "test_engine_integration_20260627", + "name": "ImGui Test Engine Integration (Bridge via API Hooks)", + "status": "active", + "branch": "master", + "created": "2026-06-27", + "owner": "Tier 1 (initialized); implementation delegated to Tier 2/3.", + "blocked_by": [], + "blocks": ["test_engine_docking_tests (Track 2)", "test_engine_capture_regression (Track 3)"], + "scope": { + "new_files": [ + "tests/test_test_engine_smoke.py", + "docs/reports/TRACK_COMPLETION_test_engine_integration_20260627.md" + ], + "modified_files": [ + "sloppy.py (add --enable-test-engine CLI flag)", + "src/app_controller.py (add test_engine_enabled field)", + "src/gui_2.py (enable engine in App.run + _register_imgui_tests method)", + "src/api_hooks.py (4 new /api/test_engine/* endpoints)", + "src/api_hook_client.py (4 new client methods)", + "tests/conftest.py (pass --enable-test-engine in live_gui fixture)", + "conductor/tracks.md (add row)", + "conductor/chronology.md (prepend row)" + ], + "deleted_files": [] + }, + "estimated_effort": { + "method": "scope (per workflow.md Tier 1 Track Initialization Rules. NO day estimates.)", + "phase_1": "4 tasks: 1 failing test + 1 CLI flag + 1 engine enable + 1 manual verification", + "phase_2": "4 tasks: 1 failing tests + 4 endpoints + 4 client methods + green verification", + "phase_3": "2 tasks: 1 conftest update + 1 full smoke test verification", + "phase_4": "3 tasks: 1 end-of-track report + 1 state update + 1 user sign-off" + }, + "verification_criteria": [ + "G1: sloppy.py accepts --enable-test-engine; when set, runner_params.use_imgui_test_engine = True + callbacks.register_tests assigned", + "G2: App._register_imgui_tests exists + registers at least 1 smoke test via imgui.test_engine.register_test", + "G3: HookServer has 4 new /api/test_engine/* endpoints (queue, status, results, abort)", + "G4: ApiHookClient has 4 new methods (queue_test, get_test_status, get_test_results, wait_for_test_results)", + "G5: live_gui fixture passes --enable-test-engine in subprocess args", + "G6: tests/test_test_engine_smoke.py has >=3 tests; all pass (engine enabled + queue+run smoke + results shape)", + "G7: docs/reports/TRACK_COMPLETION_test_engine_integration_20260627.md exists; documents threading model verification + Track 2 handoff", + "VC_parallel_safe": "ZERO file overlap with tier2/post_module_taxonomy_de_cruft_20260627 (touching sloppy.py, gui_2.py:641-700, api_hooks.py, api_hook_client.py, conftest.py — none of which Tier 2 touches) or enforcement_gap_closure_20260627 (touching scripts/audit_*, python.md — zero overlap)" + ], + "regressions_and_pre_existing_failures": [], + "pre_existing_failures_remaining": [], + "deferred_to_followup_tracks": [ + { + "title": "Track 2: test_engine_docking_tests", + "description": "Migrate docking/focus/panel tests (test_workspace_profiles_restoration, test_auto_switch_sim, etc.) to use ctx.dock_into, ctx.window_focus, ctx.window_resize. The bridge built in this track enables it.", + "track_status": "planned (Track 2 of 3)" + }, + { + "title": "Track 3: test_engine_capture_regression", + "description": "Visual regression via ctx.capture_screenshot_window + baseline PNG diff. The capture API is available but not wired in this track.", + "track_status": "planned (Track 3 of 3)" + }, + { + "title": "Headless test execution", + "description": "The test engine requires a live GLFW window. Headless mode (no window) is a future research item; the engine's scenario thread drives the actual render loop.", + "track_status": "not yet initialized; research item" + }, + { + "title": "Interactive test engine panel", + "description": "show_test_engine_windows(engine, True) opens the engine's debug UI. Not shown by default; can be added as a debug toggle in a follow-up.", + "track_status": "not yet initialized" + } + ], + "risk_register": [ + { + "id": "R1", + "description": "GIL-transfer crash: the test engine's scenario thread calls Python test_func from a different thread; if the GIL transfer mechanism in hello_imgui/immapp doesn't work with the app's existing thread layout, the app crashes", + "likelihood": "medium", + "impact": "hard blocker; the entire test engine approach is invalid if the threading model doesn't work", + "mitigation": "Phase 1 Task 1.4 is a manual verification checkpoint that catches this before any further work. If it crashes, STOP and report to user. The demo_testengine.py proves the mechanism works for simple apps; the risk is specific to this app's thread layout (AppController, SyncEventQueue, etc.)" + }, + { + "id": "R2", + "description": "Label path mismatch: the smoke test's ctx.set_ref('###manual slop') + ctx.item_click('**/Session') may not match the actual label tree", + "likelihood": "high", + "impact": "smoke test fails with 'item not found'; not a crash, just a wrong path", + "mitigation": "Use imgui.show_id_stack_tool_window() or ctx.window_info() to find the correct labels during implementation. The label tree is deterministic (same build, same layout). Once found, the path is stable." + }, + { + "id": "R3", + "description": "Engine overhead degrades live_gui test performance", + "likelihood": "low", + "impact": "live_gui tests take longer; batch run exceeds timeout", + "mitigation": "The engine is idle when no tests are queued (sub-ms per-frame overhead). The existing fps_idling settings are unchanged. If measurable, the --enable-test-engine flag can be made conditional (only passed when running test_test_engine_* files)." + }, + { + "id": "R4", + "description": "test_func accesses App state from the scenario thread, causing a race with the GUI render thread", + "likelihood": "medium", + "impact": "intermittent test failures or state corruption", + "mitigation": "The spec FR2 + plan Task 1.3 explicitly document: test_func must NOT directly mutate App/AppController state; it must use ctx.* primitives (which post simulated input to the GUI thread). Reading via ctx.item_info / ctx.window_info is safe (C++ accessors). CHECK() runs on the scenario thread but only writes to the engine's C++ result log (thread-safe)." + } + ], + "campaign": { + "name": "Test Engine Campaign (3 tracks)", + "tracks": [ + "test_engine_integration_20260627 (THIS TRACK; bridge + smoke test)", + "test_engine_docking_tests (Track 2; migrate docking/focus/panel tests)", + "test_engine_capture_regression (Track 3; visual regression via screenshot capture)" + ], + "campaign_rationale": "The test engine enables high-fidelity simulation of docking, focus, panel visibility, drag-and-drop, and keyboard input that the current Hook API cannot express. The campaign is split into 3 tracks to isolate risk: Track 1 proves the threading model + bridge work; Track 2 migrates the high-value docking tests; Track 3 adds visual regression. Each track is independently shippable." + } +} \ No newline at end of file diff --git a/conductor/tracks/test_engine_integration_20260627/plan.md b/conductor/tracks/test_engine_integration_20260627/plan.md new file mode 100644 index 00000000..8727a368 --- /dev/null +++ b/conductor/tracks/test_engine_integration_20260627/plan.md @@ -0,0 +1,163 @@ +# Plan: ImGui Test Engine Integration (Bridge via API Hooks) + +Track: `test_engine_integration_20260627` +Branch: master (parallel-safe; touches `sloppy.py`, `src/gui_2.py`, `src/app_controller.py`, `src/api_hooks.py`, `src/api_hook_client.py`, `tests/conftest.py`, new `tests/test_test_engine_smoke.py` — zero overlap with the running tier2 taxonomy branch or the enforcement_gap_closure track) +Spec: `conductor/tracks/test_engine_integration_20260627/spec.md` + +All Python edits use 1-space indentation. No comments in body. CRLF preserved. + +--- + +## Phase 1: Enable the Test Engine in the App + +Focus: Add `--enable-test-engine` CLI flag, set `runner_params.use_imgui_test_engine`, add the `register_tests` callback with a placeholder smoke test. + +- [ ] Task 1.1: Write failing test for `--enable-test-engine` flag + engine activation + - **WHERE:** `tests/test_test_engine_smoke.py` (NEW file) + - **WHAT:** Test 1: `test_engine_enabled` — start `live_gui` (which will pass `--enable-test-engine`), verify the engine is active by calling `client.get_test_status()` (new method, implemented in Phase 3) and asserting `queue_empty == True` (engine is running, no tests queued). This test will FAIL before Phase 1 + Phase 3 land (the endpoint doesn't exist yet). + - **HOW:** Use the `live_gui` fixture. Call `client.get_test_status()`. Assert the response has a `queue_empty` field. (The method is added in Phase 3; the test is written first per TDD.) + - **SAFETY:** No `live_gui` state mutation; just a GET request. + - **COMMIT:** `test(smoke): add failing test for test engine activation` + - **GIT NOTE:** Red-phase test for the `--enable-test-engine` flag + engine activation. + +- [ ] Task 1.2: Add `--enable-test-engine` CLI flag to `sloppy.py` + `AppController` + - **WHERE:** `sloppy.py:35` (add arg), `src/app_controller.py:1042` (add `test_engine_enabled` field) + - **WHAT:** + 1. `sloppy.py`: add `parser.add_argument("--enable-test-engine", action="store_true", help="Enable Dear ImGui Test Engine for automated UI testing")` after the `--enable-test-hooks` line. + 2. `src/app_controller.py:1042`: add `self.test_engine_enabled: bool = ("--enable-test-engine" in sys.argv)` after the `test_hooks_enabled` line. + - **HOW:** Use `manual-slop_edit_file` MCP tool. 1-space indent. + - **SAFETY:** The flag is opt-in; normal runs are unaffected. + - **COMMIT:** `feat(cli): add --enable-test-engine flag` + - **GIT NOTE:** CLI flag for test engine; mirrors the --enable-test-hooks pattern at app_controller.py:1042. + +- [ ] Task 1.3: Enable the engine in `App.run()` + add `_register_imgui_tests` callback + - **WHERE:** `src/gui_2.py:641` (after `RunnerParams()` construction) + `src/gui_2.py:~700` (new `_register_imgui_tests` method) + - **WHAT:** + 1. In `App.run()` between line 641 (`self.runner_params = _hi.RunnerParams()`) and line 684 (`callbacks.show_gui = ...`), add: + ```python + if getattr(self.controller, "test_engine_enabled", False): + self.runner_params.use_imgui_test_engine = True + self.runner_params.callbacks.register_tests = self._register_imgui_tests + ``` + 2. Add `_register_imgui_tests(self)` method on `App` (after `_post_init`, ~line 700): + ```python + def _register_imgui_tests(self) -> None: + from imgui_bundle import hello_imgui + from imgui_bundle.imgui import test_engine + engine = hello_imgui.get_imgui_test_engine() + test = test_engine.register_test(engine, "Smoke Tests", "Tab Switch") + def smoke_func(ctx) -> None: + from imgui_bundle.imgui.test_engine_checks import CHECK + ctx.set_ref("###manual slop") + ctx.item_click("**/Session") + CHECK(True) + test.test_func = smoke_func + ``` + The exact `set_ref` + `item_click` targets are determined during implementation by inspecting the running GUI's label tree. The smoke test should click a harmless tab (e.g., switch to "Session" tab) and `CHECK(True)` as a placeholder assertion. The real assertion (verify the tab actually switched) is added once the label path is confirmed. + - **HOW:** Use `manual-slop_edit_file` / `manual-slop_py_update_definition` MCP tool. 1-space indent. + - **SAFETY:** Guarded by `test_engine_enabled`; normal runs skip this entirely. The `register_tests` callback is only called by `hello_imgui` when `use_imgui_test_engine = True`. + - **COMMIT:** `feat(gui): enable test engine + register smoke test via callbacks.register_tests` + - **GIT NOTE:** Activates the test engine when --enable-test-engine is set; registers a placeholder smoke test. + +- [ ] Task 1.4: Verify the engine activates (manual) + - **WHAT:** Run `uv run python sloppy.py --enable-test-hooks --enable-test-engine` locally. Verify the app starts without crashing (the GIL-transfer mechanism works). Verify `hello_imgui.get_imgui_test_engine()` returns a non-None engine. This is a manual checkpoint before proceeding to Phase 2. + - **COMMIT:** (no commit; manual verification checkpoint) + - **GIT NOTE:** Manual verification that the engine + GIL transfer works with the app's existing thread layout. + +--- + +## Phase 2: Build the API Hooks Bridge + +Focus: Add the 4 `/api/test_engine/*` endpoints to `HookServer` + the 4 methods to `ApiHookClient`. + +- [ ] Task 2.1: Write failing tests for the 4 new `ApiHookClient` methods + - **WHERE:** `tests/test_test_engine_smoke.py` (append to the file from Task 1.1) + - **WHAT:** 2 more tests: + - `test_queue_and_run_smoke_test`: queue the smoke test via `client.queue_test("Smoke Tests", "Tab Switch")`, poll via `client.wait_for_test_results(timeout=30)`, assert `results["count_success"] >= 1` and `results["count_tested"] >= 1`. + - `test_engine_results_shape`: call `client.get_test_results()`, assert the response dict has keys `count_tested`, `count_success`, `count_in_queue`. + - **HOW:** Use `live_gui` fixture. These tests fail until Phase 2 + Phase 3 land (the client methods + endpoints don't exist yet). + - **SAFETY:** The smoke test queues a harmless tab-switch; no destructive state change. + - **COMMIT:** `test(smoke): add failing tests for queue_test + wait_for_test_results + get_test_results` + - **GIT NOTE:** Red-phase tests for the 4 new ApiHookClient methods. + +- [ ] Task 2.2: Add the 4 `/api/test_engine/*` endpoints to `HookServer` + - **WHERE:** `src/api_hooks.py` — `do_GET` (line 157) + `do_POST` (line 490) + - **WHAT:** Add 4 new `elif` branches: + 1. `do_GET`: `elif self.path == "/api/test_engine/status":` — lazy-import `hello_imgui` + `test_engine`; get engine via `hello_imgui.get_imgui_test_engine()`; call `test_engine.is_test_queue_empty(engine)`; respond `{"queue_empty": bool}`. + 2. `do_GET`: `elif self.path == "/api/test_engine/results":` — get engine; create `TestEngineResultSummary()`; call `test_engine.get_result_summary(engine, out_results)`; respond `{"count_tested": N, "count_success": N, "count_in_queue": N}`. + 3. `do_POST`: `elif self.path == "/api/test_engine/queue":` — body `{"group": "...", "name": "..."}`; get engine; find test via `test_engine.find_test_by_name(engine, group, name)`; if found, `test_engine.queue_test(engine, test)`; respond `{"status": "queued"}` or `{"error": "test not found"}` (404). + 4. `do_POST`: `elif self.path == "/api/test_engine/abort":` — get engine; `test_engine.abort_current_test(engine)`; respond `{"status": "aborted"}`. + - **HOW:** Follow the existing endpoint pattern (lines 499-505 for POST, lines 231-241 for GET). Use `_get_app_attr(app, "controller")` to check `test_engine_enabled`; if not enabled, respond 503. Use `json.dumps(...)` for the response body. 1-space indent. + - **SAFETY:** The endpoints run on the HTTP handler thread. `hello_imgui.get_imgui_test_engine()` is a C++ accessor (thread-safe). `queue_test` / `is_test_queue_empty` / `get_result_summary` are thread-safe C++ engine operations (the engine is designed for cross-thread test scheduling). `abort_current_test` is also thread-safe. + - **COMMIT:** `feat(api_hooks): add /api/test_engine/* bridge endpoints` + - **GIT NOTE:** 4 new endpoints: queue, status, results, abort; bridge the test process to the engine via HTTP. + +- [ ] Task 2.3: Add the 4 new methods to `ApiHookClient` + - **WHERE:** `src/api_hook_client.py` (after the existing methods, ~line 500) + - **WHAT:** 4 new methods: + 1. `queue_test(self, group: str, name: str) -> dict` — POST `/api/test_engine/queue` with `{"group": group, "name": name}`; return the response dict. + 2. `get_test_status(self) -> dict` — GET `/api/test_engine/status`; return `{"queue_empty": bool}`. + 3. `get_test_results(self) -> dict` — GET `/api/test_engine/results`; return `{"count_tested": N, "count_success": N, "count_in_queue": N}`. + 4. `wait_for_test_results(self, timeout: float = 30.0) -> dict` — poll `get_test_status()` every 0.5s until `queue_empty == True` or timeout; then return `get_test_results()`. On timeout, return the last results (with a `timed_out: True` field). + - **HOW:** Follow the existing method pattern (e.g., `get_status` at line 105, `push_event` at line 156). Use `requests.get/post` + retry. 1-space indent. + - **SAFETY:** Pure HTTP client; no thread safety concerns. + - **COMMIT:** `feat(api_hook_client): add queue_test + get_test_status + get_test_results + wait_for_test_results` + - **GIT NOTE:** 4 new client methods mirroring the 4 new endpoints; wait_for_test_results replaces time.sleep+get_value polling. + +- [ ] Task 2.4: Run Phase 2 tests (Green phase) + - **WHAT:** `uv run pytest tests/test_test_engine_smoke.py -v --timeout=60`. All 3 tests must pass. If the smoke test (test_queue_and_run_smoke_test) fails, the most likely cause is the `set_ref` / `item_click` label path being wrong — debug by using `imgui.show_id_stack_tool_window()` or `ctx.window_info("manual slop")` to find the correct label. If the GIL transfer fails, the app will crash — that's a hard blocker; report to user. + - **COMMIT:** `conductor(state): Phase 2 green-phase verification` (or skip if no code changes) + - **GIT NOTE:** Green-phase verification for the 4 new endpoints + 4 new client methods. + +--- + +## Phase 3: Live_gui Fixture + Full Smoke Test + +Focus: Pass `--enable-test-engine` in the `live_gui` fixture + verify the full bridge works end-to-end. + +- [ ] Task 3.1: Update `live_gui` fixture to pass `--enable-test-engine` + - **WHERE:** `tests/conftest.py:792` + - **WHAT:** Change `gui_args = ["uv", "run", "python", "-u", gui_script, "--enable-test-hooks"]` to include `"--enable-test-engine"`: + ```python + gui_args = ["uv", "run", "python", "-u", gui_script, "--enable-test-hooks", "--enable-test-engine"] + ``` + - **HOW:** `manual-slop_edit_file` MCP tool. 1-space indent. + - **SAFETY:** The engine is idle when no tests are queued. Existing `live_gui` tests that don't use the test engine are unaffected (the engine adds sub-ms per-frame overhead). + - **COMMIT:** `test(conftest): pass --enable-test-engine in live_gui fixture` + - **GIT NOTE:** Engine activates on every live_gui run; idle when no tests queued. + +- [ ] Task 3.2: Run the full smoke test suite (Green phase) + - **WHAT:** `uv run pytest tests/test_test_engine_smoke.py -v --timeout=60`. All 3 tests pass. Then run a small batch of existing `live_gui` tests to verify no regression: `uv run pytest tests/test_workspace_profiles_restoration.py tests/test_undo_redo_lifecycle.py -v --timeout=120`. + - **COMMIT:** `conductor(state): Phase 3 green-phase verification` + - **GIT NOTE:** Full bridge verified: pytest → HTTP → HookServer → engine → scenario thread → ctx.item_click → GUI thread → CHECK → results → HTTP → pytest assert. + +--- + +## Phase 4: End-of-Track Report + State Update + +- [ ] Task 4.1: Write end-of-track report + - **WHERE:** `docs/reports/TRACK_COMPLETION_test_engine_integration_20260627.md` (NEW file) + - **WHAT:** Report following the precedent: + - TL;DR + - Phase summary (each phase + commits + status) + - Verification Criteria status (mapped to spec G1-G7) + - Threading model verification (did the GIL transfer work? any crashes? any state-access issues from the scenario thread?) + - The 4 new endpoints + 4 new client methods documented + - The smoke test result + - Handoff to Track 2 (docking test migration) — what's now possible that wasn't before + - Known limitations (engine requires a live window; not headless; the interactive panel is not shown) + - **COMMIT:** `docs(reports): TRACK_COMPLETION_test_engine_integration_20260627` + - **GIT NOTE:** End-of-track report; documents the bridge + threading model verification + Track 2 handoff. + +- [ ] Task 4.2: Update `conductor/tracks.md` + `conductor/chronology.md` + `state.toml` + - **WHAT:** + 1. `state.toml`: mark all phases "completed" with checkpoint SHA; `status = "completed"`. + 2. `conductor/tracks.md`: add row for this track (status "shipped"). + 3. `conductor/chronology.md`: prepend row for `2026-06-27 | test_engine_integration_20260627 | shipped | ...`. + - **COMMIT:** `conductor(state): test_engine_integration_20260627 SHIPPED + TRACK_COMPLETION` + - **GIT NOTE:** Track state + chronology + tracks.md closed out. + +- [ ] Task 4.3: Conductor - User Manual Verification + - **WHAT:** Present the results: the smoke test pass, the threading model verification, the 4 new endpoints, the 4 new client methods. PAUSE for user sign-off. + - **COMMIT:** (no commit; user-confirmation gate) + - **GIT NOTE:** User sign-off record. \ No newline at end of file diff --git a/conductor/tracks/test_engine_integration_20260627/spec.md b/conductor/tracks/test_engine_integration_20260627/spec.md new file mode 100644 index 00000000..f1e9f508 --- /dev/null +++ b/conductor/tracks/test_engine_integration_20260627/spec.md @@ -0,0 +1,187 @@ +# Track Specification: ImGui Test Engine Integration (Bridge via API Hooks) + +## Overview + +Integrate the Dear ImGui Test Engine (`imgui_bundle.imgui.test_engine`) into Manual Slop's test infrastructure to enable high-fidelity simulation of user interactions — docking, window focus, panel visibility, drag-and-drop, keyboard input — that the current Hook API cannot express. + +**The design principle:** the API hooks layer (`HookServer` on :8999 + `ApiHookClient`) remains the **single communication boundary** between the test process (pytest) and the GUI subprocess. The test engine is integrated *behind* the API hooks, not alongside them. New `/api/test_engine/*` endpoints bridge the test process to the engine's `queue_test` / `get_result_summary` API. The engine's `test_func` closures run on the engine's scenario thread (GIL-transferred by `hello_imgui`/`immapp`); they use `ctx.item_click("**/Label")`, `ctx.dock_into(src, dst, dir)`, `ctx.window_focus(ref)` etc. to post simulated input events to the GUI render thread. The existing `_pending_gui_tasks` queue and the engine's input simulation are two separate event injection paths into the same GUI thread; they compose without conflict. + +This is **Track 1 of 3** in the test engine campaign. Track 1 = enable the engine + build the bridge + smoke test. Track 2 (follow-up) = migrate docking/focus/panel tests. Track 3 (follow-up) = visual regression via screenshot capture. + +## Current State Audit (as of master `77b70226`) + +### Already Implemented (DO NOT re-implement) + +- **`imgui_bundle` v1.92.5** (pinned in `pyproject.toml:7`) ships the test engine compiled into the nanobind binary. Verified: `from imgui_bundle import imgui; imgui.test_engine.TestEngine` is a live class; `imgui.test_engine.register_test`, `imgui.test_engine.queue_test`, `imgui.test_engine.get_result_summary`, `imgui.test_engine.TestContext` with `dock_into`, `window_focus`, `item_click`, `capture_screenshot_window`, etc. are all present (verified via `dir()` enumeration — ~95 `TestContext` methods + ~35 module-level functions). The `.pyi` stub at `.venv/Lib/site-packages/imgui_bundle/imgui/test_engine.pyi` documents the full API. + +- **`hello_imgui.RunnerParams.use_imgui_test_engine: bool = False`** (`.venv/Lib/site-packages/imgui_bundle/hello_imgui.pyi:2969`) — the flag that enables the engine. When `True`, `hello_imgui`/`immapp` compiles the engine into the runner and provides the GIL-transfer mechanism for the scenario thread. The engine is **already compiled into the wheel** (the C++ build flag `-DHELLOIMGUI_WITH_TEST_ENGINE=ON` was set for the published wheel); the Python-side flag just activates it. + +- **`hello_imgui.get_imgui_test_engine()`** (`.venv/Lib/site-packages/imgui_bundle/hello_imgui.pyi:3355`) — returns the live `TestEngine` instance after `use_imgui_test_engine = True`. Verified callable. + +- **`RunnerCallbacks.register_tests: VoidFunction`** (`.venv/Lib/site-packages/imgui_bundle/hello_imgui.pyi:1809`) — the callback that `hello_imgui` invokes at startup to let the app register tests via `imgui.test_engine.register_test(engine, group, name)`. The demo at `.venv/Lib/site-packages/imgui_bundle/demos_python/demos_immapp/demo_testengine.py` shows the full pattern. + +- **`imgui_bundle.imgui.test_engine_checks.CHECK(result: bool)`** — the assertion primitive that emits pass/fail to the engine's result log with file:line traceback. Verified importable. + +- **The app already uses `hello_imgui.RunnerParams` + `immapp.run()`** — the exact integration path the test engine requires: + - `src/gui_2.py:641`: `self.runner_params = _hi.RunnerParams()` + - `src/gui_2.py:684-688`: `self.runner_params.callbacks.show_gui/show_menus/load_additional_fonts/setup_imgui_style/post_init` are set + - `src/gui_2.py:1486`: `immapp.run(app.runner_params, ...)` — the main loop entry point + - The GIL-transfer mechanism is built into `immapp.run` when `use_imgui_test_engine = True`; no additional threading code is needed on the Python side. + +- **`HookServer`** (`src/api_hooks.py:857`) — the HTTP server on `127.0.0.1:8999`, started when `--enable-test-hooks` is passed. The `do_GET` method (line 157) and `do_POST` method (line 490) use a flat `if/elif self.path == "/api/..."` dispatch. The server holds `self.app` (the `App` instance) and accesses it via `_get_app_attr(app, ...)` helpers. The `_pending_gui_tasks` queue (`app_controller.py:900`) + `_pending_gui_tasks_lock` (`app_controller.py:822`) + `_process_pending_gui_tasks()` (`app_controller.py:1844`, called per-frame from `gui_2.py:1759`) is the existing thread-safe command queue from HTTP handler thread → main render thread. + +- **`ApiHookClient`** (`src/api_hook_client.py`) — the Python client with retry logic, health-check polling, `wait_for_server(timeout)`, `push_event(action, payload)`, `get_value(item)`, `set_value(item, value)`, `click(item)`, `wait_for_event(event_type, timeout)`, etc. Used by all `live_gui` tests. + +- **`live_gui` fixture** (`tests/conftest.py:641`) — session-scoped; spawns `sloppy.py --enable-test-hooks --config=` as a subprocess; polls `http://127.0.0.1:8999/status` until ready; yields a `_LiveGuiHandle` with `.client` (an `ApiHookClient`), `.process`, `.workspace`. The fixture's subprocess args are at `conftest.py:792`: `gui_args = ["uv", "run", "python", "-u", gui_script, "--enable-test-hooks"]`. + +- **`sloppy.py`** (79 lines) — the entry point. CLI flags at lines 31-36: `--headless`, `--web-host`, `--web-port`, `--enable-test-hooks`, `--config`. The `else` branch at line 75 (the normal GUI mode) calls `from src.gui_2 import main; main()`. + +- **`AppController.test_hooks_enabled`** (`src/app_controller.py:1042`) — set via `"--enable-test-hooks" in sys.argv` or `SLOP_TEST_HOOKS=1` env var. Same pattern works for `--enable-test-engine`. + +### Gaps to Fill (This Track's Scope) + +- **GAP-1: The test engine is not enabled.** `runner_params.use_imgui_test_engine` is never set to `True`. No `callbacks.register_tests` callback exists. The engine's scenario thread + GIL-transfer mechanism are dormant. + +- **GAP-2: No `/api/test_engine/*` bridge endpoints.** The `HookServer` has no way for the test process to queue a test, poll results, or abort a running test. The test engine API (`queue_test`, `get_result_summary`, `is_test_queue_empty`, `abort_current_test`) is only accessible from inside the GUI process — not from the HTTP boundary. + +- **GAP-3: No `ApiHookClient` methods for test engine operations.** The client has `click`, `set_value`, `push_event`, `wait_for_event` — but no `queue_test`, `wait_for_test_results`, `get_test_results`. + +- **GAP-4: `live_gui` fixture doesn't pass `--enable-test-engine`.** The subprocess at `conftest.py:792` only passes `--enable-test-hooks`. Without the engine flag, the engine won't activate even after GAP-1 is fixed. + +- **GAP-5: No smoke test proving the end-to-end threading model works.** The test engine's scenario thread + GIL transfer is the highest-risk piece. A minimal smoke test (register a trivial test that clicks a known button + asserts a state change, queue it via the API, poll for results, assert pass) is needed to prove the bridge works before Track 2 migrates real tests. + +### Architecture: Why the API hooks + test engine compose + +``` +pytest test process + └── ApiHookClient (HTTP :8999) ← single communication boundary (KEPT) + └── HookServer.do_POST ← new /api/test_engine/* endpoints + └── imgui.test_engine.queue_test(engine, test) ← schedules on engine + └── TestContext.test_func(ctx) ← runs on engine scenario thread + └── ctx.item_click("**/Label") ← posts simulated input to GUI thread + └── GUI render thread processes the simulated event + └── _process_pending_gui_tasks() still runs per-frame + (existing queue; unaffected; two separate injection paths) +``` + +The test engine's `test_func` runs on its own thread (the scenario thread). The `ctx.*` primitives post simulated input events to the ImGui input queue on the GUI render thread. This is the same destination as real user input and the same destination as `_pending_gui_tasks` — but a different injection mechanism. The two paths are independent; they don't share state, locks, or queues. The test engine doesn't touch `_pending_gui_tasks` and vice versa. + +The GIL-transfer caveat (documented at the top of `test_engine.pyi`) is handled by `hello_imgui`/`immapp` when `use_imgui_test_engine = True` — the C++ layer transfers the GIL between the main thread and the scenario thread. No additional Python-side threading code is needed. The `test_func` callback runs with the GIL held; it can safely call `ctx.*` primitives (which are C++ nanobind calls that release the GIL during the simulated input wait). + +## Goals + +- **G1.** `sloppy.py` accepts `--enable-test-engine` CLI flag; when set, `App.run()` sets `runner_params.use_imgui_test_engine = True` + assigns `runner_params.callbacks.register_tests` to a method that registers tests. + +- **G2.** `App` has a `_register_imgui_tests(self)` method (called by `hello_imgui` at startup via the `register_tests` callback) that registers at least one smoke test ("Smoke Tests", "Click Increment Button") via `imgui.test_engine.register_test(engine, group, name)`. The smoke test's `test_func(ctx)` calls `ctx.set_ref("...")` + `ctx.item_click("**/...")` + `CHECK(...)`. + +- **G3.** `HookServer` (in `src/api_hooks.py`) has 4 new endpoints: + - `POST /api/test_engine/queue` — body `{"group": "...", "name": "..."}`; finds the test by group+name via `imgui.test_engine.find_test_by_name(engine, group, name)`; calls `queue_test(engine, test)`; responds `{"status": "queued"}`. + - `GET /api/test_engine/status` — calls `is_test_queue_empty(engine)`; responds `{"queue_empty": true/false}`. + - `GET /api/test_engine/results` — calls `get_result_summary(engine, out_results)`; responds `{"count_tested": N, "count_success": N, "count_in_queue": N}`. + - `POST /api/test_engine/abort` — calls `abort_current_test(engine)`; responds `{"status": "aborted"}`. + +- **G4.** `ApiHookClient` (in `src/api_hook_client.py`) has 4 new methods: + - `queue_test(group: str, name: str) -> dict` — POST to `/api/test_engine/queue`. + - `get_test_status() -> dict` — GET `/api/test_engine/status`. + - `get_test_results() -> dict` — GET `/api/test_engine/results`. + - `wait_for_test_results(timeout: float = 30.0) -> dict` — polls `get_test_status()` until `queue_empty == True` or timeout; then returns `get_test_results()`. + +- **G5.** The `live_gui` fixture passes `--enable-test-engine` in addition to `--enable-test-hooks` in the subprocess args (`conftest.py:792`). The engine activates on every `live_gui` test run. + +- **G6.** A smoke test in `tests/test_test_engine_smoke.py` that: + 1. Uses the `live_gui` fixture. + 2. Queues the smoke test via `client.queue_test("Smoke Tests", "Click Increment Button")`. + 3. Polls via `client.wait_for_test_results(timeout=30)`. + 4. Asserts `results["count_success"] >= 1` and `results["count_tested"] >= 1`. + This proves the full bridge works: pytest → HTTP → HookServer → engine → scenario thread → `ctx.item_click` → GUI thread → state change → `CHECK` → result log → `get_result_summary` → HTTP → pytest assert. + +- **G7.** End-of-track report at `docs/reports/TRACK_COMPLETION_test_engine_integration_20260627.md` documenting: what shipped, the threading model verification, any GIL-transfer issues encountered, and the handoff to Track 2 (docking test migration). + +## Functional Requirements + +### FR1: `--enable-test-engine` CLI flag + +- `sloppy.py`: add `parser.add_argument("--enable-test-engine", action="store_true", help="Enable the Dear ImGui Test Engine for automated UI testing")` alongside the existing `--enable-test-hooks` flag (line 35). +- `src/app_controller.py`: add `self.test_engine_enabled: bool = ("--enable-test-engine" in sys.argv)` near line 1042 (same pattern as `test_hooks_enabled`). +- `src/gui_2.py` `App.run()` (line 619): between the `RunnerParams()` construction (line 641) and the `callbacks.show_gui = ...` assignments (line 684), add: + ```python + if getattr(self.controller, "test_engine_enabled", False): + self.runner_params.use_imgui_test_engine = True + self.runner_params.callbacks.register_tests = self._register_imgui_tests + ``` + This is guarded by the flag so normal runs are unaffected. + +### FR2: `App._register_imgui_tests(self)` method + +- New method on `App` in `src/gui_2.py` (near the other callback registrations, ~line 700): + ```python + def _register_imgui_tests(self) -> None: + """Called by hello_imgui at startup to register ImGui Test Engine tests. + Reads the live engine via hello_imgui.get_imgui_test_engine(). + [C: src/gui_2.py:App.run (via callbacks.register_tests)] + """ + from imgui_bundle import hello_imgui + from imgui_bundle.imgui import test_engine + engine = hello_imgui.get_imgui_test_engine() + # Smoke test: click a known button and verify state change + test = test_engine.register_test(engine, "Smoke Tests", "Click Increment Button") + def smoke_func(ctx) -> None: + from imgui_bundle.imgui.test_engine_checks import CHECK + ctx.set_ref("...") # TODO: set to a known window + ctx.item_click("**/...") # TODO: click a known button + CHECK(True) # TODO: verify state change + test.test_func = smoke_func + ``` + The exact button + state to click + verify is determined during implementation by inspecting the running GUI's item tree (use `ctx.window_info` / `imgui.show_id_stack_tool_window` to find labels). The smoke test should click something harmless (e.g., a tab switch, a checkbox toggle) and verify the state changed. + +### FR3: `/api/test_engine/*` endpoints in `HookServer` + +- In `src/api_hooks.py` `do_POST` (line 490): add 2 new `elif` branches for `POST /api/test_engine/queue` and `POST /api/test_engine/abort`. +- In `src/api_hooks.py` `do_GET` (line 157): add 2 new `elif` branches for `GET /api/test_engine/status` and `GET /api/test_engine/results`. +- All 4 endpoints guard on `test_engine_enabled` — if the engine is not active, respond `{"error": "test engine not enabled", "enabled": false}` with HTTP 503. +- The engine instance is obtained via `hello_imgui.get_imgui_test_engine()` inside the handler (lazy import; the handler runs on the HTTP thread, but `get_imgui_test_engine()` is a C++ accessor that returns a pointer — safe to call from any thread). + +### FR4: `ApiHookClient` methods + +- In `src/api_hook_client.py`: add 4 methods per G4. Follow the existing method pattern (e.g., `get_status`, `push_event`): construct the URL, `requests.post/get`, retry on connection error, parse JSON, return the dict. + +### FR5: `live_gui` fixture update + +- In `tests/conftest.py:792`: change `gui_args` to include `"--enable-test-engine"` when the fixture spawns the subprocess. The flag flows through to `AppController.test_engine_enabled` → `App.run()` → `runner_params.use_imgui_test_engine = True`. + +### FR6: Smoke test + +- `tests/test_test_engine_smoke.py` (NEW) — 2-3 tests: + - `test_engine_enabled`: `client.get_value("test_engine_enabled")` returns True (or verify via a new gettable field). + - `test_queue_and_run_smoke_test`: queue the smoke test, poll for results, assert success. + - `test_engine_results_shape`: `get_test_results()` returns the expected dict shape. + +## Non-Functional Requirements + +- **1-space indentation** for all Python code. +- **No comments in body** per AGENTS.md. +- **CRLF line endings** preserved. +- **Atomic per-task commits.** +- **Thread safety:** the `test_func` runs on the engine scenario thread. It must NOT directly mutate `App` or `AppController` state — it must use `ctx.*` primitives (which post simulated input to the GUI thread). Reading state via `hello_imgui.get_imgui_test_engine()` or engine queries (`ctx.item_info`, `ctx.window_info`) is safe. The `CHECK()` assertion runs on the scenario thread but only writes to the engine's result log (thread-safe C++ structure). +- **No `live_gui` regression:** the `--enable-test-engine` flag must not affect normal GUI behavior when `live_gui` tests are NOT using the engine. The engine's scenario thread is idle when no tests are queued. The `show_test_engine_windows` panel is NOT shown by default (only via explicit call). +- **Performance:** the engine adds a per-frame overhead when active. The `fps_idling` settings in `runner_params` remain unchanged. The engine's overhead is sub-millisecond per frame when no tests are running. + +## Architecture Reference + +- **`docs/guide_testing.md`** — the `live_gui` fixture, the structural testing contract, the Puppeteer pattern. +- **`docs/guide_api_hooks.md`** — the Hook API surface, the `/api/ask` protocol, the `ApiHookClient` method reference. +- **`docs/guide_gui_2.md`** — the `App` class lifecycle, the `runner_params` construction, the `callbacks` system. +- **`.venv/Lib/site-packages/imgui_bundle/demos_python/demos_immapp/demo_testengine.py`** — the canonical demo for the test engine integration pattern (register_tests callback + test_func closures + CHECK). +- **`.venv/Lib/site-packages/imgui_bundle/imgui/test_engine.pyi`** — the full API stub (2644 lines). Key sections: `TestContext` methods (lines 1445-2096), module-level functions (lines 433-500, 2639+), `TestEngineResultSummary` (3 fields: count_tested, count_success, count_in_queue). +- **`.venv/Lib/site-packages/imgui_bundle/imgui/test_engine_checks.py`** — the `CHECK(result: bool)` assertion primitive. +- **`conductor/workflow.md`** "Live_gui Test Fragility" + "Async Setters Need Poll-For-State" — the existing patterns for `live_gui` tests; the test engine's `wait_for_test_results` replaces `time.sleep` + `get_value` polling with a single engine-side poll. + +## Out of Scope + +- **Migrating existing `live_gui` tests to the test engine.** That's Track 2 (`test_engine_docking_tests_`). This track only builds the bridge + proves it works with 1 smoke test. +- **Visual regression via screenshot capture.** That's Track 3 (`test_engine_capture_regression_`). The `ctx.capture_screenshot_window` API is available but not wired in this track. +- **Headless test execution (no GUI window).** The test engine requires a live GLFW window (the scenario thread drives the actual ImGui render loop). Headless mode is a future research item, not this track. +- **The test engine's interactive UI panel (`show_test_engine_windows`).** Not shown by default. Can be added as a debug toggle in a follow-up. +- **Test engine license audit.** Per the stub: "free for individuals, educational, open-source, and small businesses. Paid for larger businesses." This project is personal-use; no audit needed. Flagged for awareness only. +- **CI wiring of the test engine.** The `live_gui` fixture already runs in CI via the batched runner. The `--enable-test-engine` flag is additive. No CI config changes needed. +- **Touching `src/models.py` or any taxonomy files.** Zero overlap with the running `tier2/post_module_taxonomy_de_cruft_20260627` branch or the `enforcement_gap_closure_20260627` track. \ No newline at end of file diff --git a/conductor/tracks/test_engine_integration_20260627/state.toml b/conductor/tracks/test_engine_integration_20260627/state.toml new file mode 100644 index 00000000..23faa5b5 --- /dev/null +++ b/conductor/tracks/test_engine_integration_20260627/state.toml @@ -0,0 +1,64 @@ +# Track state for test_engine_integration_20260627 +# Initialized by Tier 1 Orchestrator on 2026-06-27. +# Implementation delegated to Tier 2 (autonomous) or Tier 3 worker dispatch. +# This is Track 1 of 3 in the Test Engine Campaign. + +[meta] +track_id = "test_engine_integration_20260627" +name = "ImGui Test Engine Integration (Bridge via API Hooks)" +status = "active" +current_phase = 0 +last_updated = "2026-06-27" + +[blocked_by] +# None. Parallel-safe against tier2/post_module_taxonomy_de_cruft_20260627 +# (zero file overlap: this track touches sloppy.py, gui_2.py:641-700, +# api_hooks.py, api_hook_client.py, conftest.py — none of which Tier 2 touches) +# and enforcement_gap_closure_20260627 (scripts/audit_*, python.md — zero overlap). + +[blocks] +test_engine_docking_tests = "planned (Track 2 of 3 campaign)" +test_engine_capture_regression = "planned (Track 3 of 3 campaign)" + +[phases] +phase_1 = { status = "pending", checkpointsha = "", name = "Enable the Test Engine in the App (CLI flag + runner_params + register_tests callback)" } +phase_2 = { status = "pending", checkpointsha = "", name = "Build the API Hooks Bridge (4 endpoints + 4 client methods)" } +phase_3 = { status = "pending", checkpointsha = "", name = "Live_gui Fixture + Full Smoke Test" } +phase_4 = { status = "pending", checkpointsha = "", name = "End-of-Track Report + State Update + User Sign-off" } + +[tasks] +# Phase 1: enable the engine +t1_1 = { status = "pending", commit_sha = "", description = "Write failing test for --enable-test-engine flag + engine activation (Red phase)" } +t1_2 = { status = "pending", commit_sha = "", description = "Add --enable-test-engine CLI flag to sloppy.py + test_engine_enabled field to AppController" } +t1_3 = { status = "pending", commit_sha = "", description = "Enable engine in App.run() (runner_params.use_imgui_test_engine = True + callbacks.register_tests = self._register_imgui_tests) + add _register_imgui_tests method with smoke test" } +t1_4 = { status = "pending", commit_sha = "", description = "Manual verification: run sloppy.py --enable-test-engine locally; confirm engine activates + no GIL-transfer crash" } +# Phase 2: build the bridge +t2_1 = { status = "pending", commit_sha = "", description = "Write failing tests for queue_test + wait_for_test_results + get_test_results (Red phase)" } +t2_2 = { status = "pending", commit_sha = "", description = "Add 4 /api/test_engine/* endpoints to HookServer (queue, status, results, abort)" } +t2_3 = { status = "pending", commit_sha = "", description = "Add 4 new methods to ApiHookClient (queue_test, get_test_status, get_test_results, wait_for_test_results)" } +t2_4 = { status = "pending", commit_sha = "", description = "Run Phase 2 tests (Green phase); verify all 3 smoke tests pass" } +# Phase 3: live_gui fixture + full smoke test +t3_1 = { status = "pending", commit_sha = "", description = "Update live_gui fixture (conftest.py:792) to pass --enable-test-engine" } +t3_2 = { status = "pending", commit_sha = "", description = "Run full smoke test + regression batch (Green phase)" } +# Phase 4: end-of-track +t4_1 = { status = "pending", commit_sha = "", description = "Write docs/reports/TRACK_COMPLETION_test_engine_integration_20260627.md" } +t4_2 = { status = "pending", commit_sha = "", description = "Update conductor/tracks.md + chronology.md + state.toml -> status='completed'" } +t4_3 = { status = "pending", commit_sha = "", description = "Conductor - User Manual Verification (PAUSE for user sign-off)" } + +[verification] +phase_1_complete = false +phase_2_complete = false +phase_3_complete = false +phase_4_complete = false +engine_activates_without_crash = false +smoke_test_passes = false +no_live_gui_regression = false + +[campaign_context] +# This is Track 1 of 3. The campaign enables high-fidelity UI simulation via the +# Dear ImGui Test Engine, bridged through the existing API hooks layer. +campaign_name = "Test Engine Campaign" +track_1 = "test_engine_integration_20260627 (THIS; bridge + smoke test)" +track_2 = "test_engine_docking_tests (migrate docking/focus/panel tests)" +track_3 = "test_engine_capture_regression (visual regression via screenshot capture)" +key_risk = "R1: GIL-transfer crash if the app's thread layout doesn't work with the engine's scenario thread (mitigated by Phase 1 Task 1.4 manual checkpoint)" \ No newline at end of file diff --git a/docs/reports/MCP_BUGFIX.md b/docs/reports/2026-03-02/MCP_BUGFIX_20260306.md similarity index 100% rename from docs/reports/MCP_BUGFIX.md rename to docs/reports/2026-03-02/MCP_BUGFIX_20260306.md diff --git a/docs/reports/CULLING_CANDIDATES_PHASE5.md b/docs/reports/2026-05-04/CULLING_CANDIDATES_PHASE5.md similarity index 100% rename from docs/reports/CULLING_CANDIDATES_PHASE5.md rename to docs/reports/2026-05-04/CULLING_CANDIDATES_PHASE5.md diff --git a/docs/reports/MUTATION_MATRIX_PHASE5.md b/docs/reports/2026-05-04/MUTATION_MATRIX_PHASE5.md similarity index 100% rename from docs/reports/MUTATION_MATRIX_PHASE5.md rename to docs/reports/2026-05-04/MUTATION_MATRIX_PHASE5.md diff --git a/docs/reports/PHASE5_STABILISATION_REPORT.md b/docs/reports/2026-05-04/PHASE5_STABILISATION_REPORT.md similarity index 100% rename from docs/reports/PHASE5_STABILISATION_REPORT.md rename to docs/reports/2026-05-04/PHASE5_STABILISATION_REPORT.md diff --git a/docs/reports/PIPELINE_ANALYSIS_PHASE5_INIT.md b/docs/reports/2026-05-04/PIPELINE_ANALYSIS_PHASE5_INIT.md similarity index 100% rename from docs/reports/PIPELINE_ANALYSIS_PHASE5_INIT.md rename to docs/reports/2026-05-04/PIPELINE_ANALYSIS_PHASE5_INIT.md diff --git a/docs/reports/STATE_INVENTORY_PHASE5.md b/docs/reports/2026-05-04/STATE_INVENTORY_PHASE5.md similarity index 100% rename from docs/reports/STATE_INVENTORY_PHASE5.md rename to docs/reports/2026-05-04/STATE_INVENTORY_PHASE5.md diff --git a/docs/reports/ai_decoupling_revert_report.md b/docs/reports/2026-05-11/ai_decoupling_revert_report.md similarity index 100% rename from docs/reports/ai_decoupling_revert_report.md rename to docs/reports/2026-05-11/ai_decoupling_revert_report.md diff --git a/docs/reports/AUDIT_ARCHITECTURAL_CHEATS_20260607.md b/docs/reports/2026-06-01/AUDIT_ARCHITECTURAL_CHEATS_20260607.md similarity index 100% rename from docs/reports/AUDIT_ARCHITECTURAL_CHEATS_20260607.md rename to docs/reports/2026-06-01/AUDIT_ARCHITECTURAL_CHEATS_20260607.md diff --git a/docs/reports/COMPACTION_DIGEST_20260607.md b/docs/reports/2026-06-01/COMPACTION_DIGEST_20260607.md similarity index 100% rename from docs/reports/COMPACTION_DIGEST_20260607.md rename to docs/reports/2026-06-01/COMPACTION_DIGEST_20260607.md diff --git a/docs/reports/PLANNING_DIGEST_20260606.md b/docs/reports/2026-06-01/PLANNING_DIGEST_20260606.md similarity index 100% rename from docs/reports/PLANNING_DIGEST_20260606.md rename to docs/reports/2026-06-01/PLANNING_DIGEST_20260606.md diff --git a/docs/reports/SESSION_REPORT_code_path_audit_20260607_20260622.md b/docs/reports/2026-06-01/SESSION_REPORT_code_path_audit_20260607_20260622.md similarity index 100% rename from docs/reports/SESSION_REPORT_code_path_audit_20260607_20260622.md rename to docs/reports/2026-06-01/SESSION_REPORT_code_path_audit_20260607_20260622.md diff --git a/docs/reports/TRACK_COMPLETION_data_structure_strengthening_20260606.md b/docs/reports/2026-06-01/TRACK_COMPLETION_data_structure_strengthening_20260606.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_data_structure_strengthening_20260606.md rename to docs/reports/2026-06-01/TRACK_COMPLETION_data_structure_strengthening_20260606.md diff --git a/docs/reports/TRACK_COMPLETION_unused_scripts_cleanup_20260607.md b/docs/reports/2026-06-01/TRACK_COMPLETION_unused_scripts_cleanup_20260607.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_unused_scripts_cleanup_20260607.md rename to docs/reports/2026-06-01/TRACK_COMPLETION_unused_scripts_cleanup_20260607.md diff --git a/docs/reports/TRACK_STATUS_code_path_audit_20260607_20260622.md b/docs/reports/2026-06-01/TRACK_STATUS_code_path_audit_20260607_20260622.md similarity index 100% rename from docs/reports/TRACK_STATUS_code_path_audit_20260607_20260622.md rename to docs/reports/2026-06-01/TRACK_STATUS_code_path_audit_20260607_20260622.md diff --git a/docs/reports/startup_audit_20260606.txt b/docs/reports/2026-06-01/startup_audit_20260606.txt similarity index 100% rename from docs/reports/startup_audit_20260606.txt rename to docs/reports/2026-06-01/startup_audit_20260606.txt diff --git a/docs/reports/startup_baseline_20260606.txt b/docs/reports/2026-06-01/startup_baseline_20260606.txt similarity index 100% rename from docs/reports/startup_baseline_20260606.txt rename to docs/reports/2026-06-01/startup_baseline_20260606.txt diff --git a/docs/reports/SSDL_CONTEXT_HANDLING_PIPELINE_20260613.md b/docs/reports/2026-06-08/SSDL_CONTEXT_HANDLING_PIPELINE_20260613.md similarity index 100% rename from docs/reports/SSDL_CONTEXT_HANDLING_PIPELINE_20260613.md rename to docs/reports/2026-06-08/SSDL_CONTEXT_HANDLING_PIPELINE_20260613.md diff --git a/docs/reports/SSDL_DISCUSSION_AI_TURN_CYCLE_20260613.md b/docs/reports/2026-06-08/SSDL_DISCUSSION_AI_TURN_CYCLE_20260613.md similarity index 100% rename from docs/reports/SSDL_DISCUSSION_AI_TURN_CYCLE_20260613.md rename to docs/reports/2026-06-08/SSDL_DISCUSSION_AI_TURN_CYCLE_20260613.md diff --git a/docs/reports/SSDL_MMA_CONDUCTOR_DAG_EXECUTION_LOOP_20260613.md b/docs/reports/2026-06-08/SSDL_MMA_CONDUCTOR_DAG_EXECUTION_LOOP_20260613.md similarity index 100% rename from docs/reports/SSDL_MMA_CONDUCTOR_DAG_EXECUTION_LOOP_20260613.md rename to docs/reports/2026-06-08/SSDL_MMA_CONDUCTOR_DAG_EXECUTION_LOOP_20260613.md diff --git a/docs/reports/TEST_REGRESSION_ANALYSIS_MINIMAX_OPENAI_20260613.md b/docs/reports/2026-06-08/TEST_REGRESSION_ANALYSIS_MINIMAX_OPENAI_20260613.md similarity index 100% rename from docs/reports/TEST_REGRESSION_ANALYSIS_MINIMAX_OPENAI_20260613.md rename to docs/reports/2026-06-08/TEST_REGRESSION_ANALYSIS_MINIMAX_OPENAI_20260613.md diff --git a/docs/reports/TRACK_COMPLETION_ai_client_docs_20260613.md b/docs/reports/2026-06-08/TRACK_COMPLETION_ai_client_docs_20260613.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_ai_client_docs_20260613.md rename to docs/reports/2026-06-08/TRACK_COMPLETION_ai_client_docs_20260613.md diff --git a/docs/reports/ascii_sketch_ux_workflow_20260608.md b/docs/reports/2026-06-08/ascii_sketch_ux_workflow_20260608.md similarity index 100% rename from docs/reports/ascii_sketch_ux_workflow_20260608.md rename to docs/reports/2026-06-08/ascii_sketch_ux_workflow_20260608.md diff --git a/docs/reports/batch_resilience_plan_20260608.md b/docs/reports/2026-06-08/batch_resilience_plan_20260608.md similarity index 100% rename from docs/reports/batch_resilience_plan_20260608.md rename to docs/reports/2026-06-08/batch_resilience_plan_20260608.md diff --git a/docs/reports/c11_python_interop_assessment_20260608.md b/docs/reports/2026-06-08/c11_python_interop_assessment_20260608.md similarity index 100% rename from docs/reports/c11_python_interop_assessment_20260608.md rename to docs/reports/2026-06-08/c11_python_interop_assessment_20260608.md diff --git a/docs/reports/computational_shapes_ssdl_digest_20260608.md b/docs/reports/2026-06-08/computational_shapes_ssdl_digest_20260608.md similarity index 100% rename from docs/reports/computational_shapes_ssdl_digest_20260608.md rename to docs/reports/2026-06-08/computational_shapes_ssdl_digest_20260608.md diff --git a/docs/reports/docs_sync_test_era_20260610.md b/docs/reports/2026-06-08/docs_sync_test_era_20260610.md similarity index 100% rename from docs/reports/docs_sync_test_era_20260610.md rename to docs/reports/2026-06-08/docs_sync_test_era_20260610.md diff --git a/docs/reports/doeh-session_report.md b/docs/reports/2026-06-08/doeh-session_report.md similarity index 100% rename from docs/reports/doeh-session_report.md rename to docs/reports/2026-06-08/doeh-session_report.md diff --git a/docs/reports/meta_llama_api_verification_20260611.md b/docs/reports/2026-06-08/meta_llama_api_verification_20260611.md similarity index 100% rename from docs/reports/meta_llama_api_verification_20260611.md rename to docs/reports/2026-06-08/meta_llama_api_verification_20260611.md diff --git a/docs/reports/nagent_review_session_20260612.md b/docs/reports/2026-06-08/nagent_review_session_20260612.md similarity index 100% rename from docs/reports/nagent_review_session_20260612.md rename to docs/reports/2026-06-08/nagent_review_session_20260612.md diff --git a/docs/reports/namespace_cleanup_sidetrack_report_20260611.md b/docs/reports/2026-06-08/namespace_cleanup_sidetrack_report_20260611.md similarity index 100% rename from docs/reports/namespace_cleanup_sidetrack_report_20260611.md rename to docs/reports/2026-06-08/namespace_cleanup_sidetrack_report_20260611.md diff --git a/docs/reports/proposed_new_tracks_20260608.md b/docs/reports/2026-06-08/proposed_new_tracks_20260608.md similarity index 100% rename from docs/reports/proposed_new_tracks_20260608.md rename to docs/reports/2026-06-08/proposed_new_tracks_20260608.md diff --git a/docs/reports/qwen_llama_grok_followup_audit_20260611.md b/docs/reports/2026-06-08/qwen_llama_grok_followup_audit_20260611.md similarity index 100% rename from docs/reports/qwen_llama_grok_followup_audit_20260611.md rename to docs/reports/2026-06-08/qwen_llama_grok_followup_audit_20260611.md diff --git a/docs/reports/qwen_llama_grok_followup_deferred_work_20260611.md b/docs/reports/2026-06-08/qwen_llama_grok_followup_deferred_work_20260611.md similarity index 100% rename from docs/reports/qwen_llama_grok_followup_deferred_work_20260611.md rename to docs/reports/2026-06-08/qwen_llama_grok_followup_deferred_work_20260611.md diff --git a/docs/reports/qwen_llama_grok_followup_phase5_final_20260611.md b/docs/reports/2026-06-08/qwen_llama_grok_followup_phase5_final_20260611.md similarity index 100% rename from docs/reports/qwen_llama_grok_followup_phase5_final_20260611.md rename to docs/reports/2026-06-08/qwen_llama_grok_followup_phase5_final_20260611.md diff --git a/docs/reports/qwen_llama_grok_followup_session_end_20260611.md b/docs/reports/2026-06-08/qwen_llama_grok_followup_session_end_20260611.md similarity index 100% rename from docs/reports/qwen_llama_grok_followup_session_end_20260611.md rename to docs/reports/2026-06-08/qwen_llama_grok_followup_session_end_20260611.md diff --git a/docs/reports/rag_test_batch_failure_status_20260609_pm3.md b/docs/reports/2026-06-08/rag_test_batch_failure_status_20260609_pm3.md similarity index 100% rename from docs/reports/rag_test_batch_failure_status_20260609_pm3.md rename to docs/reports/2026-06-08/rag_test_batch_failure_status_20260609_pm3.md diff --git a/docs/reports/rag_test_fix_final_20260609.md b/docs/reports/2026-06-08/rag_test_fix_final_20260609.md similarity index 100% rename from docs/reports/rag_test_fix_final_20260609.md rename to docs/reports/2026-06-08/rag_test_fix_final_20260609.md diff --git a/docs/reports/rag_work_final_20260609_pm.md b/docs/reports/2026-06-08/rag_work_final_20260609_pm.md similarity index 100% rename from docs/reports/rag_work_final_20260609_pm.md rename to docs/reports/2026-06-08/rag_work_final_20260609_pm.md diff --git a/docs/reports/session_synthesis_20260608.md b/docs/reports/2026-06-08/session_synthesis_20260608.md similarity index 100% rename from docs/reports/session_synthesis_20260608.md rename to docs/reports/2026-06-08/session_synthesis_20260608.md diff --git a/docs/reports/test_bed_health_20260609.md b/docs/reports/2026-06-08/test_bed_health_20260609.md similarity index 100% rename from docs/reports/test_bed_health_20260609.md rename to docs/reports/2026-06-08/test_bed_health_20260609.md diff --git a/docs/reports/test_full_live_workflow_imgui_assert_20260608.md b/docs/reports/2026-06-08/test_full_live_workflow_imgui_assert_20260608.md similarity index 100% rename from docs/reports/test_full_live_workflow_imgui_assert_20260608.md rename to docs/reports/2026-06-08/test_full_live_workflow_imgui_assert_20260608.md diff --git a/docs/reports/test_full_live_workflow_progress_20260608_pm.md b/docs/reports/2026-06-08/test_full_live_workflow_progress_20260608_pm.md similarity index 100% rename from docs/reports/test_full_live_workflow_progress_20260608_pm.md rename to docs/reports/2026-06-08/test_full_live_workflow_progress_20260608_pm.md diff --git a/docs/reports/test_full_live_workflow_propagation_digest_20260608.md b/docs/reports/2026-06-08/test_full_live_workflow_propagation_digest_20260608.md similarity index 100% rename from docs/reports/test_full_live_workflow_propagation_digest_20260608.md rename to docs/reports/2026-06-08/test_full_live_workflow_propagation_digest_20260608.md diff --git a/docs/reports/test_full_live_workflow_root_cause_20260608.md b/docs/reports/2026-06-08/test_full_live_workflow_root_cause_20260608.md similarity index 100% rename from docs/reports/test_full_live_workflow_root_cause_20260608.md rename to docs/reports/2026-06-08/test_full_live_workflow_root_cause_20260608.md diff --git a/docs/reports/test_infra_hardening_foundation_20260608.md b/docs/reports/2026-06-08/test_infra_hardening_foundation_20260608.md similarity index 100% rename from docs/reports/test_infra_hardening_foundation_20260608.md rename to docs/reports/2026-06-08/test_infra_hardening_foundation_20260608.md diff --git a/docs/reports/test_infrastructure_hardening_batch_green_20260610.md b/docs/reports/2026-06-08/test_infrastructure_hardening_batch_green_20260610.md similarity index 100% rename from docs/reports/test_infrastructure_hardening_batch_green_20260610.md rename to docs/reports/2026-06-08/test_infrastructure_hardening_batch_green_20260610.md diff --git a/docs/reports/test_rag_batch_failure_investigation_20260608_pm2.md b/docs/reports/2026-06-08/test_rag_batch_failure_investigation_20260608_pm2.md similarity index 100% rename from docs/reports/test_rag_batch_failure_investigation_20260608_pm2.md rename to docs/reports/2026-06-08/test_rag_batch_failure_investigation_20260608_pm2.md diff --git a/docs/reports/test_rag_batch_failure_status_20260608.md b/docs/reports/2026-06-08/test_rag_batch_failure_status_20260608.md similarity index 100% rename from docs/reports/test_rag_batch_failure_status_20260608.md rename to docs/reports/2026-06-08/test_rag_batch_failure_status_20260608.md diff --git a/docs/reports/workflow_markdown_audit_20260608.md b/docs/reports/2026-06-08/workflow_markdown_audit_20260608.md similarity index 100% rename from docs/reports/workflow_markdown_audit_20260608.md rename to docs/reports/2026-06-08/workflow_markdown_audit_20260608.md diff --git a/docs/reports/ANY_TYPE_AUDIT_20260621.md b/docs/reports/2026-06-15/ANY_TYPE_AUDIT_20260621.md similarity index 100% rename from docs/reports/ANY_TYPE_AUDIT_20260621.md rename to docs/reports/2026-06-15/ANY_TYPE_AUDIT_20260621.md diff --git a/docs/reports/CAMPAIGN_CLOSE_OUT_video_analysis_20260621.md b/docs/reports/2026-06-15/CAMPAIGN_CLOSE_OUT_video_analysis_20260621.md similarity index 100% rename from docs/reports/CAMPAIGN_CLOSE_OUT_video_analysis_20260621.md rename to docs/reports/2026-06-15/CAMPAIGN_CLOSE_OUT_video_analysis_20260621.md diff --git a/docs/reports/CHRONOLOGY_MIGRATION_20260619.md b/docs/reports/2026-06-15/CHRONOLOGY_MIGRATION_20260619.md similarity index 100% rename from docs/reports/CHRONOLOGY_MIGRATION_20260619.md rename to docs/reports/2026-06-15/CHRONOLOGY_MIGRATION_20260619.md diff --git a/docs/reports/CHRONOLOGY_TRACK_HANDOVER_20260620.md b/docs/reports/2026-06-15/CHRONOLOGY_TRACK_HANDOVER_20260620.md similarity index 100% rename from docs/reports/CHRONOLOGY_TRACK_HANDOVER_20260620.md rename to docs/reports/2026-06-15/CHRONOLOGY_TRACK_HANDOVER_20260620.md diff --git a/docs/reports/EXCEPTION_HANDLING_AUDIT_20260616.md b/docs/reports/2026-06-15/EXCEPTION_HANDLING_AUDIT_20260616.md similarity index 100% rename from docs/reports/EXCEPTION_HANDLING_AUDIT_20260616.md rename to docs/reports/2026-06-15/EXCEPTION_HANDLING_AUDIT_20260616.md diff --git a/docs/reports/NEGATIVE_FLOWS_INVESTIGATION_20260617.md b/docs/reports/2026-06-15/NEGATIVE_FLOWS_INVESTIGATION_20260617.md similarity index 100% rename from docs/reports/NEGATIVE_FLOWS_INVESTIGATION_20260617.md rename to docs/reports/2026-06-15/NEGATIVE_FLOWS_INVESTIGATION_20260617.md diff --git a/docs/reports/NEGATIVE_FLOWS_INVESTIGATION_20260617_REFINED.md b/docs/reports/2026-06-15/NEGATIVE_FLOWS_INVESTIGATION_20260617_REFINED.md similarity index 100% rename from docs/reports/NEGATIVE_FLOWS_INVESTIGATION_20260617_REFINED.md rename to docs/reports/2026-06-15/NEGATIVE_FLOWS_INVESTIGATION_20260617_REFINED.md diff --git a/docs/reports/PHASE12_AUDIT_POST_FIX_20260617.json b/docs/reports/2026-06-15/PHASE12_AUDIT_POST_FIX_20260617.json similarity index 100% rename from docs/reports/PHASE12_AUDIT_POST_FIX_20260617.json rename to docs/reports/2026-06-15/PHASE12_AUDIT_POST_FIX_20260617.json diff --git a/docs/reports/PHASE12_TRIAGE_20260617.md b/docs/reports/2026-06-15/PHASE12_TRIAGE_20260617.md similarity index 100% rename from docs/reports/PHASE12_TRIAGE_20260617.md rename to docs/reports/2026-06-15/PHASE12_TRIAGE_20260617.md diff --git a/docs/reports/PHASE3_HYPOTHETICAL_PROMOTION.md b/docs/reports/2026-06-15/PHASE3_HYPOTHETICAL_PROMOTION.md similarity index 100% rename from docs/reports/PHASE3_HYPOTHETICAL_PROMOTION.md rename to docs/reports/2026-06-15/PHASE3_HYPOTHETICAL_PROMOTION.md diff --git a/docs/reports/PHASE6_ADDENDUM_result_migration_app_controller_20260618.md b/docs/reports/2026-06-15/PHASE6_ADDENDUM_result_migration_app_controller_20260618.md similarity index 100% rename from docs/reports/PHASE6_ADDENDUM_result_migration_app_controller_20260618.md rename to docs/reports/2026-06-15/PHASE6_ADDENDUM_result_migration_app_controller_20260618.md diff --git a/docs/reports/POST_CAMPAIGN_TEST_FIXES_3_FAILURES_20260621.md b/docs/reports/2026-06-15/POST_CAMPAIGN_TEST_FIXES_3_FAILURES_20260621.md similarity index 100% rename from docs/reports/POST_CAMPAIGN_TEST_FIXES_3_FAILURES_20260621.md rename to docs/reports/2026-06-15/POST_CAMPAIGN_TEST_FIXES_3_FAILURES_20260621.md diff --git a/docs/reports/POST_MORTEM_result_migration_cruft_removal_20260620.md b/docs/reports/2026-06-15/POST_MORTEM_result_migration_cruft_removal_20260620.md similarity index 100% rename from docs/reports/POST_MORTEM_result_migration_cruft_removal_20260620.md rename to docs/reports/2026-06-15/POST_MORTEM_result_migration_cruft_removal_20260620.md diff --git a/docs/reports/PROCESS_IMPROVEMENT_FALSE_COMPLETION_CLAIMS_20260621.md b/docs/reports/2026-06-15/PROCESS_IMPROVEMENT_FALSE_COMPLETION_CLAIMS_20260621.md similarity index 100% rename from docs/reports/PROCESS_IMPROVEMENT_FALSE_COMPLETION_CLAIMS_20260621.md rename to docs/reports/2026-06-15/PROCESS_IMPROVEMENT_FALSE_COMPLETION_CLAIMS_20260621.md diff --git a/docs/reports/PROGRESS_REPORT_result_migration_baseline_cleanup_20260620.md b/docs/reports/2026-06-15/PROGRESS_REPORT_result_migration_baseline_cleanup_20260620.md similarity index 100% rename from docs/reports/PROGRESS_REPORT_result_migration_baseline_cleanup_20260620.md rename to docs/reports/2026-06-15/PROGRESS_REPORT_result_migration_baseline_cleanup_20260620.md diff --git a/docs/reports/RESULT_MIGRATION_CAMPAIGN_STATUS_20260619.md b/docs/reports/2026-06-15/RESULT_MIGRATION_CAMPAIGN_STATUS_20260619.md similarity index 100% rename from docs/reports/RESULT_MIGRATION_CAMPAIGN_STATUS_20260619.md rename to docs/reports/2026-06-15/RESULT_MIGRATION_CAMPAIGN_STATUS_20260619.md diff --git a/docs/reports/RESULT_MIGRATION_REVIEW_PASS_20260617.md b/docs/reports/2026-06-15/RESULT_MIGRATION_REVIEW_PASS_20260617.md similarity index 100% rename from docs/reports/RESULT_MIGRATION_REVIEW_PASS_20260617.md rename to docs/reports/2026-06-15/RESULT_MIGRATION_REVIEW_PASS_20260617.md diff --git a/docs/reports/RESULT_MIGRATION_SMALL_FILES_20260617.md b/docs/reports/2026-06-15/RESULT_MIGRATION_SMALL_FILES_20260617.md similarity index 100% rename from docs/reports/RESULT_MIGRATION_SMALL_FILES_20260617.md rename to docs/reports/2026-06-15/RESULT_MIGRATION_SMALL_FILES_20260617.md diff --git a/docs/reports/RESULT_MIGRATION_SMALL_FILES_PHASE10_SITES.md b/docs/reports/2026-06-15/RESULT_MIGRATION_SMALL_FILES_PHASE10_SITES.md similarity index 100% rename from docs/reports/RESULT_MIGRATION_SMALL_FILES_PHASE10_SITES.md rename to docs/reports/2026-06-15/RESULT_MIGRATION_SMALL_FILES_PHASE10_SITES.md diff --git a/docs/reports/RESULT_MIGRATION_SUB_TRACK_2_PHASE12_REPORT_20260617.md b/docs/reports/2026-06-15/RESULT_MIGRATION_SUB_TRACK_2_PHASE12_REPORT_20260617.md similarity index 100% rename from docs/reports/RESULT_MIGRATION_SUB_TRACK_2_PHASE12_REPORT_20260617.md rename to docs/reports/2026-06-15/RESULT_MIGRATION_SUB_TRACK_2_PHASE12_REPORT_20260617.md diff --git a/docs/reports/RESULT_MIGRATION_SUB_TRACK_2_STATUS_20260617.md b/docs/reports/2026-06-15/RESULT_MIGRATION_SUB_TRACK_2_STATUS_20260617.md similarity index 100% rename from docs/reports/RESULT_MIGRATION_SUB_TRACK_2_STATUS_20260617.md rename to docs/reports/2026-06-15/RESULT_MIGRATION_SUB_TRACK_2_STATUS_20260617.md diff --git a/docs/reports/SESSION_REPORT_20260616.md b/docs/reports/2026-06-15/SESSION_REPORT_20260616.md similarity index 100% rename from docs/reports/SESSION_REPORT_20260616.md rename to docs/reports/2026-06-15/SESSION_REPORT_20260616.md diff --git a/docs/reports/SESSION_REPORT_superpowers_review_init_20260619.md b/docs/reports/2026-06-15/SESSION_REPORT_superpowers_review_init_20260619.md similarity index 100% rename from docs/reports/SESSION_REPORT_superpowers_review_init_20260619.md rename to docs/reports/2026-06-15/SESSION_REPORT_superpowers_review_init_20260619.md diff --git a/docs/reports/STATUS_REPORT_phase6_compact.md b/docs/reports/2026-06-15/STATUS_REPORT_phase6_compact.md similarity index 100% rename from docs/reports/STATUS_REPORT_phase6_compact.md rename to docs/reports/2026-06-15/STATUS_REPORT_phase6_compact.md diff --git a/docs/reports/THEME_BUG_ANALYSIS_send_result_to_send_20260616.md b/docs/reports/2026-06-15/THEME_BUG_ANALYSIS_send_result_to_send_20260616.md similarity index 100% rename from docs/reports/THEME_BUG_ANALYSIS_send_result_to_send_20260616.md rename to docs/reports/2026-06-15/THEME_BUG_ANALYSIS_send_result_to_send_20260616.md diff --git a/docs/reports/TIER1_REVIEW_phase9_dilemma_20260620.md b/docs/reports/2026-06-15/TIER1_REVIEW_phase9_dilemma_20260620.md similarity index 100% rename from docs/reports/TIER1_REVIEW_phase9_dilemma_20260620.md rename to docs/reports/2026-06-15/TIER1_REVIEW_phase9_dilemma_20260620.md diff --git a/docs/reports/TRACK_COMPLETION_ai_loop_regressions_20260615.md b/docs/reports/2026-06-15/TRACK_COMPLETION_ai_loop_regressions_20260615.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_ai_loop_regressions_20260615.md rename to docs/reports/2026-06-15/TRACK_COMPLETION_ai_loop_regressions_20260615.md diff --git a/docs/reports/TRACK_COMPLETION_chronology_20260619.md b/docs/reports/2026-06-15/TRACK_COMPLETION_chronology_20260619.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_chronology_20260619.md rename to docs/reports/2026-06-15/TRACK_COMPLETION_chronology_20260619.md diff --git a/docs/reports/TRACK_COMPLETION_doeh_test_thinking_cleanup_20260615.md b/docs/reports/2026-06-15/TRACK_COMPLETION_doeh_test_thinking_cleanup_20260615.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_doeh_test_thinking_cleanup_20260615.md rename to docs/reports/2026-06-15/TRACK_COMPLETION_doeh_test_thinking_cleanup_20260615.md diff --git a/docs/reports/TRACK_COMPLETION_live_gui_test_fixes_20260618.md b/docs/reports/2026-06-15/TRACK_COMPLETION_live_gui_test_fixes_20260618.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_live_gui_test_fixes_20260618.md rename to docs/reports/2026-06-15/TRACK_COMPLETION_live_gui_test_fixes_20260618.md diff --git a/docs/reports/TRACK_COMPLETION_nagent_review_v3_1_20260620.md b/docs/reports/2026-06-15/TRACK_COMPLETION_nagent_review_v3_1_20260620.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_nagent_review_v3_1_20260620.md rename to docs/reports/2026-06-15/TRACK_COMPLETION_nagent_review_v3_1_20260620.md diff --git a/docs/reports/TRACK_COMPLETION_phase2_4_5_call_site_completion_20260621.md b/docs/reports/2026-06-15/TRACK_COMPLETION_phase2_4_5_call_site_completion_20260621.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_phase2_4_5_call_site_completion_20260621.md rename to docs/reports/2026-06-15/TRACK_COMPLETION_phase2_4_5_call_site_completion_20260621.md diff --git a/docs/reports/TRACK_COMPLETION_public_api_migration_and_ui_polish_20260615.md b/docs/reports/2026-06-15/TRACK_COMPLETION_public_api_migration_and_ui_polish_20260615.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_public_api_migration_and_ui_polish_20260615.md rename to docs/reports/2026-06-15/TRACK_COMPLETION_public_api_migration_and_ui_polish_20260615.md diff --git a/docs/reports/TRACK_COMPLETION_rag_test_failures_20260615.md b/docs/reports/2026-06-15/TRACK_COMPLETION_rag_test_failures_20260615.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_rag_test_failures_20260615.md rename to docs/reports/2026-06-15/TRACK_COMPLETION_rag_test_failures_20260615.md diff --git a/docs/reports/TRACK_COMPLETION_result_migration_app_controller_20260618.md b/docs/reports/2026-06-15/TRACK_COMPLETION_result_migration_app_controller_20260618.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_result_migration_app_controller_20260618.md rename to docs/reports/2026-06-15/TRACK_COMPLETION_result_migration_app_controller_20260618.md diff --git a/docs/reports/TRACK_COMPLETION_result_migration_baseline_cleanup_20260620.md b/docs/reports/2026-06-15/TRACK_COMPLETION_result_migration_baseline_cleanup_20260620.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_result_migration_baseline_cleanup_20260620.md rename to docs/reports/2026-06-15/TRACK_COMPLETION_result_migration_baseline_cleanup_20260620.md diff --git a/docs/reports/TRACK_COMPLETION_result_migration_cruft_removal_20260620.md b/docs/reports/2026-06-15/TRACK_COMPLETION_result_migration_cruft_removal_20260620.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_result_migration_cruft_removal_20260620.md rename to docs/reports/2026-06-15/TRACK_COMPLETION_result_migration_cruft_removal_20260620.md diff --git a/docs/reports/TRACK_COMPLETION_result_migration_gui_2_20260619.md b/docs/reports/2026-06-15/TRACK_COMPLETION_result_migration_gui_2_20260619.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_result_migration_gui_2_20260619.md rename to docs/reports/2026-06-15/TRACK_COMPLETION_result_migration_gui_2_20260619.md diff --git a/docs/reports/TRACK_COMPLETION_result_migration_review_pass_20260617.md b/docs/reports/2026-06-15/TRACK_COMPLETION_result_migration_review_pass_20260617.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_result_migration_review_pass_20260617.md rename to docs/reports/2026-06-15/TRACK_COMPLETION_result_migration_review_pass_20260617.md diff --git a/docs/reports/TRACK_COMPLETION_result_migration_small_files_20260617.md b/docs/reports/2026-06-15/TRACK_COMPLETION_result_migration_small_files_20260617.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_result_migration_small_files_20260617.md rename to docs/reports/2026-06-15/TRACK_COMPLETION_result_migration_small_files_20260617.md diff --git a/docs/reports/TRACK_COMPLETION_send_result_to_send_20260616.md b/docs/reports/2026-06-15/TRACK_COMPLETION_send_result_to_send_20260616.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_send_result_to_send_20260616.md rename to docs/reports/2026-06-15/TRACK_COMPLETION_send_result_to_send_20260616.md diff --git a/docs/reports/TRACK_COMPLETION_test_sandbox_hardening_20260619.md b/docs/reports/2026-06-15/TRACK_COMPLETION_test_sandbox_hardening_20260619.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_test_sandbox_hardening_20260619.md rename to docs/reports/2026-06-15/TRACK_COMPLETION_test_sandbox_hardening_20260619.md diff --git a/docs/reports/TRACK_COMPLETION_tier2_autonomous_sandbox_20260616.md b/docs/reports/2026-06-15/TRACK_COMPLETION_tier2_autonomous_sandbox_20260616.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_tier2_autonomous_sandbox_20260616.md rename to docs/reports/2026-06-15/TRACK_COMPLETION_tier2_autonomous_sandbox_20260616.md diff --git a/docs/reports/TRACK_COMPLETION_tier2_leak_prevention_20260620.md b/docs/reports/2026-06-15/TRACK_COMPLETION_tier2_leak_prevention_20260620.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_tier2_leak_prevention_20260620.md rename to docs/reports/2026-06-15/TRACK_COMPLETION_tier2_leak_prevention_20260620.md diff --git a/docs/reports/TRACK_COMPLETION_tier2_no_appdata_20260618.md b/docs/reports/2026-06-15/TRACK_COMPLETION_tier2_no_appdata_20260618.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_tier2_no_appdata_20260618.md rename to docs/reports/2026-06-15/TRACK_COMPLETION_tier2_no_appdata_20260618.md diff --git a/docs/reports/TRACK_COMPLETION_tier2_sandbox_hardening_20260617.md b/docs/reports/2026-06-15/TRACK_COMPLETION_tier2_sandbox_hardening_20260617.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_tier2_sandbox_hardening_20260617.md rename to docs/reports/2026-06-15/TRACK_COMPLETION_tier2_sandbox_hardening_20260617.md diff --git a/docs/reports/TRACK_COMPLETION_video_analysis_brain_counterintuitive_20260621.md b/docs/reports/2026-06-15/TRACK_COMPLETION_video_analysis_brain_counterintuitive_20260621.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_video_analysis_brain_counterintuitive_20260621.md rename to docs/reports/2026-06-15/TRACK_COMPLETION_video_analysis_brain_counterintuitive_20260621.md diff --git a/docs/reports/TRACK_COMPLETION_video_analysis_campaign_20260621_phase0_1_2init.md b/docs/reports/2026-06-15/TRACK_COMPLETION_video_analysis_campaign_20260621_phase0_1_2init.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_video_analysis_campaign_20260621_phase0_1_2init.md rename to docs/reports/2026-06-15/TRACK_COMPLETION_video_analysis_campaign_20260621_phase0_1_2init.md diff --git a/docs/reports/TRACK_COMPLETION_video_analysis_creikey_dl_cv_20260621.md b/docs/reports/2026-06-15/TRACK_COMPLETION_video_analysis_creikey_dl_cv_20260621.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_video_analysis_creikey_dl_cv_20260621.md rename to docs/reports/2026-06-15/TRACK_COMPLETION_video_analysis_creikey_dl_cv_20260621.md diff --git a/docs/reports/TRACK_COMPLETION_video_analysis_cs229_building_llms_20260621.md b/docs/reports/2026-06-15/TRACK_COMPLETION_video_analysis_cs229_building_llms_20260621.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_video_analysis_cs229_building_llms_20260621.md rename to docs/reports/2026-06-15/TRACK_COMPLETION_video_analysis_cs229_building_llms_20260621.md diff --git a/docs/reports/TRACK_COMPLETION_video_analysis_cs336_architectures_20260621.md b/docs/reports/2026-06-15/TRACK_COMPLETION_video_analysis_cs336_architectures_20260621.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_video_analysis_cs336_architectures_20260621.md rename to docs/reports/2026-06-15/TRACK_COMPLETION_video_analysis_cs336_architectures_20260621.md diff --git a/docs/reports/TRACK_COMPLETION_video_analysis_deob_apply_20260621.md b/docs/reports/2026-06-15/TRACK_COMPLETION_video_analysis_deob_apply_20260621.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_video_analysis_deob_apply_20260621.md rename to docs/reports/2026-06-15/TRACK_COMPLETION_video_analysis_deob_apply_20260621.md diff --git a/docs/reports/TRACK_COMPLETION_video_analysis_deob_lexicon_20260621.md b/docs/reports/2026-06-15/TRACK_COMPLETION_video_analysis_deob_lexicon_20260621.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_video_analysis_deob_lexicon_20260621.md rename to docs/reports/2026-06-15/TRACK_COMPLETION_video_analysis_deob_lexicon_20260621.md diff --git a/docs/reports/TRACK_COMPLETION_video_analysis_deob_pilot_20260621.md b/docs/reports/2026-06-15/TRACK_COMPLETION_video_analysis_deob_pilot_20260621.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_video_analysis_deob_pilot_20260621.md rename to docs/reports/2026-06-15/TRACK_COMPLETION_video_analysis_deob_pilot_20260621.md diff --git a/docs/reports/TRACK_COMPLETION_video_analysis_deob_warmup_20260621.md b/docs/reports/2026-06-15/TRACK_COMPLETION_video_analysis_deob_warmup_20260621.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_video_analysis_deob_warmup_20260621.md rename to docs/reports/2026-06-15/TRACK_COMPLETION_video_analysis_deob_warmup_20260621.md diff --git a/docs/reports/TRACK_COMPLETION_video_analysis_entropy_epiplexity_20260621.md b/docs/reports/2026-06-15/TRACK_COMPLETION_video_analysis_entropy_epiplexity_20260621.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_video_analysis_entropy_epiplexity_20260621.md rename to docs/reports/2026-06-15/TRACK_COMPLETION_video_analysis_entropy_epiplexity_20260621.md diff --git a/docs/reports/TRACK_COMPLETION_video_analysis_free_lunches_levin_20260621.md b/docs/reports/2026-06-15/TRACK_COMPLETION_video_analysis_free_lunches_levin_20260621.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_video_analysis_free_lunches_levin_20260621.md rename to docs/reports/2026-06-15/TRACK_COMPLETION_video_analysis_free_lunches_levin_20260621.md diff --git a/docs/reports/TRACK_COMPLETION_video_analysis_generic_systems_fields_20260621.md b/docs/reports/2026-06-15/TRACK_COMPLETION_video_analysis_generic_systems_fields_20260621.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_video_analysis_generic_systems_fields_20260621.md rename to docs/reports/2026-06-15/TRACK_COMPLETION_video_analysis_generic_systems_fields_20260621.md diff --git a/docs/reports/TRACK_COMPLETION_video_analysis_multiscale_hoffman_20260621.md b/docs/reports/2026-06-15/TRACK_COMPLETION_video_analysis_multiscale_hoffman_20260621.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_video_analysis_multiscale_hoffman_20260621.md rename to docs/reports/2026-06-15/TRACK_COMPLETION_video_analysis_multiscale_hoffman_20260621.md diff --git a/docs/reports/TRACK_COMPLETION_video_analysis_neural_dynamics_miller_20260621.md b/docs/reports/2026-06-15/TRACK_COMPLETION_video_analysis_neural_dynamics_miller_20260621.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_video_analysis_neural_dynamics_miller_20260621.md rename to docs/reports/2026-06-15/TRACK_COMPLETION_video_analysis_neural_dynamics_miller_20260621.md diff --git a/docs/reports/TRACK_COMPLETION_video_analysis_platonic_intelligence_kumar_20260621.md b/docs/reports/2026-06-15/TRACK_COMPLETION_video_analysis_platonic_intelligence_kumar_20260621.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_video_analysis_platonic_intelligence_kumar_20260621.md rename to docs/reports/2026-06-15/TRACK_COMPLETION_video_analysis_platonic_intelligence_kumar_20260621.md diff --git a/docs/reports/TRACK_COMPLETION_video_analysis_probability_logic_20260621.md b/docs/reports/2026-06-15/TRACK_COMPLETION_video_analysis_probability_logic_20260621.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_video_analysis_probability_logic_20260621.md rename to docs/reports/2026-06-15/TRACK_COMPLETION_video_analysis_probability_logic_20260621.md diff --git a/docs/reports/TRACK_COMPLETION_video_analysis_score_dynamics_giorgini_20260621.md b/docs/reports/2026-06-15/TRACK_COMPLETION_video_analysis_score_dynamics_giorgini_20260621.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_video_analysis_score_dynamics_giorgini_20260621.md rename to docs/reports/2026-06-15/TRACK_COMPLETION_video_analysis_score_dynamics_giorgini_20260621.md diff --git a/docs/reports/TRACK_COMPLETION_video_analysis_synthesis_20260621.md b/docs/reports/2026-06-15/TRACK_COMPLETION_video_analysis_synthesis_20260621.md similarity index 100% rename from docs/reports/TRACK_COMPLETION_video_analysis_synthesis_20260621.md rename to docs/reports/2026-06-15/TRACK_COMPLETION_video_analysis_synthesis_20260621.md diff --git a/docs/reports/license_cve_audit/2026-06-07/final.md b/docs/reports/license_cve_audit/final.md similarity index 100% rename from docs/reports/license_cve_audit/2026-06-07/final.md rename to docs/reports/license_cve_audit/final.md diff --git a/docs/reports/license_cve_audit/2026-06-07/initial.md b/docs/reports/license_cve_audit/initial.md similarity index 100% rename from docs/reports/license_cve_audit/2026-06-07/initial.md rename to docs/reports/license_cve_audit/initial.md diff --git a/scripts/organize_reports.py b/scripts/organize_reports.py new file mode 100644 index 00000000..b8db87f3 --- /dev/null +++ b/scripts/organize_reports.py @@ -0,0 +1,164 @@ +"""Organize docs/reports/ files into week folders named --
. + +Scheme: ShareX-style -- where resolves to the day-of-month +(zero-padded) of the week's Monday. Example: files from the week of Mon +June 22, 2026 land in docs/reports/2026-06-22/. + +Only moves files from OLD weeks; the current week's files stay in place +so in-flight work isn't buried. + +Date resolution per file: + 1. Parse an 8-digit YYYYMMDD or YYYY-MM-DD substring from the filename + (most reports embed one, e.g. TRACK_COMPLETION_..._20260627.md). + 2. Fall back to the file's mtime. NOTE: git checkout resets mtime, so + undated files may be mis-classified as 'current week' and left in place. + +Idempotent: subdirectories (existing week folders) are skipped; files +already at their computed destination are skipped. + +Non-recursive: only immediate children of the reports dir are scanned. +Subdirectories (existing week folders, category folders like +code_path_audit/ + license_cve_audit/) are never entered; their contents +are left untouched. Only loose .md (or other) files in the immediate +reports dir are candidates for moving. + +Usage: + python scripts/organize_reports.py # dry-run; print plan + python scripts/organize_reports.py --apply # actually move files + python scripts/organize_reports.py --json # dry-run; JSON output + python scripts/organize_reports.py --dir docs/reports +""" +from __future__ import annotations +import argparse +import json +import re +import shutil +import sys +from datetime import date, timedelta +from pathlib import Path + +_DATE_REGEX = re.compile(r"(\d{4})[-_]?(\d{2})[-_]?(\d{2})") + + +def _week_monday(d: date) -> date: + return d - timedelta(days=d.weekday()) + + +def _parse_filename_date(name: str) -> date | None: + m = _DATE_REGEX.search(name) + if not m: + return None + y, mo, dy = int(m.group(1)), int(m.group(2)), int(m.group(3)) + if not (2000 <= y < 2100 and 1 <= mo <= 12 and 1 <= dy <= 31): + return None + try: + return date(y, mo, dy) + except ValueError: + return None + + +def _file_week_monday(path: Path) -> date | None: + fd = _parse_filename_date(path.name) + if fd is not None: + return _week_monday(fd) + try: + mt = path.stat().st_mtime + except OSError: + return None + return _week_monday(date.fromtimestamp(mt)) + + +def main() -> int: + parser = argparse.ArgumentParser( + description="Organize docs/reports/ files into week folders named --
(Monday of the file's week). Old weeks only; current week's files stay in place." + ) + parser.add_argument("--apply", action="store_true", help="Actually move files (default: dry-run)") + parser.add_argument("--json", action="store_true", help="Emit JSON instead of human-readable output") + parser.add_argument("--dir", default="docs/reports", help="Reports directory (default: docs/reports)") + args = parser.parse_args() + + reports = Path(args.dir) + if not reports.is_dir(): + print(f"error: {reports} is not a directory", file=sys.stderr) + return 1 + + subdirs_skipped = [e.name for e in sorted(reports.iterdir(), key=lambda e: e.name) if e.is_dir()] + current_monday = _week_monday(date.today()) + moves: list[dict] = [] + skipped_current_week: list[str] = [] + skipped_no_date: list[str] = [] + skipped_already_in_place: list[str] = [] + + for entry in sorted(reports.iterdir(), key=lambda e: e.name): + if entry.is_dir(): + continue + if not entry.is_file(): + continue + fm = _file_week_monday(entry) + if fm is None: + skipped_no_date.append(entry.name) + continue + if fm >= current_monday: + skipped_current_week.append(entry.name) + continue + dest_dir = reports / fm.isoformat() + dest = dest_dir / entry.name + if dest.exists(): + skipped_already_in_place.append(entry.name) + continue + moves.append({"src": str(entry), "dest": str(dest), "week": fm.isoformat()}) + + if args.json: + print(json.dumps({ + "current_week_monday": current_monday.isoformat(), + "reports_dir": str(reports), + "moves": moves, + "skipped_current_week": skipped_current_week, + "skipped_no_date": skipped_no_date, + "skipped_already_in_place": skipped_already_in_place, + "subdirs_skipped": subdirs_skipped, + }, indent=2)) + return 0 + + print(f"Current week Monday: {current_monday.isoformat()} (files from this week stay put)") + print(f"Reports dir: {reports}") + print() + if not moves: + print("Nothing to move.") + else: + tag = "MOVE" if args.apply else "DRY " + print(f"{tag}: {len(moves)} file(s)") + for mv in moves: + print(f" {tag} {mv['src']} -> {mv['dest']}") + + if skipped_current_week: + print(f"\nSkipped (current week): {len(skipped_current_week)}") + for n in skipped_current_week: + print(f" - {n}") + if skipped_already_in_place: + print(f"\nSkipped (already in destination): {len(skipped_already_in_place)}") + for n in skipped_already_in_place: + print(f" - {n}") + if skipped_no_date: + print(f"\nSkipped (no parseable date): {len(skipped_no_date)}") + for n in skipped_no_date: + print(f" - {n}") + if subdirs_skipped: + print(f"\nSubdirectories skipped (non-recursive; left untouched): {len(subdirs_skipped)}") + for n in subdirs_skipped: + print(f" - {n}") + + if args.apply and moves: + for mv in moves: + dest_path = Path(mv["dest"]) + dest_path.parent.mkdir(parents=True, exist_ok=True) + shutil.move(mv["src"], mv["dest"]) + print(f"\nApplied: moved {len(moves)} file(s).") + elif moves: + print("\nDry run. Pass --apply to move.") + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) \ No newline at end of file