Compare commits
22 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| ca185235e9 | |||
| af17a0f9ee | |||
| 0d6c58916f | |||
| 01f7bccc6f | |||
| 423f260aba | |||
| 7a96d0264d | |||
| 1997a0d21c | |||
| 01f664ecd8 | |||
| 77b702265d | |||
| cba6e7d7ee | |||
| 0677bb50ad | |||
| 933caf439f | |||
| b1ee947b32 | |||
| 0a65056fc5 | |||
| 5380b7153d | |||
| 01b6c68e20 | |||
| 8f6ae6d983 | |||
| cf7ef3fc66 | |||
| 805a06197b | |||
| ea55b10d57 | |||
| eddb359713 | |||
| 1caeca4ec4 |
@@ -1,23 +0,0 @@
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
def run_diag(role: str, prompt: str) -> str:
|
||||
print(f"--- Running Diag for {role} ---")
|
||||
cmd = [sys.executable, "scripts/mma_exec.py", "--role", role, prompt]
|
||||
try:
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, encoding='utf-8')
|
||||
print("STDOUT:")
|
||||
print(result.stdout)
|
||||
print("STDERR:")
|
||||
print(result.stderr)
|
||||
return result.stdout
|
||||
except Exception as e:
|
||||
print(f"FAILED: {e}")
|
||||
return str(e)
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Test 1: Simple read
|
||||
print("TEST 1: read_file")
|
||||
run_diag("tier3-worker", "Read the file 'pyproject.toml' and tell me the version of the project. ONLY the version string.")
|
||||
print("\nTEST 2: run_shell_command")
|
||||
run_diag("tier3-worker", "Use run_shell_command to execute 'echo HELLO_SUBAGENT' and return the output. ONLY the output.")
|
||||
@@ -1,64 +0,0 @@
|
||||
import unittest
|
||||
from unittest.mock import MagicMock, patch
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Ensure project root is in path so we can import src.gui_2
|
||||
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
|
||||
if project_root not in sys.path:
|
||||
sys.path.insert(0, project_root)
|
||||
|
||||
class TestMarkdownTableWidth(unittest.TestCase):
|
||||
def test_render_discussion_entry_full_width(self):
|
||||
"""
|
||||
Verify that render_discussion_entry calls imgui.dummy with the full available width.
|
||||
"""
|
||||
# Mock all dependencies to avoid side effects and complex setup during import/execution
|
||||
with patch('src.gui_2.imgui') as mock_imgui, \
|
||||
patch('src.gui_2.imscope') as mock_imscope, \
|
||||
patch('src.gui_2.theme') as mock_theme, \
|
||||
patch('src.gui_2.project_manager') as mock_pm, \
|
||||
patch('src.gui_2.render_thinking_trace') as mock_rtt, \
|
||||
patch('src.gui_2.render_discussion_entry_read_mode') as mock_rderm:
|
||||
|
||||
# 1. Setup available width and coordinates
|
||||
expected_width = 850.0
|
||||
mock_avail = MagicMock()
|
||||
mock_avail.x = expected_width
|
||||
mock_imgui.get_content_region_avail.return_value = mock_avail
|
||||
|
||||
# Mock ImVec2 to return a simple tuple for easier assertion
|
||||
mock_imgui.ImVec2.side_effect = lambda x, y: (x, y)
|
||||
|
||||
# 3. Mock app and entry state
|
||||
mock_app = MagicMock()
|
||||
mock_app.disc_roles = ["User", "Assistant"]
|
||||
|
||||
entry = {
|
||||
"role": "User",
|
||||
"content": "Hello world",
|
||||
"collapsed": False,
|
||||
"read_mode": False
|
||||
}
|
||||
|
||||
# Mock interactive elements
|
||||
mock_imgui.begin_combo.return_value = False
|
||||
mock_imgui.button.return_value = False
|
||||
mock_imgui.input_text_multiline.return_value = (False, entry["content"])
|
||||
|
||||
# 4. Import the function within the patch context
|
||||
from src.gui_2 import render_discussion_entry
|
||||
|
||||
# 5. Execute the function
|
||||
render_discussion_entry(mock_app, entry, 0)
|
||||
|
||||
# 6. Verification
|
||||
# The function should call imgui.dummy(imgui.ImVec2(full_width, 0))
|
||||
mock_imgui.dummy.assert_any_call((expected_width, 0.0))
|
||||
|
||||
# CRITICAL: Verify newline or spacing is called to prevent squashing
|
||||
# We expect this to fail currently
|
||||
assert mock_imgui.new_line.called or mock_imgui.spacing.called
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
@@ -1,33 +0,0 @@
|
||||
import inspect
|
||||
import sys
|
||||
import os
|
||||
import pytest
|
||||
|
||||
# Ensure project root is in path
|
||||
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
||||
|
||||
def test_gui_monolithic_symbols():
|
||||
try:
|
||||
from src.gui_2 import App, render_discussion_entry, render_thinking_trace
|
||||
import src.gui_2
|
||||
except ImportError as e:
|
||||
pytest.fail(f"FAILURE: Could not import from src.gui_2: {e}")
|
||||
|
||||
# Verify App is importable
|
||||
assert App is not None
|
||||
|
||||
# Verify render_discussion_entry is in src.gui_2
|
||||
assert hasattr(src.gui_2, 'render_discussion_entry'), "render_discussion_entry missing from src.gui_2"
|
||||
|
||||
# Verify it's defined in src.gui_2, not imported
|
||||
mod = inspect.getmodule(render_discussion_entry)
|
||||
assert mod is not None, "Could not determine module for render_discussion_entry"
|
||||
assert mod.__name__ == 'src.gui_2', f"render_discussion_entry expected in src.gui_2, but found in {mod.__name__}"
|
||||
|
||||
# Verify render_thinking_trace is in src.gui_2
|
||||
assert hasattr(src.gui_2, 'render_thinking_trace'), "render_thinking_trace missing from src.gui_2"
|
||||
|
||||
# Verify it's defined in src.gui_2, not imported
|
||||
mod = inspect.getmodule(render_thinking_trace)
|
||||
assert mod is not None, "Could not determine module for render_thinking_trace"
|
||||
assert mod.__name__ == 'src.gui_2', f"render_thinking_trace expected in src.gui_2, but found in {mod.__name__}"
|
||||
@@ -1,29 +0,0 @@
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
from src.imgui_scopes import _ScopeId
|
||||
import src.imgui_scopes as imgui_scopes
|
||||
|
||||
def test_scope_id_string():
|
||||
with patch('src.imgui_scopes.imgui') as mock_imgui:
|
||||
sid = _ScopeId("test_id")
|
||||
with sid:
|
||||
pass
|
||||
mock_imgui.push_id.assert_called_once_with("test_id")
|
||||
mock_imgui.pop_id.assert_called_once()
|
||||
|
||||
def test_scope_id_int():
|
||||
with patch('src.imgui_scopes.imgui') as mock_imgui:
|
||||
# Python type hint is str, but we test runtime resilience
|
||||
sid = _ScopeId(1234)
|
||||
with sid:
|
||||
pass
|
||||
# Verify it was converted to string to prevent low-level crashes
|
||||
mock_imgui.push_id.assert_called_once_with("1234")
|
||||
mock_imgui.pop_id.assert_called_once()
|
||||
|
||||
def test_id_helper_function():
|
||||
with patch('src.imgui_scopes.imgui') as mock_imgui:
|
||||
with imgui_scopes.id(42):
|
||||
pass
|
||||
mock_imgui.push_id.assert_called_once_with("42")
|
||||
mock_imgui.pop_id.assert_called_once()
|
||||
@@ -1,60 +0,0 @@
|
||||
import subprocess
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
def run_ps_script(role: str, prompt: str) -> subprocess.CompletedProcess:
|
||||
"""Helper to run the run_subagent.ps1 script."""
|
||||
# Using -File is safer and handles arguments better
|
||||
cmd = [
|
||||
"powershell", "-NoProfile", "-ExecutionPolicy", "Bypass",
|
||||
"-File", "./scripts/run_subagent.ps1",
|
||||
"-Role", role,
|
||||
"-Prompt", prompt
|
||||
]
|
||||
result = subprocess.run(cmd, capture_output=True, text=True)
|
||||
if result.stdout:
|
||||
print(f"\n[Sub-Agent {role} Output]:\n{result.stdout}")
|
||||
if result.stderr:
|
||||
print(f"\n[Sub-Agent {role} Error]:\n{result.stderr}")
|
||||
return result
|
||||
|
||||
@patch('subprocess.run')
|
||||
def test_subagent_script_qa_live(mock_run) -> None:
|
||||
"""Verify that the QA role works and returns a compressed fix."""
|
||||
mock_run.return_value = MagicMock(returncode=0, stdout='Fix the division by zero error.', stderr='')
|
||||
prompt = "Traceback (most recent call last): File 'test.py', line 1, in <module> 1/0 ZeroDivisionError: division by zero"
|
||||
result = run_ps_script("QA", prompt)
|
||||
assert result.returncode == 0
|
||||
# Expected output should mention the fix for division by zero
|
||||
assert "zero" in result.stdout.lower()
|
||||
# It should be short (QA agents compress)
|
||||
assert len(result.stdout.split()) < 40
|
||||
|
||||
@patch('subprocess.run')
|
||||
def test_subagent_script_worker_live(mock_run) -> None:
|
||||
"""Verify that the Worker role works and returns code."""
|
||||
mock_run.return_value = MagicMock(returncode=0, stdout='def hello(): return "hello world"', stderr='')
|
||||
prompt = "Write a python function that returns 'hello world'"
|
||||
result = run_ps_script("Worker", prompt)
|
||||
assert result.returncode == 0
|
||||
assert "def" in result.stdout.lower()
|
||||
assert "hello" in result.stdout.lower()
|
||||
|
||||
@patch('subprocess.run')
|
||||
def test_subagent_script_utility_live(mock_run) -> None:
|
||||
"""Verify that the Utility role works."""
|
||||
mock_run.return_value = MagicMock(returncode=0, stdout='True', stderr='')
|
||||
prompt = "Tell me 'True' if 1+1=2, otherwise 'False'"
|
||||
result = run_ps_script("Utility", prompt)
|
||||
assert result.returncode == 0
|
||||
assert "true" in result.stdout.lower()
|
||||
|
||||
@patch('subprocess.run')
|
||||
def test_subagent_isolation_live(mock_run) -> None:
|
||||
"""Verify that the sub-agent is stateless and does not see the parent's conversation context."""
|
||||
mock_run.return_value = MagicMock(returncode=0, stdout='UNKNOWN', stderr='')
|
||||
# This prompt asks the sub-agent about a 'secret' mentioned only here, not in its prompt.
|
||||
prompt = "What is the secret code I just told you? If I didn't tell you, say 'UNKNOWN'."
|
||||
result = run_ps_script("Utility", prompt)
|
||||
assert result.returncode == 0
|
||||
# A stateless agent should not know any previous context.
|
||||
assert "unknown" in result.stdout.lower()
|
||||
@@ -1,140 +0,0 @@
|
||||
import pytest
|
||||
import os
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch, MagicMock
|
||||
from scripts.mma_exec import create_parser, get_role_documents, execute_agent, get_model_for_role, get_dependencies
|
||||
|
||||
def test_parser_role_choices() -> None:
|
||||
"""Test that the parser accepts valid roles and the prompt argument."""
|
||||
parser = create_parser()
|
||||
valid_roles = ['tier1', 'tier2', 'tier3', 'tier4']
|
||||
test_prompt = "Analyze the codebase for bottlenecks."
|
||||
for role in valid_roles:
|
||||
args = parser.parse_args(['--role', role, test_prompt])
|
||||
assert args.role == role
|
||||
assert args.prompt == test_prompt
|
||||
|
||||
def test_parser_invalid_role() -> None:
|
||||
"""Test that the parser rejects roles outside the specified choices."""
|
||||
parser = create_parser()
|
||||
with pytest.raises(SystemExit):
|
||||
parser.parse_args(['--role', 'tier5', 'Some prompt'])
|
||||
|
||||
def test_parser_prompt_optional() -> None:
|
||||
"""Test that the prompt argument is optional if role is provided (or handled in main)."""
|
||||
parser = create_parser()
|
||||
# Prompt is now optional (nargs='?')
|
||||
args = parser.parse_args(['--role', 'tier3'])
|
||||
assert args.role == 'tier3'
|
||||
assert args.prompt is None
|
||||
|
||||
def test_parser_help() -> None:
|
||||
"""Test that the help flag works without raising errors (exits with 0)."""
|
||||
parser = create_parser()
|
||||
with pytest.raises(SystemExit) as excinfo:
|
||||
parser.parse_args(['--help'])
|
||||
assert excinfo.value.code == 0
|
||||
|
||||
def test_get_role_documents() -> None:
|
||||
"""Test that get_role_documents returns the correct documentation paths for each tier."""
|
||||
assert get_role_documents('tier1') == ['conductor/product.md', 'conductor/product-guidelines.md', 'docs/guide_architecture.md', 'docs/guide_mma.md']
|
||||
assert get_role_documents('tier2') == ['conductor/tech-stack.md', 'conductor/workflow.md', 'docs/guide_architecture.md', 'docs/guide_mma.md']
|
||||
assert get_role_documents('tier3') == ['docs/guide_architecture.md']
|
||||
assert get_role_documents('tier4') == ['docs/guide_architecture.md']
|
||||
|
||||
def test_get_model_for_role() -> None:
|
||||
"""Test that get_model_for_role returns the correct model for each role."""
|
||||
assert get_model_for_role('tier1-orchestrator') == 'gemini-3.1-pro-preview'
|
||||
assert get_model_for_role('tier2-tech-lead') == 'gemini-3-flash-preview'
|
||||
assert get_model_for_role('tier3-worker') == 'gemini-3-flash-preview'
|
||||
assert get_model_for_role('tier4-qa') == 'gemini-2.5-flash-lite'
|
||||
|
||||
def test_execute_agent() -> None:
|
||||
"""
|
||||
Test that execute_agent calls subprocess.run with powershell and the correct gemini CLI arguments
|
||||
including the model specified for the role.
|
||||
"""
|
||||
role = "tier3-worker"
|
||||
prompt = "Write a unit test."
|
||||
docs = ["file1.py", "docs/spec.md"]
|
||||
expected_model = "gemini-3-flash-preview"
|
||||
mock_stdout = "Mocked AI Response"
|
||||
with patch("subprocess.run") as mock_run:
|
||||
mock_process = MagicMock()
|
||||
mock_process.stdout = mock_stdout
|
||||
mock_process.returncode = 0
|
||||
mock_run.return_value = mock_process
|
||||
result = execute_agent(role, prompt, docs)
|
||||
mock_run.assert_called_once()
|
||||
args, kwargs = mock_run.call_args
|
||||
cmd_list = args[0]
|
||||
assert cmd_list[0] == "powershell.exe"
|
||||
assert "-Command" in cmd_list
|
||||
ps_cmd = cmd_list[cmd_list.index("-Command") + 1]
|
||||
assert "gemini" in ps_cmd
|
||||
assert f"--model {expected_model}" in ps_cmd
|
||||
# Verify input contains the prompt and system directive
|
||||
input_text = kwargs.get("input")
|
||||
assert "STRICT SYSTEM DIRECTIVE" in input_text
|
||||
assert "TASK: Write a unit test." in input_text
|
||||
assert kwargs.get("capture_output") is True
|
||||
assert kwargs.get("text") is True
|
||||
assert result == mock_stdout
|
||||
|
||||
def test_get_dependencies(tmp_path: Path) -> None:
|
||||
content = (
|
||||
"import os\n"
|
||||
"import sys\n"
|
||||
"import file_cache\n"
|
||||
"from mcp_client import something\n"
|
||||
)
|
||||
filepath = tmp_path / "mock_script.py"
|
||||
filepath.write_text(content)
|
||||
dependencies = get_dependencies(str(filepath))
|
||||
assert dependencies == ['os', 'sys', 'file_cache', 'mcp_client']
|
||||
|
||||
import re
|
||||
|
||||
def test_execute_agent_logging(tmp_path: Path) -> None:
|
||||
log_file = tmp_path / "mma_delegation.log"
|
||||
# mma_exec now uses logs/agents/ for individual logs and logs/mma_delegation.log for master
|
||||
# We will patch LOG_FILE to point to our temp location
|
||||
with patch("scripts.mma_exec.LOG_FILE", str(log_file)), \
|
||||
patch("subprocess.run") as mock_run:
|
||||
mock_process = MagicMock()
|
||||
mock_process.stdout = ""
|
||||
mock_process.returncode = 0
|
||||
mock_run.return_value = mock_process
|
||||
test_role = "tier1"
|
||||
test_prompt = "Plan the next phase"
|
||||
execute_agent(test_role, test_prompt, [])
|
||||
assert log_file.exists()
|
||||
log_content = log_file.read_text()
|
||||
assert test_role in log_content
|
||||
assert test_prompt in log_content # Master log should now have the summary prompt
|
||||
assert re.search(r"\d{4}-\d{2}-\d{2}", log_content)
|
||||
|
||||
def test_execute_agent_tier3_injection(tmp_path: Path) -> None:
|
||||
main_content = "import dependency\n\ndef run():\n dependency.do_work()\n"
|
||||
main_file = tmp_path / "main.py"
|
||||
main_file.write_text(main_content)
|
||||
dep_content = "def do_work():\n pass\n\ndef other_func():\n print('hello')\n"
|
||||
dep_file = tmp_path / "dependency.py"
|
||||
dep_file.write_text(dep_content)
|
||||
# We need to ensure generate_skeleton is mockable or working
|
||||
old_cwd = os.getcwd()
|
||||
os.chdir(tmp_path)
|
||||
try:
|
||||
with patch("subprocess.run") as mock_run:
|
||||
mock_process = MagicMock()
|
||||
mock_process.stdout = "OK"
|
||||
mock_process.returncode = 0
|
||||
mock_run.return_value = mock_process
|
||||
execute_agent('tier3-worker', 'Modify main.py', ['main.py'])
|
||||
assert mock_run.called
|
||||
input_text = mock_run.call_args[1].get("input")
|
||||
assert "DEPENDENCY SKELETON: dependency.py" in input_text
|
||||
assert "def do_work():" in input_text
|
||||
assert "Modify main.py" in input_text
|
||||
finally:
|
||||
os.chdir(old_cwd)
|
||||
@@ -1,40 +0,0 @@
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Add src to path
|
||||
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
|
||||
|
||||
from src.history import HistoryManager
|
||||
|
||||
def verify_phase_1():
|
||||
print("Verifying Phase 1: History Core Logic...")
|
||||
hm = HistoryManager(max_capacity=10)
|
||||
|
||||
# Test push
|
||||
hm.push({"test": 1}, "initial")
|
||||
if not hm.can_undo:
|
||||
print("Error: can_undo should be true after push")
|
||||
sys.exit(1)
|
||||
|
||||
# Test undo
|
||||
entry = hm.undo({"test": 2}, "current")
|
||||
if entry.state != {"test": 1}:
|
||||
print(f"Error: expected state {{'test': 1}}, got {entry.state}")
|
||||
sys.exit(1)
|
||||
if entry.description != "initial":
|
||||
print(f"Error: expected description 'initial', got {entry.description}")
|
||||
sys.exit(1)
|
||||
|
||||
# Test redo
|
||||
entry = hm.redo({"test": 1}, "back")
|
||||
if entry.state != {"test": 2}:
|
||||
print(f"Error: expected state {{'test': 2}}, got {entry.state}")
|
||||
sys.exit(1)
|
||||
if entry.description != "current":
|
||||
print(f"Error: expected description 'current', got {entry.description}")
|
||||
sys.exit(1)
|
||||
|
||||
print("Phase 1 verification PASSED.")
|
||||
|
||||
if __name__ == "__main__":
|
||||
verify_phase_1()
|
||||
@@ -1,24 +0,0 @@
|
||||
import subprocess
|
||||
import sys
|
||||
import os
|
||||
|
||||
def verify_phase_2():
|
||||
print("Verifying Phase 2: Text Input & Control Undo/Redo...")
|
||||
|
||||
# Run the simulation test
|
||||
result = subprocess.run(
|
||||
["uv", "run", "pytest", "tests/test_undo_redo_sim.py"],
|
||||
capture_output=True,
|
||||
text=True
|
||||
)
|
||||
|
||||
if result.returncode == 0:
|
||||
print("Phase 2 verification PASSED.")
|
||||
else:
|
||||
print("Phase 2 verification FAILED.")
|
||||
print(result.stdout)
|
||||
print(result.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
|
||||
verify_phase_2()
|
||||
@@ -1,24 +0,0 @@
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
def verify_phase_3():
|
||||
print("Verifying Phase 3: GUI Menu Integration...")
|
||||
|
||||
# We rely on the existing simulation test to verify the callback logic,
|
||||
# which underpins the GUI menu integration.
|
||||
result = subprocess.run(
|
||||
["uv", "run", "pytest", "tests/test_workspace_profiles_sim.py"],
|
||||
capture_output=True,
|
||||
text=True
|
||||
)
|
||||
|
||||
if result.returncode == 0:
|
||||
print("Phase 3 verification PASSED.")
|
||||
else:
|
||||
print("Phase 3 verification FAILED.")
|
||||
print(result.stdout)
|
||||
print(result.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
|
||||
verify_phase_3()
|
||||
@@ -1,54 +0,0 @@
|
||||
import sys
|
||||
import os
|
||||
import time
|
||||
|
||||
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
||||
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "src")))
|
||||
|
||||
from src import api_hook_client
|
||||
|
||||
def verify_phase_3():
|
||||
print("[VERIFY] Starting Phase 3 Automated Verification...")
|
||||
client = api_hook_client.ApiHookClient()
|
||||
if not client.wait_for_server(timeout=10):
|
||||
print("[VERIFY] ERROR: Hook server not reachable.")
|
||||
sys.exit(1)
|
||||
|
||||
try:
|
||||
# Check RAG status
|
||||
status = client.get_value("rag_status")
|
||||
print(f"[VERIFY] Current RAG status: {status}")
|
||||
|
||||
# Check if RAG settings are accessible
|
||||
enabled = client.get_value("rag_enabled")
|
||||
source = client.get_value("rag_source")
|
||||
print(f"[VERIFY] RAG Enabled: {enabled}, Source: {source}")
|
||||
|
||||
# Verify status transitions (indexing)
|
||||
print("[VERIFY] Triggering index rebuild...")
|
||||
client.click("btn_rebuild_rag_index")
|
||||
|
||||
time.sleep(0.5)
|
||||
status = client.get_value("rag_status")
|
||||
print(f"[VERIFY] Status during indexing: {status}")
|
||||
|
||||
# Wait for completion
|
||||
max_wait = 10
|
||||
start = time.time()
|
||||
while time.time() - start < max_wait:
|
||||
status = client.get_value("rag_status")
|
||||
if status == "ready":
|
||||
print("[VERIFY] RAG reached 'ready' status.")
|
||||
break
|
||||
time.sleep(1)
|
||||
else:
|
||||
print(f"[VERIFY] WARNING: RAG status timeout. Final: {status}")
|
||||
|
||||
print("[VERIFY] Phase 3 verification COMPLETED successfully.")
|
||||
|
||||
except Exception as e:
|
||||
print(f"[VERIFY] ERROR during verification: {e}")
|
||||
sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
|
||||
verify_phase_3()
|
||||
@@ -1,23 +0,0 @@
|
||||
import subprocess
|
||||
import sys
|
||||
import os
|
||||
|
||||
def verify_phase_4():
|
||||
print("Verifying Phase 4: Contextual Auto-Switch...")
|
||||
|
||||
result = subprocess.run(
|
||||
["uv", "run", "pytest", "tests/test_auto_switch_sim.py"],
|
||||
capture_output=True,
|
||||
text=True
|
||||
)
|
||||
|
||||
if result.returncode == 0:
|
||||
print("Phase 4 verification PASSED.")
|
||||
else:
|
||||
print("Phase 4 verification FAILED.")
|
||||
print(result.stdout)
|
||||
print(result.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
|
||||
verify_phase_4()
|
||||
@@ -21,6 +21,8 @@ permission:
|
||||
"git reset*": deny
|
||||
---
|
||||
|
||||
Note: You may use superpowers skills to assist you (brainstorming, recieving code reviews, writing plans, writting skills, dispatching parallel agents)
|
||||
|
||||
STRICT SYSTEM DIRECTIVE: You are a Tier 2 Tech Lead in AUTONOMOUS mode, running in the **META-TOOLING** domain (per `docs/guide_meta_boundary.md`). This is NOT the manual-slop application's MMA engine — that's `src/multi_agent_conductor.py` in the APPLICATION domain. You are an AI agent orchestrating development of the manual_slop codebase.
|
||||
|
||||
## MANDATORY: Domain Distinction (added 2026-06-27)
|
||||
|
||||
@@ -182,6 +182,8 @@ Metadata is now the typed fat struct at the wire boundary.
|
||||
|
||||
## §Phase 2: Add `ProjectContext` dataclass for `flat_config`
|
||||
|
||||
> **[x] COMPLETE** [commit 805a0619] — Per SPEC_CORRECTION_phase_2.md (Option A: incremental, dict-compat). Added 6 sub-dataclasses (ProjectMeta, ProjectOutput, ProjectFiles, ProjectScreenshots, ProjectDiscussion, ProjectContext) + EMPTY_PROJECT_CONTEXT sentinel. `flat_config` returns ProjectContext. Dict-compat methods (`__getitem__`, `get`) keep consumers unchanged. 10 new regression tests in `tests/test_project_context_20260627.py`; all pass.
|
||||
|
||||
**WHERE:**
|
||||
- `src/project_manager.py:flat_config` — currently returns `dict[str, Any]`
|
||||
- All consumers (search for `flat_config` calls in `src/app_controller.py` and `src/gui_2.py`)
|
||||
|
||||
@@ -46,13 +46,38 @@ baseline = { metadata_typealias = 1, hasattr_f_path = 29, optional_returns = 30,
|
||||
after_phases_1_3 = { metadata_typealias = 0, hasattr_f_path = 19, optional_returns = 30, any_params = 60, dict_str_any_params = 11 }
|
||||
deltas = { metadata_typealias = -1, hasattr_f_path = -10, optional_returns = 0, any_params = 1, dict_str_any_params = 1 }
|
||||
|
||||
[deferred_to_followup_tracks]
|
||||
# Items deferred from this track for follow-up tracks
|
||||
{ id = "F1", title = "cruft_elimination_gui_2_followup", description = "Remove 18 hasattr(f, 'path') checks in src/gui_2.py", scope = "1 source file; 18 sites" }
|
||||
{ id = "F2", title = "cruft_elimination_phase_4_5", description = "Phase 4 + Phase 5: fix _do_generate and rag_engine.search return types", scope = "2 source files; ~5 sites" }
|
||||
{ id = "F3", title = "cruft_elimination_phase_6", description = "Phase 6: eliminate Optional[T] returns", scope = "14 files; 30 sites" }
|
||||
{ id = "F4", title = "cruft_elimination_phase_7", description = "Phase 7: eliminate Any + dict[str, Any] in internal signatures", scope = "8+ files; 69 sites" }
|
||||
{ id = "F5", title = "metadata_dict_compat_deprecation", description = "Remove dict-compat methods on Metadata once all consumers migrated", scope = "1 file; methods: __getitem__, get, __contains__, __iter__, keys, values, items" }
|
||||
[incomplete_per_spec]
|
||||
# This track is INCOMPLETE per its spec. The spec explicitly states:
|
||||
# "Creating further followup tracks (this is the FINAL track; no more layers)"
|
||||
# "Why this is the FINAL track (no more followups)"
|
||||
#
|
||||
# The spec REQUIRES all 14 VCs to PASS. Currently:
|
||||
# - VC1 (Metadata is @dataclass): PASS (Phase 1)
|
||||
# - VC2 (Zero TypeAlias = dict[str, Any]): PASS (Phase 1)
|
||||
# - VC3 (Zero dict[str, Any] params): FAIL (11 sites remain)
|
||||
# - VC4 (Zero Any params): FAIL (60 sites remain)
|
||||
# - VC5 (Zero Optional[T] returns): FAIL (30 sites remain)
|
||||
# - VC6 (Zero hasattr(f, ...) entity dispatch): PARTIAL (19 sites remain, all in gui_2.py and aggregate.py)
|
||||
# - VC7 (self.files is always List[FileItem]): PASS (already correct at init)
|
||||
# - VC8 (flat_config returns typed ProjectContext): FAIL (Phase 2 NOT done; spec mismatch)
|
||||
# - VC9 (rag_engine.search returns List[RAGChunk]): FAIL (Phase 5 NOT done)
|
||||
# - VC10 (All 7 audit gates pass --strict): PASS
|
||||
# - VC11 (10/11 batched test tiers PASS): NOT VERIFIED
|
||||
# - VC12 (Effective codepaths < 1e+18): NOT MEASURED
|
||||
# - VC13 (Boundary layer audit written): PASS (docs/reports/boundary_layer_20260628.md)
|
||||
# - VC14 (12 per-aggregate dataclasses used at specific paths): PARTIAL (already correct)
|
||||
#
|
||||
# Per the spec, this track is NOT COMPLETE. 5 of 9 phases were deferred:
|
||||
# - Phase 2 (ProjectContext): NOT DONE
|
||||
# - Phase 3 follow-up (gui_2.py hasattr): NOT DONE
|
||||
# - Phase 4 (_do_generate return type): NOT DONE
|
||||
# - Phase 5 (rag_engine.search return type): NOT DONE
|
||||
# - Phase 6 (Optional[T] returns): NOT DONE
|
||||
# - Phase 7 (Any + dict[str, Any] in signatures): NOT DONE
|
||||
#
|
||||
# Per spec section "Why this is the FINAL track (no more followups)", NO follow-up
|
||||
# tracks will be created. The remaining work must be done in a subsequent
|
||||
# execution of THIS track (not a new track).
|
||||
|
||||
[audit_gate_results]
|
||||
audit_weak_types = "STRICT OK (107 <= 112 baseline)"
|
||||
|
||||
@@ -0,0 +1,109 @@
|
||||
{
|
||||
"track_id": "enforcement_gap_closure_20260627",
|
||||
"name": "Enforcement Gap Closure (Boundary-Layer Audit + Optional[T] Audit Widening)",
|
||||
"status": "active",
|
||||
"branch": "master",
|
||||
"created": "2026-06-27",
|
||||
"owner": "Tier 1 (initialized); implementation delegated to Tier 2/3.",
|
||||
"blocked_by": [],
|
||||
"blocks": [],
|
||||
"scope": {
|
||||
"new_files": [
|
||||
"scripts/audit_boundary_layer.py",
|
||||
"scripts/boundary_layer_allowlist.toml",
|
||||
"scripts/audit_optional_returns.py (renamed from audit_optional_in_3_files.py)",
|
||||
"scripts/audit_optional_returns.baseline.json",
|
||||
"tests/test_audit_boundary_layer.py",
|
||||
"tests/test_audit_optional_returns.py",
|
||||
"docs/reports/TRACK_COMPLETION_enforcement_gap_closure_20260627.md"
|
||||
],
|
||||
"modified_files": [
|
||||
"conductor/code_styleguides/python.md (sections 17.7, 17.8, inventory table 449-456)",
|
||||
"conductor/code_styleguides/error_handling.md (cross-reference sweep only)",
|
||||
"docs/AGENTS.md (cross-reference sweep only)",
|
||||
"conductor/tracks.md (active-track row + status)",
|
||||
"conductor/chronology.md (prepend shipment row)"
|
||||
],
|
||||
"deleted_files": [
|
||||
"scripts/audit_optional_in_3_files.py (renamed to audit_optional_returns.py via git mv)"
|
||||
]
|
||||
},
|
||||
"estimated_effort": {
|
||||
"method": "scope (per workflow.md Tier 1 Track Initialization Rules. NO day estimates.)",
|
||||
"phase_1": "4 tasks: 1 test file (10 tests) + 1 audit script + 1 allowlist TOML + green-phase verification",
|
||||
"phase_2": "3 tasks: 1 test file (5 tests) + 1 rename/edit + 1 baseline JSON + green-phase verification",
|
||||
"phase_3": "2 tasks: 1 styleguide inventory edit + 1 cross-reference sweep",
|
||||
"phase_4": "4 tasks: 7-audit verification + 1 end-of-track report + 1 state update + user sign-off"
|
||||
},
|
||||
"verification_criteria": [
|
||||
"G1: scripts/audit_boundary_layer.py exists + AST-scans all src/*.py + exits 1 in --strict on un-allowlisted dict[str, Any] sites",
|
||||
"G2: scripts/boundary_layer_allowlist.toml exists + lists ~14 boundary files with reasons + --show-allowlist prints them",
|
||||
"G3: scripts/audit_optional_returns.py exists (renamed from audit_optional_in_3_files.py) + scans all src/*.py + 3 history.py residuals baselined in audit_optional_returns.baseline.json (strict stays green)",
|
||||
"G4: conductor/code_styleguides/python.md sections 17.7, 17.8, and inventory table reflect post-track reality (audit_boundary_layer implemented; audit_optional_returns implemented; audit_imports implemented)",
|
||||
"G5: cross-reference sweep complete (no enforcement-instruction references to audit_optional_in_3_files.py; historical references preserved)",
|
||||
"G6: tests/test_audit_boundary_layer.py has >=10 tests; all pass",
|
||||
"G7: tests/test_audit_optional_returns.py has >=5 tests; all pass",
|
||||
"G8: docs/reports/TRACK_COMPLETION_enforcement_gap_closure_20260627.md exists; documents contradiction closure (C1, C2, C3-partial, C18-partial, C21) and remaining (C5, C6, C16, C17 - deferred per user directive)",
|
||||
"VC_pre_commit_parallel_safe": "ZERO file overlap with the running tier2/post_module_taxonomy_de_cruft_20260627 branch (verified by Tier 1 against ddcec7b0 + TRACK_COMPLETION file-level changes)"
|
||||
],
|
||||
"regressions_and_pre_existing_failures": [],
|
||||
"pre_existing_failures_remaining": [],
|
||||
"deferred_to_followup_tracks": [
|
||||
{
|
||||
"title": "Optional[T] return migration in src/history.py",
|
||||
"description": "3 RETURN_OPTIONAL sites in src/history.py baselined by this track; cruft_elimination_20260627 Phase 6 owns the migration to Result[T] + NIL_T.",
|
||||
"track_status": "planned in cruft_elimination_20260627"
|
||||
},
|
||||
{
|
||||
"title": "dict[str, Any] migration in hot_reloader.py + startup_profiler.py",
|
||||
"description": "2 un-allowlisted boundary violations baselined by this track; a future track promotes them to typed dataclasses (HotReloadSnapshot, ProfilerSnapshot).",
|
||||
"track_status": "not yet initialized"
|
||||
},
|
||||
{
|
||||
"title": "Main-repo pre-commit hook wiring",
|
||||
"description": "The 5 audit scripts strict mode (weak_types, boundary_layer, optional_returns, exception_handling, imports) is not wired into the main repo's .git/hooks/. Per contradictions report C4.",
|
||||
"track_status": "not yet initialized"
|
||||
},
|
||||
{
|
||||
"title": "Docs-count drift in docs/Readme.md (C7, C8, C9) + styleguide drift (C16 python.md s10, C17 type_aliases.md line 19) + RAGChunk.id in guides (C6)",
|
||||
"description": "Deferred per user directive 2026-06-27 until tier2 branch stabilizes; these describe code state that exists post-merge of the taxonomy branches.",
|
||||
"track_status": "deferred; will bundle into a docs-sync track post-merge"
|
||||
}
|
||||
],
|
||||
"risk_register": [
|
||||
{
|
||||
"id": "R1",
|
||||
"description": "audit_optional_returns.baseline.json format mismatch with audit_weak_types.baseline.json contract",
|
||||
"likelihood": "medium",
|
||||
"impact": "the renamed --strict mode behaves inconsistently with the existing baseline pattern",
|
||||
"mitigation": "Tier 3 reads scripts/audit_weak_types.py + its baseline JSON before implementing; mirror the exact contract"
|
||||
},
|
||||
{
|
||||
"id": "R2",
|
||||
"description": "Cross-file rename race if Tier 2 branch touches scripts/audit_optional_in_3_files.py in parallel",
|
||||
"likelihood": "low",
|
||||
"impact": "the git mv conflicts with Tier 2 work",
|
||||
"mitigation": "Tier 1 verified post_module_taxonomy_de_cruft TRACK_COMPLETION does not touch audit_optional_*; only scripts/audit_no_models_config_io.py"
|
||||
},
|
||||
{
|
||||
"id": "R3",
|
||||
"description": "Boundary allowlist under-classifies a genuine violation as boundary (false negative)",
|
||||
"likelihood": "medium",
|
||||
"impact": "the audit misses a real dict[str, Any] escape hatch that future LLMs reach for",
|
||||
"mitigation": "Tier 1's spec 'Current State Audit' manually classified the 14 legitimate boundary files + 2 genuine violators; the audit starts from that classification. Reviewer (user) inspects boundary_layer_allowlist.toml before merge."
|
||||
},
|
||||
{
|
||||
"id": "R4",
|
||||
"description": "Over-classification: audit flags a genuine boundary function as a violation (false positive)",
|
||||
"likelihood": "low",
|
||||
"impact": "strict mode is red on a real boundary file; either the allowlist is amended (correct fix) or the violation is suppressed (wrong fix, masks drift)",
|
||||
"mitigation": "Per spec FR1, allowlisting is the explicit 'declare your boundary' mechanism; the reviewer audits the allowlist at merge time. The audit's `--no-allowlist` mode exposes every site so reviewers can spot-check classifications."
|
||||
}
|
||||
],
|
||||
"contradictions_report_cross_reference": {
|
||||
"source": "docs/reports/CONTRADICTIONS_REPORT_20260627.md",
|
||||
"closes": ["C1", "C2", "C3_partial", "C18_partial", "C21"],
|
||||
"defers": ["C5", "C6", "C7", "C8", "C9", "C11", "C12", "C13", "C14", "C15", "C16", "C17", "C19", "C20"],
|
||||
"rationale": "C1+C2+C21 are about the Optional audit name+scope (closed by Phase 2 rename+widen). C3-partial is 'audit_imports.py planned but exists' (closed by Phase 3 inventory correction). C18-partial is the audit count (closed by Phase 3). The 14 deferred items are docs-sync (C5-C9, C16, C17) or status drift (C11-C15, C19, C20) that per user directive 2026-06-27 wait for the tier2 taxonomy branch to stabilize before touching master's docs."
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,172 @@
|
||||
# Plan: Enforcement Gap Closure (Boundary-Layer Audit + Optional[T] Audit Widening)
|
||||
|
||||
Track: `enforcement_gap_closure_20260627`
|
||||
Branch: master (parallel-safe against `tier2/post_module_taxonomy_de_cruft_20260627`)
|
||||
Spec: `conductor/tracks/enforcement_gap_closure_20260627/spec.md`
|
||||
|
||||
This plan is read by a Tier 3 Worker (or Tier 2). All Python edits MUST use 1-space indentation. No comments in body. CRLF preserved via `manual-slop_edit_file` MCP tool (never native `edit`).
|
||||
|
||||
**Audit-then-specify verification done by Tier 1:** All file:line references below were verified against master at `77b70226` on 2026-06-27.
|
||||
|
||||
---
|
||||
|
||||
## Phase 1: Boundary-Layer Audit Script
|
||||
|
||||
Focus: Implement `scripts/audit_boundary_layer.py` + `scripts/boundary_layer_allowlist.toml` + tests, mirroring the `audit_imports.py` + `audit_imports_whitelist.toml` contract.
|
||||
|
||||
- [ ] Task 1.1: Write failing tests for `scripts/audit_boundary_layer.py`
|
||||
- **WHERE:** `tests/test_audit_boundary_layer.py` (NEW file)
|
||||
- **WHAT:** 10 tests per spec FR5 (finder detects `dict[str, Any]` in return / param / local; allowlist suppression + WHITELISTED annotation; `--strict` exit 1 on un-allowlisted; `--strict` exit 0 on allowlisted; `--json` shape; missing-file handling; syntax-error handling; `--show-allowlist`).
|
||||
- **HOW:** Use `tmp_path` (or `tests/artifacts/` per workspace_paths.md — see workflow.md "Test Sandbox Hardening") to create a synthetic `src/` tree the audit can scan via a `--src` flag (mirror `audit_weak_types.py --src`). Each test creates 1-2 small .py files with the pattern under test, invokes the audit via `subprocess.run(["python", "scripts/audit_boundary_layer.py", "--src", str(tmp_src), ...])`, asserts on stdout + exit code. Tests MUST fail before the script exists (Red phase).
|
||||
- **SAFETY:** No `live_gui` fixture (these are unit tests of a script). No `unittest.mock.patch` of core code. Use `monkeypatch.setenv` for the `--src` path or pass via argv.
|
||||
- **COMMIT:** `test(audit): add 10 failing tests for boundary-layer audit`
|
||||
- **GIT NOTE:** Red-phase tests for `scripts/audit_boundary_layer.py`; cover finder + allowlist + strict + json + error-handling per spec FR1 + FR5.
|
||||
|
||||
- [ ] Task 1.2: Implement `scripts/audit_boundary_layer.py`
|
||||
- **WHERE:** `scripts/audit_boundary_layer.py` (NEW file)
|
||||
- **WHAT:** Implement the audit per spec FR1. The structure mirrors `scripts/audit_imports.py` (309 lines): module docstring → argparse → `audit_file(path) -> list[Finding]` → main loop over `sorted(Path(src).glob("*.py"))` → exit code logic.
|
||||
- **HOW:** Reuse the `audit_optional_in_3_files.py` AST detector pattern (it already has `_annotation_is_optional_arg` — copy the analogous `_is_dict_str_any` helper). Detection contract (FR1):
|
||||
1. Walk each `ast.FunctionDef` / `AsyncFunctionDef`:
|
||||
- If `node.returns` is `dict[str, Any]` (Subscript with value Name "dict"|"Dict" and slice Tuple `[Name "str", Name "Any"]`) → emit `RETURN_DICT_ANY`.
|
||||
- For each arg in `args.args + kwonlyargs + posonlyargs`: if `arg.annotation` is `dict[str, Any]` → emit `PARAM_DICT_ANY`.
|
||||
2. Walk each `ast.AnnAssign` inside a function body: if `target.annotation` is `dict[str, Any]` → emit `LOCAL_ANNOT_DICT_ANY`.
|
||||
3. Allowlist: load `scripts/boundary_layer_allowlist.toml` (use `tomllib.load`); for any file whose relative path is a key, suppress all findings for that file and emit a single `WHITELISTED` finding per file (matches `audit_imports.py` precedent).
|
||||
4. CLI flags: `--strict`, `--json`, `--show-allowlist`, `--no-allowlist`, `--src <path>` (default `"src"`).
|
||||
5. Default mode: print summary table (file, sites, allowlisted) + a list of violations; exit 0.
|
||||
6. `--strict`: same + exit 1 if there are un-allowlisted `RETURN_DICT_ANY` / `PARAM_DICT_ANY` / `LOCAL_ANNOT_DICT_ANY` findings.
|
||||
7. `--json`: print JSON `{files_scanned, files_with_findings, total_findings, by_kind, findings}` and exit 0.
|
||||
8. `--show-allowlist`: print the TOML contents + reasons; exit 0.
|
||||
9. `--no-allowlist`: do not read the TOML; audit all sites.
|
||||
- **SAFETY:** Pure stdlib (`ast`, `argparse`, `json`, `sys`, `pathlib.Path`, `tomllib`). No subprocess to `src/` files.
|
||||
- **COMMIT:** `feat(audit): implement audit_boundary_layer.py per FR1`
|
||||
- **GIT NOTE:** Implements the §17.7 boundary-layer audit; mirrors audit_imports.py contract; allowlist-driven per-file suppression.
|
||||
|
||||
- [ ] Task 1.3: Write `scripts/boundary_layer_allowlist.toml`
|
||||
- **WHERE:** `scripts/boundary_layer_allowlist.toml` (NEW file)
|
||||
- **WHAT:** Initial allowlist with the ~14 legitimate boundary files from spec "Current State Audit": `context_presets.py`, `events.py`, `openai_compatible.py`, `theme_models.py`, `log_registry.py`, `presets.py`, `tool_presets.py`, `personas.py`, `workspace_manager.py`, `paths.py`, `gemini_cli_adapter.py`, `mcp_client.py`, `type_aliases.py`, `session_logger.py`.
|
||||
- **HOW:** Mirror `audit_imports_whitelist.toml` format:
|
||||
- Header comment block (purpose + format).
|
||||
- "Last reviewed: 2026-06-27"
|
||||
- One `[allowlist."<relative_path>"]` entry per file with `reason = "..."` documenting why it's at the wire boundary (the reasons are documented in spec "Current State Audit" — e.g., context_presets = "project_dict is the wire TOML"; events.to_dict = "wire serialization for WS protocol"; etc.).
|
||||
- **SAFETY:** Pure TOML; no code.
|
||||
- **COMMIT:** `feat(audit): seed boundary_layer_allowlist.toml with 14 boundary files`
|
||||
- **GIT NOTE:** Allowlist seeds the §17.7 legitimate boundary; per audit_imports_whitelist.toml precedent.
|
||||
|
||||
- [ ] Task 1.4: Run tests for Phase 1 (Green phase)
|
||||
- **WHAT:** Execute `uv run pytest tests/test_audit_boundary_layer.py -v` (batched-runner convention can also be used: `uv run python scripts/run_tests_batched.py --filter test_audit_boundary_layer`). All 10 tests must pass. If any fail, debug (≤2 retries per workflow.md "Deduction Loop" rule), then STOP and report if still failing.
|
||||
- **COMMIT:** `conductor(state): mark Phase 1 task 1.4 verification` (or skip the commit if no code changes; just verify).
|
||||
- **GIT NOTE:** Green-phase verification for boundary-layer audit + allowlist.
|
||||
|
||||
---
|
||||
|
||||
## Phase 2: Optional[T] Audit Rename + Widening
|
||||
|
||||
Focus: Rename `audit_optional_in_3_files.py` → `audit_optional_returns.py`, widen from 4 files to all `src/*.py`, baseline the 3 `history.py` residuals.
|
||||
|
||||
- [ ] Task 2.1: Write failing tests for the renamed + widened audit
|
||||
- **WHERE:** `tests/test_audit_optional_returns.py` (NEW file)
|
||||
- **WHAT:** 5 tests per spec FR5: test_renamed_script_exists, test_scans_all_src_files, test_baseline_reading_keeps_strict_green, test_strict_exits_1_above_baseline, test_param_optional_is_warning_not_strict.
|
||||
- **HOW:** For test_scans_all_src_files, use `monkeypatch` + `--src <tmp_src>` flag (the script may need a `--src` flag added in Task 2.2 if it doesn't already have one — current `audit_optional_in_3_files.py` hardcodes the 4-file path; Task 2.2 adds `--src`). Tests must fail against the OLD script (which still hardcodes 4 files).
|
||||
- **SAFETY:** No `live_gui`. No core mocking.
|
||||
- **COMMIT:** `test(audit): add 5 failing tests for audit_optional_returns widening`
|
||||
- **GIT NOTE:** Red-phase tests for the rename + widening to all src/*.py per spec FR3 + FR5.
|
||||
|
||||
- [ ] Task 2.2: Rename + widen `audit_optional_in_3_files.py` → `audit_optional_returns.py`
|
||||
- **WHERE:** `git mv scripts/audit_optional_in_3_files.py scripts/audit_optional_returns.py` then edit the new file.
|
||||
- **WHAT:** Per spec FR3:
|
||||
1. `git mv` the file (preserves history).
|
||||
2. Edit `scripts/audit_optional_returns.py`:
|
||||
- Module docstring: drop "4 baseline files"; say "all `src/*.py` per §17 post-2026-06-27 widening (the successor to `audit_optional_in_3_files.py`, which was renamed + widened on 2026-06-27)."
|
||||
- Replace `BASELINE_FILES: tuple[str, ...] = (...)` with `def _discover_src_files(src_dir: str = "src") -> list[Path]: return sorted(Path(src_dir).glob("*.py"))`.
|
||||
- Update `main()` to iterate `_discover_src_files(args.src)` instead of the hardcoded tuple.
|
||||
- Add `--src <path>` arg (default `"src"`) mirroring `audit_weak_types.py`.
|
||||
- Update `--json` output's `"files_scanned"` field to reflect the glob count.
|
||||
3. Create `scripts/audit_optional_returns.baseline.json` recording the 3 `src/history.py` `RETURN_OPTIONAL` findings so `--strict` exits 0 on master (findings ≤ baseline). Format: same as `audit_weak_types.baseline.json` (a JSON object with a count or a list of `{file, line, function, kind}` entries that strict mode subtracts). The strict-mode logic: load baseline; subtract baseline findings from current findings; exit 1 if residuals > 0. (Mirror `audit_weak_types.py`'s `--strict` + baseline contract — read its source to confirm the exact subtraction mechanism.)
|
||||
- **SAFETY:** No `src/` edits. No tests/ edits except the new test file from Task 2.1.
|
||||
- **COMMIT:** `refactor(audit): rename audit_optional_in_3_files.py -> audit_optional_returns.py; widen to all src/*.py; baseline 3 history.py residuals`
|
||||
- **GIT NOTE:** Closes contradictions C1+C21 (script name) + C2 (Optional ban scope ambiguity); script name + scope + baseline now honest per §17 post-2026-06-27.
|
||||
|
||||
- [ ] Task 2.3: Run tests for Phase 2 (Green phase)
|
||||
- **WHAT:** `uv run pytest tests/test_audit_optional_returns.py -v`. All 5 tests must pass. If failures, ≤2 debug retries; then STOP.
|
||||
- **VERIFY:** Also run the existing audit_optional tests (if any reference the old name, update them — likely there are no callers other than `code_path_audit_20260607`'s historical references which don't run).
|
||||
- **COMMIT:** `conductor(state): mark Phase 2 task 2.3 verification` (or skip if no code changes).
|
||||
- **GIT NOTE:** Green-phase verification for the rename + widening.
|
||||
|
||||
---
|
||||
|
||||
## Phase 3: Styleguide Doc Reconciliation
|
||||
|
||||
Focus: Fix `python.md` §17 enforcement inventory + §17.8 section to match post-track reality. Close contradictions C3, C18 (audit_imports exists), C1+C21 (script renamed), C2 (scope clarified), C5 (Result notation — only if no branch-sensitivity; per spec OOS, this is C5 which is deferred — confirm during this phase).
|
||||
|
||||
- [ ] Task 3.1: Fix `python.md` §17 inventory table (lines 449-456) + §17.8 enforcement section (lines 357-362)
|
||||
- **WHERE:** `conductor/code_styleguides/python.md`
|
||||
- **WHAT:** Per spec FR4:
|
||||
1. Inventory table (lines 449-456): update the rows:
|
||||
- `dict[str, Any]` ban: ADD a row for `scripts/audit_boundary_layer.py --strict` (implemented this track; reads `boundary_layer_allowlist.toml`; `--no-allowlist` audits all). KEEP the existing `audit_weak_types.py --strict` row (they catch overlapping but distinct shapes — weak_types catches `Any` in any position; boundary_layer specifically targets `dict[str, Any]` in *signatures* outside the allowlisted boundary).
|
||||
- `Optional[T]` returns: change the row from "audit_optional_in_3_files.py covering 4 baseline files" to "audit_optional_returns.py --strict covering all src/*.py; reads audit_optional_returns.baseline.json for the 3 history.py residuals until cruft_elimination Phase 6". Mark "✅ implemented".
|
||||
- Local imports + `_PREFIX` aliasing + repeated `.from_dict()`: change `audit_imports.py` row to "✅ implemented" (was "⚠️ not yet built" — wrong; the script exists at `scripts/audit_imports.py`).
|
||||
- Repeated `.from_dict()`: drop "(no script planned; relies on Tier 2 review)" — covered by `audit_imports.py`.
|
||||
2. §17.8 enforcement section (lines 357-362): rewrite the bullets per spec FR4:
|
||||
- Bullet for `audit_optional_returns.py` → reflects rename + all-src scope.
|
||||
- Bullet for `audit_imports.py` → drop the "(planned per §17.9a)" parenthetical; mark as implemented.
|
||||
- Bullet for `audit_boundary_layer.py --strict` → replace the "boundary_layer audit (planned...)" bullet; describe the script + allowlist + `--no-allowlist` flag.
|
||||
- The "Pre-commit: every commit MUST pass all four audits above" line → "five audits above" (weak_types, boundary_layer, optional_returns, exception_handling, imports).
|
||||
- **HOW:** Use `manual-slop_edit_file` MCP tool. Verify exact line ranges via `manual-slop_get_file_slice` before editing (the line numbers above are approximate; the actual edit replaces a contiguous block). Preserve CRLF.
|
||||
- **SAFETY:** Pure doc edit. No code. No `src/` changes. No tests changes.
|
||||
- **COMMIT:** `docs(python.md): reconcile §17 inventory + §17.8 with post-track reality`
|
||||
- **GIT NOTE:** Closes C3 (audit_imports.py was "planned" but exists), C18 (audit count), C1+C21 reflected in doc; C2 scope clarified.
|
||||
|
||||
- [ ] Task 3.2: Cross-reference sweep for `audit_optional_in_3_files.py` references
|
||||
- **WHAT:** Use `manual-slop_py_find_usages` / `rg` to find ALL references to the old script name across `conductor/` and `docs/`. Per the spec, references likely exist in `error_handling.md:885` + `docs/AGENTS.md §"Convention Enforcement"`. For each reference:
|
||||
- If it's a historical/cross-reference note (e.g., "was `audit_optional_in_3_files.py`"), leave it.
|
||||
- If it's an enforcement-instruction reference (e.g., "run `uv run python scripts/audit_optional_in_3_files.py --strict`"), update to `audit_optional_returns.py`.
|
||||
- **COMMIT:** `docs: update audit_optional_in_3_files.py references to audit_optional_returns.py`
|
||||
- **GIT NOTE:** Historical references preserved (the rename history is documented in python.md:359); enforcement instructions updated.
|
||||
|
||||
---
|
||||
|
||||
## Phase 4: End-of-Track Report + State Update
|
||||
|
||||
- [ ] Task 4.1: Run the full 7-audit strict suite (gate verification)
|
||||
- **WHAT:** Execute all 7 audit scripts (now including the 2 new ones this track ships) in `--strict` mode:
|
||||
```
|
||||
uv run python scripts/audit_weak_types.py --strict
|
||||
uv run python scripts/audit_boundary_layer.py --strict
|
||||
uv run python scripts/audit_optional_returns.py --strict
|
||||
uv run python scripts/audit_exception_handling.py --strict
|
||||
uv run python scripts/audit_imports.py --strict
|
||||
uv run python scripts/audit_main_thread_imports.py
|
||||
uv run python scripts/audit_no_models_config_io.py
|
||||
```
|
||||
Expected: all pass (the boundary audit's 2 residuals `hot_reloader.py` + `startup_profiler.py` MUST be in the baseline JSON or the allowlist — verify before this step). The Optional audit's 3 `history.py` residuals are in `audit_optional_returns.baseline.json` (created in Phase 2).
|
||||
- **VERIFY:** If any audit fails, fix the baseline OR the allowlist. Do NOT mask a real violation; document the residual in the end-of-track report instead.
|
||||
- **COMMIT:** `test(audit): verify all 7 audit gates pass --strict post-track`
|
||||
- **GIT NOTE:** The 7-audit strict suite green; the 2 boundary + 3 Optional residuals baselined per spec.
|
||||
|
||||
- [ ] Task 4.2: Write end-of-track report
|
||||
- **WHERE:** `docs/reports/TRACK_COMPLETION_enforcement_gap_closure_20260627.md` (NEW file)
|
||||
- **WHAT:** Report following the precedent of `TRACK_COMPLETION_post_module_taxonomy_de_cruft_20260627.md`:
|
||||
- TL;DR
|
||||
- Phase summary (each phase + commits + status)
|
||||
- Verification Criteria status (mapped to spec G1-G8)
|
||||
- File-level changes (new + modified + renamed + new test files)
|
||||
- Commits log (atomic, ordered)
|
||||
- Audit gate status (all 7)
|
||||
- Contradictions closed (C1, C2, C3-partial, C18-partial, C21) and remaining (C5, C6, C16, C17 — deferred per user directive; cite spec OOS)
|
||||
- Known residuals: 2 boundary (`hot_reloader.py`, `startup_profiler.py`) + 3 Optional (`src/history.py`); these are baselined + owned by future tracks
|
||||
- Next steps for the user (review + the recommended follow-up track)
|
||||
- **COMMIT:** `docs(reports): TRACK_COMPLETION_enforcement_gap_closure_20260627`
|
||||
- **GIT NOTE:** End-of-track report; documents contradiction closure + residual baselines.
|
||||
|
||||
- [ ] Task 4.3: Update `conductor/tracks.md` + `conductor/chronology.md` + `conductor/tracks/enforcement_gap_closure_20260627/state.toml`
|
||||
- **WHAT:**
|
||||
1. `state.toml`: mark all phases "completed" with their checkpoint SHA; set `status = "completed"` + `current_phase = "complete"`.
|
||||
2. `conductor/tracks.md`: add a row to the Active Tracks table for this track (status "shipped"); or per the convention of recent tracks, the row is added when the track is initiated and the status updated when shipped.
|
||||
3. `conductor/chronology.md`: prepend a row for `2026-06-27 | enforcement_gap_closure_20260627 | shipped | summary...` at the top of the table.
|
||||
- **COMMIT:** `conductor(state): enforcement_gap_closure_20260627 SHIPPED + TRACK_COMPLETION`
|
||||
- **GIT NOTE:** Track state + chronology + tracks.md closed out.
|
||||
|
||||
- [ ] Task 4.4: Conductor - User Manual Verification (Protocol in workflow.md)
|
||||
- **WHAT:** Per the workflow.md "Phase Completion Verification and Checkpointing Protocol", present the results to the user for confirmation. Present: the 7-audit strict pass result, the test count, the contradictions closed, and the residual baselines. PAUSE for user sign-off.
|
||||
- **COMMIT:** (no commit; this is the user-confirmation gate)
|
||||
- **GIT NOTE:** User sign-off record.
|
||||
@@ -0,0 +1,433 @@
|
||||
# Track Specification: Enforcement Gap Closure (Boundary-Layer Audit + Optional[T] Audit Widening)
|
||||
|
||||
## Overview
|
||||
|
||||
Close the two genuine enforcement gaps in the 7-banned-pattern mandate documented in
|
||||
`conductor/code_styleguides/python.md` §17 (the LLM Default Anti-Patterns):
|
||||
|
||||
1. **The boundary-layer audit** — the script that enforces "no `dict[str, Any]`
|
||||
outside the 2-3 wire-parse functions per file" (`python.md` §17.7). Currently
|
||||
marked "⚠️ not yet built" in the §17 enforcement inventory (`python.md:454`),
|
||||
though the cruft_elimination_20260627 Phase 10 only produced a *report*
|
||||
(`docs/reports/boundary_layer_20260628.md`) — never the *audit script*. This
|
||||
is the one that prevents the next LLM from reaching for `dict[str, Any]` in
|
||||
`app_controller.py` again.
|
||||
|
||||
2. **The `audit_optional_in_3_files.py` rename + widening** — the script
|
||||
currently named `audit_optional_in_3_files.py` actually checks 4 files
|
||||
(the contradictions report C1+C21) and only enforces the `Optional[T]` ban
|
||||
on those 4 baseline files. `python.md:359` already references a successor
|
||||
`audit_optional_returns.py` (claimed "✅ implemented" in the inventory at
|
||||
`python.md:452`) but the rename never happened and the script never widened
|
||||
to all `src/*.py`. This track lands reality on both the script and the doc.
|
||||
|
||||
Both pieces are parallel-safe against the running `post_module_taxonomy_de_cruft_20260627`
|
||||
Tier 2 work: this track touches only `scripts/audit_*`, `scripts/*.toml` (allowlists),
|
||||
`conductor/code_styleguides/python.md` (the inventory table), and new `tests/test_*`
|
||||
files. Zero overlap with `src/models.py`, `tests/test_models*`, `src/api_hooks.py`,
|
||||
`scripts/audit_no_models_config_io.py`, or anything else Tier 2 is modifying.
|
||||
|
||||
## Current State Audit (as of master `77b70226`, branch `tier2/post_module_taxonomy_de_cruft_20260627` `ddcec7b0`)
|
||||
|
||||
### Already Implemented (DO NOT re-implement)
|
||||
|
||||
- `scripts/audit_weak_types.py` (388 lines) — flags `dict[str, Any]`, `Any`,
|
||||
anonymous tuple returns; informational default + `--strict` CI gate; reads
|
||||
`scripts/audit_weak_types.baseline.json`. **Implemented, working.** Covers
|
||||
§17.1 (`dict[str, Any]` / `Any` ban) and §17.2 (anonymous tuples) globally.
|
||||
|
||||
- `scripts/audit_exception_handling.py` (~500 lines) — classifies
|
||||
`try/except/finally/raise` sites into 10 categories; informational default +
|
||||
`--strict` CI gate. **Implemented, working.** Covers §17.3 (silent swallow /
|
||||
broad catch) globally.
|
||||
|
||||
- `scripts/audit_imports.py` (309 lines) — flags local imports (§17.9a),
|
||||
`_PREFIX` aliasing (§17.9b), and repeated `.from_dict()` (§17.9c);
|
||||
informational default + `--strict` CI gate; reads
|
||||
`scripts/audit_imports_whitelist.toml` for vendor-SDK-warmup + hot-reload
|
||||
per-file exemptions. **Implemented, working** (despite `python.md:455-456`
|
||||
marking it "not yet built" — a doc drift this track fixes). Covers §17.9
|
||||
fully.
|
||||
|
||||
- `scripts/audit_imports_whitelist.toml` (81 lines) — per-file whitelist with
|
||||
`reason` field + "Last reviewed" header. **The precedent template** for the
|
||||
new `boundary_layer_allowlist.toml` this track creates.
|
||||
|
||||
- `scripts/audit_optional_in_3_files.py` (122 lines) — AST-scans 4 files
|
||||
(`src/mcp_client.py`, `src/ai_client.py`, `src/rag_engine.py`,
|
||||
`src/code_path_audit.py`); the `BASELINE_FILES` tuple at line 17-22 is the
|
||||
only thing pinning it to those files; the audit logic is generic
|
||||
(`_return_annotation_is_optional`, `_annotation_is_optional_arg`,
|
||||
`audit_file`). **Implementation 100% reusable; only the file glob +
|
||||
name + docs need to change.**
|
||||
|
||||
### Gaps to Fill (This Track's Scope)
|
||||
|
||||
- **GAP-1: No boundary-layer audit script exists.** `python.md:454` and
|
||||
`python.md:361` mark it "planned / not yet built". The
|
||||
`cruft_elimination_20260627` spec describes it at FR1 §72 ("Boundary Layer
|
||||
is EXACTLY 2 places") and G14 ("boundary layer is documented as exactly 2
|
||||
places") but only ever delivered a *report* (`boundary_layer_20260628.md`),
|
||||
never a *static audit*. Without this, the §17.7 contract ("2-3 boundary
|
||||
functions per file, everything else must be typed") is policy-without-teeth.
|
||||
|
||||
- **GAP-2: `audit_optional_in_3_files.py` name lies + scope is too narrow.**
|
||||
- It actually checks 4 files (mcp_client, ai_client, rag_engine,
|
||||
code_path_audit) but is named "_3_files".
|
||||
- It only covers those 4 baseline files. The §17 mandate requires
|
||||
`Optional[T]` return-types banned in *all* `src/*.py`.
|
||||
- `python.md:359` + `python.md:452` already promise an
|
||||
`audit_optional_returns.py` "covering all `src/*.py`" — but no such
|
||||
script exists. The doc claims reality that the code doesn't match.
|
||||
|
||||
- **GAP-3: `python.md` §17 inventory table is internally inconsistent.**
|
||||
Lines 451-456 mark `audit_imports.py` as "not yet built" (false — it exists)
|
||||
and `audit_optional_returns.py` as "implemented" (false — it doesn't exist;
|
||||
only the `audit_optional_in_3_files.py` does). This track corrects both rows
|
||||
to match post-track reality.
|
||||
|
||||
### Verified `dict[str, Any]` Distribution on master (the blast-radius for GAP-1)
|
||||
|
||||
Per the audit-style AST scan I ran on master at `77b70226` (full scan of all
|
||||
`src/*.py`):
|
||||
|
||||
| File | ret sites | param sites | has `from_dict` | calls tomllib/json.loads |
|
||||
|------|-----------|-------------|------------------|--------------------------|
|
||||
| src/theme_models.py | 2 | 2 | yes | yes |
|
||||
| src/context_presets.py | 0 | 3 | no | no |
|
||||
| src/log_registry.py | 2 | 1 | yes | yes |
|
||||
| src/hot_reloader.py | 1 | 1 | no | no |
|
||||
| src/mcp_client.py | 0 | 2 | yes | yes |
|
||||
| src/personas.py | 1 | 1 | yes | yes |
|
||||
| src/presets.py | 1 | 1 | no | yes |
|
||||
| src/tool_presets.py | 1 | 1 | yes | yes |
|
||||
| src/type_aliases.py | 1 | 1 | yes | no |
|
||||
| src/workspace_manager.py | 1 | 1 | yes | yes |
|
||||
| src/events.py | 1 | 0 | no | no |
|
||||
| src/gemini_cli_adapter.py | 1 | 0 | no | yes |
|
||||
| src/openai_compatible.py | 1 | 0 | no | no |
|
||||
| src/paths.py | 1 | 0 | no | yes |
|
||||
| src/session_logger.py | 0 | 1 | no | no |
|
||||
| src/startup_profiler.py | 1 | 0 | no | no |
|
||||
| ... 50 other `src/*.py` | 0 | 0 | (varies) | (varies) |
|
||||
|
||||
Totals: **12 `dict[str, Any]` returns + 16 params across 16 files**; ~50 other
|
||||
files have zero `dict[str, Any]` in signatures.
|
||||
|
||||
Per-file manual classification (the same kind of classification the
|
||||
`audit_imports_whitelist.toml` makes for hot-reload files):
|
||||
|
||||
- **LEGITIMATE BOUNDARY** (audit must allow): `context_presets.py`
|
||||
(`load_all/save_preset/delete_preset(project_dict: Dict[str, Any])` —
|
||||
`project_dict` IS the wire TOML), `events.py` `to_dict()` (wire
|
||||
serialization for the WS protocol), `openai_compatible.py`
|
||||
`_to_dict_tool_call(tc: ToolCall) -> dict[str, Any]` (converts typed
|
||||
`ToolCall` to vendor wire dict), `theme_models.py` (the schema is the wire
|
||||
for `.ini` rendering), `log_registry.py` (JSON-L log shape), `presets.py`,
|
||||
`tool_presets.py`, `personas.py`, `workspace_manager.py`, `paths.py`,
|
||||
`gemini_cli_adapter.py`, `mcp_client.py` (the MCP wire-protocol parsers),
|
||||
`type_aliases.py` (`from_dict(raw: dict[str, Any])` classmethods — the
|
||||
literal definition of boundary), `session_logger.py` (writes JSONL).
|
||||
- **GENUINE VIOLATIONS** (audit should flag, baseline captures them so
|
||||
strict stays green until a migration track fixes): `hot_reloader.py`
|
||||
(`capture_state`/`restore_state(app, ...) -> dict[str, Any]` — internal
|
||||
state, could be a `HotReloadSnapshot` dataclass), `startup_profiler.py`
|
||||
(`snapshot() -> dict[str, Any]` — could be a `ProfilerSnapshot` dataclass).
|
||||
|
||||
So the audit must:
|
||||
1. Find every `dict[str, Any]` in function signatures (param + return +
|
||||
annotated assignment) in every `src/*.py`.
|
||||
2. For each site, check whether its enclosing function is allowlisted in
|
||||
`scripts/boundary_layer_allowlist.toml` (per-file + per-function entries
|
||||
with a `reason` field, mirroring the `audit_imports_whitelist.toml`
|
||||
contract).
|
||||
3. Exit 1 in `--strict` mode on any *un*-allowlisted site.
|
||||
4. Emit a `WHITELISTED` annotation per allowlisted file so the user sees the
|
||||
audit considered it (mirrors the `audit_imports.py` precedent).
|
||||
5. Ship an initial `boundary_layer_allowlist.toml` listing the ~14 legitimate
|
||||
boundary files identified above, each with a `reason` field documenting
|
||||
why it's at the wire.
|
||||
|
||||
### Verified `Optional[T]` Return-Type Distribution on master (the blast-radius for GAP-2)
|
||||
|
||||
Same AST scan, but counting `Optional[X]` return annotations:
|
||||
- **Total `RETURN_OPTIONAL` violations: 3, in 1 file** (`src/history.py`)
|
||||
- **Total `PARAM_OPTIONAL` (warning only, never blocks strict): 119 across many files**
|
||||
— these are legal per `error_handling.md` ("argument types that may be
|
||||
`None` describe a caller choice, not a runtime failure").
|
||||
|
||||
So widening the audit from 4 files → all `src/*.py` surfaces **3 new strict
|
||||
violations** in `src/history.py`. The existing `audit_optional_in_3_files.py`
|
||||
already covers the 4 baseline files (all clean). This track adds the 3
|
||||
`history.py` sites to a new `audit_optional_returns.baseline.json` so the
|
||||
widened strict gate stays green until cruft_elimination Phase 6 (which owns
|
||||
those 3 sites) actually migrates them. The 3 sites are documented in the
|
||||
allowlist; they are NOT fixed by this track (out of scope; the fix belongs to
|
||||
the cruft_elimination Phase 6 Optional[T]-migration work).
|
||||
|
||||
## Goals
|
||||
|
||||
- **G1.** A working `scripts/audit_boundary_layer.py` that AST-scans all
|
||||
`src/*.py` for `dict[str, Any]` in function signatures (params, returns,
|
||||
annotated locals) and exits 1 in `--strict` mode on any un-allowlisted site.
|
||||
|
||||
- **G2.** A working `scripts/boundary_layer_allowlist.toml` that declares the
|
||||
legitimate boundary functions per file, each with a `reason` field, modeled
|
||||
on `audit_imports_whitelist.toml` (with `--show-allowlist` and
|
||||
`--no-allowlist` flags mirroring the imports whitelist precedent).
|
||||
|
||||
- **G3.** `audit_optional_in_3_files.py` renamed to
|
||||
`audit_optional_returns.py`, `BASELINE_FILES` replaced with a `src/*.py`
|
||||
glob, docstrings updated to drop the "3 files" fiction. The 3 `history.py`
|
||||
violations baselined in `audit_optional_returns.baseline.json` so strict
|
||||
stays green. Existing strict callers (`code_path_audit_20260607` referenced
|
||||
the old name — update or alias accordingly).
|
||||
|
||||
- **G4.** `python.md` §17 enforcement inventory (lines 449-456) corrected to
|
||||
match post-track reality: `audit_boundary_layer.py` implemented, the renamed
|
||||
`audit_optional_returns.py` "scans all `src/*.py`", `audit_imports.py`
|
||||
marked implemented (it already is), and the inventory's "Pre-commit: every
|
||||
commit MUST pass all four audits" line updated to "five audits" (or
|
||||
whatever the actual post-track count is).
|
||||
|
||||
- **G5.** `conductor/code_styleguides/error_handling.md` and
|
||||
`conductor/code_styleguides/python.md` references to the renamed script
|
||||
updated (any line saying `audit_optional_in_3_files.py` ->
|
||||
`audit_optional_returns.py`, except the one legacy cross-reference note
|
||||
in `python.md:359` documenting the rename history).
|
||||
|
||||
- **G6.** New tests in `tests/test_audit_boundary_layer.py` (≥10 tests:
|
||||
finder detects `dict[str, Any]` in return / param / local annotation;
|
||||
allowlist suppresses findings + emits WHITELISTED; `--strict` exits 1 on
|
||||
un-allowlisted site, exits 0 on allowlisted; `--json` output shape; missing
|
||||
file handling; syntax error handling).
|
||||
|
||||
- **G7.** New/updated tests in `tests/test_audit_optional_returns.py`
|
||||
(or update existing test file if one references the old name): ≥5 tests
|
||||
confirming the widened scope, the rename, baseline reading, and
|
||||
`--strict` behavior.
|
||||
|
||||
- **G8.** End-of-track report at
|
||||
`docs/reports/TRACK_COMPLETION_enforcement_gap_closure_20260627.md`
|
||||
documenting what shipped + the residual violation baselines + any
|
||||
contradictions from `CONTRADICTIONS_REPORT_20260627.md` closed (C1, C2,
|
||||
C3-partial, C18-partial, C21) and which remain (C5, C6, C16, C17 — those
|
||||
are docs-sync items deferred until tier2 stabilizes, per user directive
|
||||
2026-06-27).
|
||||
|
||||
## Functional Requirements
|
||||
|
||||
### FR1: `scripts/audit_boundary_layer.py`
|
||||
|
||||
- **CLI contract** mirrors `audit_exception_handling.py` + `audit_imports.py`:
|
||||
- `uv run python scripts/audit_boundary_layer.py` — informational (exits 0)
|
||||
- `uv run python scripts/audit_boundary_layer.py --strict` — exits 1 on
|
||||
any un-allowlisted `dict[str, Any]` signature site
|
||||
- `uv run python scripts/audit_boundary_layer.py --json` — JSON output
|
||||
- `uv run python scripts/audit_boundary_layer.py --show-allowlist` —
|
||||
prints the current allowlist + reasons, exits 0
|
||||
- `uv run python scripts/audit_boundary_layer.py --no-allowlist` —
|
||||
audits all sites regardless of allowlist (for one-off audits)
|
||||
- **Detection contract** — finds `dict[str, Any]` in:
|
||||
- function return annotations (`def f(...) -> dict[str, Any]`)
|
||||
- function parameter annotations (`def f(x: dict[str, Any])`)
|
||||
- annotated assignments to locals at function scope
|
||||
(`acc: dict[str, dict[str, Any]] = {}` — common pattern in vendor adapters)
|
||||
- **Allowlist contract** — reads `scripts/boundary_layer_allowlist.toml`.
|
||||
Per-file entries: `[allowlist."<relative_path>"] reason = "..."`. Within
|
||||
an allowlisted file, ALL `dict[str, Any]` sites are suppressed with a
|
||||
single `WHITELISTED` annotation per file (mirrors `audit_imports.py`
|
||||
precedent; per-line entries would be brittle because the same file has
|
||||
multiple boundary functions). Use `--no-allowlist` to ignore the allowlist.
|
||||
- **Coverage:** all `src/*.py`. The audit does NOT traverse `tests/`,
|
||||
`scripts/`, `simulation/` — those aren't subject to §17.7.
|
||||
- **Defaults:** informational mode prints a summary table (file, sites,
|
||||
allowlisted?) + a list of violations. `--strict` prints the same and
|
||||
exits 1 if there are un-allowlisted sites.
|
||||
- **Source:** 1-space indent, no comments in body, type-hinted, docstrings
|
||||
where the contract is non-obvious. Module docstring explains the §17.7
|
||||
contract + the allowlist pattern.
|
||||
|
||||
### FR2: `scripts/boundary_layer_allowlist.toml`
|
||||
|
||||
- TOML file modeled on `audit_imports_whitelist.toml`:
|
||||
- Header comment block explaining the purpose + the format.
|
||||
- "Last reviewed: 2026-06-27"
|
||||
- `[allowlist."<relative_path>"]` entries for each legitimate boundary
|
||||
file with a `reason` field documenting why it's at the wire boundary.
|
||||
- **Initial contents:** the ~14 legitimate boundary files identified in the
|
||||
Current State Audit (`context_presets.py`, `events.py`,
|
||||
`openai_compatible.py`, `theme_models.py`, `log_registry.py`, `presets.py`,
|
||||
`tool_presets.py`, `personas.py`, `workspace_manager.py`, `paths.py`,
|
||||
`gemini_cli_adapter.py`, `mcp_client.py`, `type_aliases.py`,
|
||||
`session_logger.py`). The two genuine violators (`hot_reloader.py`,
|
||||
`startup_profiler.py`) are NOT in the allowlist — the audit will flag them
|
||||
on master, but `audit_boundary_layer.baseline.json` will record them so
|
||||
`--strict` stays green until a future track migrates them.
|
||||
|
||||
### FR3: Rename + widen `audit_optional_in_3_files.py` → `audit_optional_returns.py`
|
||||
|
||||
- **Rename:** `git mv scripts/audit_optional_in_3_files.py
|
||||
scripts/audit_optional_returns.py` (preserves git history).
|
||||
- **Code changes:**
|
||||
- Module docstring: drop "4 baseline files"; say "all `src/*.py` per
|
||||
§17 post-2026-06-27 widening".
|
||||
- `BASELINE_FILES: tuple[str, ...] = (...)` → `def _discover_src_files() ->
|
||||
list[Path]: return sorted(Path("src").glob("*.py"))` (the precedent is
|
||||
`audit_exception_handling.py`'s glob approach).
|
||||
- `audit_file()` is already generic — no logic change.
|
||||
- Output: the summary line says "scanned N files" with N = the count.
|
||||
- **Baseline file:** create `scripts/audit_optional_returns.baseline.json`
|
||||
recording the 3 `src/history.py` `RETURN_OPTIONAL` violations so
|
||||
`--strict` stays green. The strict-mode behavior: exit 1 if findings >
|
||||
baseline, exit 0 otherwise. (Mirrors `audit_weak_types.py`'s baseline +
|
||||
`--strict` contract — see `audit_weak_types.baseline.json`.)
|
||||
- **Backward-compat:** The old name `audit_optional_in_3_files.py` is gone.
|
||||
Any external references to the old name must be updated. (Per the
|
||||
pre-flight grep, references exist in `python.md:359`, `python.md:452`,
|
||||
and possibly `error_handling.md` — those are doc edits in G5. The
|
||||
`code_path_audit_20260607` track's plan referenced the old name as a
|
||||
cross-reference contract — that's historical; not updated.)
|
||||
|
||||
### FR4: `python.md` §17 enforcement inventory + §17.8 enforcement section
|
||||
|
||||
- **§17 inventory table (lines 449-456)** corrected:
|
||||
- Row for `dict[str, Any]` ban: `audit_weak_types.py` (implemented) +
|
||||
`audit_boundary_layer.py --strict` (implemented this track) — BOTH
|
||||
listed, with the boundary audit's note: "uses
|
||||
`scripts/boundary_layer_allowlist.toml`; use `--no-allowlist` to audit
|
||||
all `src/*.py` without suppression."
|
||||
- Row for `Optional[T]` returns: `audit_optional_returns.py` (renamed +
|
||||
widened to all `src/*.py` this track; reads
|
||||
`audit_optional_returns.baseline.json` for the 3 `history.py` residuals
|
||||
until cruft_elimination Phase 6).
|
||||
- Row for local imports + aliasing + repeated `from_dict()`:
|
||||
`audit_imports.py` — marked "✅ implemented" (CORRECTED from current
|
||||
"⚠️ not yet built").
|
||||
- Row for repeated `.from_dict()`: same as above (covered by
|
||||
`audit_imports.py`).
|
||||
- **§17.8 enforcement section (lines 357-362)** updated:
|
||||
- Bullet for `audit_optional_returns.py` → reflects rename + widening.
|
||||
- Bullet for `audit_imports.py` → marked implemented (drop the parenthetical
|
||||
"planned in §17.9a").
|
||||
- Bullet for "boundary_layer audit (planned...)" → replaced with bullet
|
||||
for `audit_boundary_layer.py --strict` (implemented, references
|
||||
`boundary_layer_allowlist.toml`).
|
||||
- The "Pre-commit: every commit MUST pass all four audits above" line →
|
||||
"five audits" (weak_types, boundary_layer, optional_returns,
|
||||
exception_handling, imports).
|
||||
|
||||
### FR5: Test files
|
||||
|
||||
- **`tests/test_audit_boundary_layer.py`** (NEW) — ≥10 tests:
|
||||
- `test_finder_detects_dict_return_annotation` — synthetic .py with a
|
||||
`def f() -> dict[str, Any]: ...` → finding emitted.
|
||||
- `test_finder_detects_dict_param_annotation` — `def f(x: dict[str, Any])`
|
||||
→ finding emitted.
|
||||
- `test_finder_detects_dict_local_assignment` — `acc: dict[str, Any] = {}`
|
||||
inside a function → finding emitted.
|
||||
- `test_finder_ignores_non_dict_any` — `def f() -> dict[str, int]` → no
|
||||
finding.
|
||||
- `test_allowlist_suppresses_findings` — file in allowlist → findings
|
||||
suppressed, `WHITELISTED` annotation emitted instead.
|
||||
- `test_strict_exits_1_on_violation` — un-allowlisted violation → exit 1.
|
||||
- `test_strict_exits_0_when_allowlisted` — allowlisted file → exit 0.
|
||||
- `test_json_output_shape` — `--json` output has the expected top-level
|
||||
keys (`files_scanned`, `files_with_findings`, `total_findings`,
|
||||
`by_kind`, `findings`).
|
||||
- `test_missing_file_handling` — referenced file absent → graceful
|
||||
`MISSING_FILE` finding, not a crash.
|
||||
- `test_syntax_error_handling` — malformed .py → graceful `SYNTAX_ERROR`
|
||||
finding, not a crash.
|
||||
- `test_show_allowlist_flag` — `--show-allowlist` prints entries, exits 0.
|
||||
- **`tests/test_audit_optional_returns.py`** (NEW) — ≥5 tests:
|
||||
- `test_renamed_script_exists` — `scripts/audit_optional_returns.py`
|
||||
exists; `scripts/audit_optional_in_3_files.py` does NOT.
|
||||
- `test_scans_all_src_files` — audit finds a synthetic `Optional[X]`
|
||||
return in a new file under `src/` that wasn't in the old 4-file
|
||||
baseline. (Use `monkeypatch` to point at a `tmp_path` src/ tree.)
|
||||
- `test_baseline_reading_keeps_strict_green` — with 3 known `history.py`
|
||||
sites baselined, `--strict` exits 0.
|
||||
- `test_strict_exits_1_above_baseline` — add 1 new `Optional[X]` return
|
||||
not in baseline → exit 1.
|
||||
- `test_param_optional_is_warning_not_strict` — `PARAM_OPTIONAL`
|
||||
findings never cause `--strict` to exit 1.
|
||||
|
||||
## Non-Functional Requirements
|
||||
|
||||
- **1-space indentation** for all Python code (hard rule per workflow.md).
|
||||
- **No comments in body** per AGENTS.md "No comments to source code".
|
||||
- **CRLF line endings** preserved on Windows (use `manual-slop_edit_file`
|
||||
MCP tool, not native `edit`, to preserve formatting per workflow.md).
|
||||
- **Atomic per-task commits** — never batch; one task = one commit + one
|
||||
plan/state update commit.
|
||||
- **No diagnostic noise** — no `sys.stderr.write("[FOO] ...")` lines in
|
||||
the audit scripts.
|
||||
- **`--json` mode** produces machine-readable output for CI integration.
|
||||
- **Default mode** is informational (exit 0) per the precedent of every
|
||||
other audit script; `--strict` is the CI gate.
|
||||
- **Performance** — the audit scans all `src/*.py` (~66 files); AST parse
|
||||
+ walk should complete in well under 1 second wall-clock (the existing
|
||||
`audit_weak_types.py` does the same scale and is sub-second).
|
||||
|
||||
## Architecture Reference
|
||||
|
||||
- **`docs/guide_meta_boundary.md`** — the domain-distinction rule; the
|
||||
boundary layer is an Application concept, not a meta-tooling one.
|
||||
- **`docs/reports/boundary_layer_20260628.md`** — the *report* this audit
|
||||
*implements*. Lists every legitimate `Metadata` usage and explains why
|
||||
each is at the wire boundary.
|
||||
- **`conductor/code_styleguides/python.md` §17.7** — the §17.7 contract:
|
||||
"the ONLY place these patterns are allowed is at the literal wire
|
||||
boundary — the function that calls `tomllib.load()`, `json.loads()`, or
|
||||
a vendor SDK's response parser. The boundary is 2-3 functions per file."
|
||||
- **`conductor/code_styleguides/data_oriented_design.md` §8.5** — the
|
||||
Python Type Promotion Mandate (the canonical rule this audit enforces).
|
||||
- **`conductor/code_styleguides/error_handling.md`** — the `Optional[T]`
|
||||
ban (and the `Result[T]` + `NIL_T` replacement pattern).
|
||||
- **`scripts/audit_imports.py` + `scripts/audit_imports_whitelist.toml`** —
|
||||
the precedent template: AST scan + per-file allowlist + `--strict` CI gate
|
||||
+ `--json` / `--show-whitelist` / `--no-whitelist` flags. The new
|
||||
`audit_boundary_layer.py` should match this contract closely.
|
||||
- **`scripts/audit_weak_types.py` + `scripts/audit_weak_types.baseline.json`** —
|
||||
the precedent for the `--strict` baseline-JSOא contract (baseline of known
|
||||
violations; `--strict` exits 1 if current findings exceed baseline). The
|
||||
renamed `audit_optional_returns.py` reuses this pattern for the 3
|
||||
`history.py` residuals.
|
||||
- **`docs/reports/CONTRADICTIONS_REPORT_20260627.md`** — the source of the
|
||||
contradictions this track closes: C1 (audit name vs behavior), C2
|
||||
(Optional ban scope ambiguity), C3 (audit_imports "planned" but actually
|
||||
built), C18 (2/7 vs actually 4/7 patterns audited), C21 (script name).
|
||||
- **`docs/reports/TRACK_COMPLETION_post_module_taxonomy_de_cruft_20260627.md`**
|
||||
— current state of the running parallel track; confirms zero file-overlap.
|
||||
|
||||
## Out of Scope
|
||||
|
||||
- **Fixing the 3 `src/history.py` `Optional[T]` returns.** Those belong to
|
||||
`cruft_elimination_20260627` Phase 6 (the deferred Optional[T]-returns
|
||||
migration work). This track only *baselines* them so the widened strict
|
||||
gate stays green; the actual migration is the future track's job.
|
||||
- **Fixing the 2 `hot_reloader.py` + `startup_profiler.py` `dict[str, Any]`
|
||||
violations.** Same logic: baseline only; a future track migrates them to
|
||||
typed dataclasses (`HotReloadSnapshot`, `ProfilerSnapshot`).
|
||||
- **Docs-count drift in `docs/Readme.md`** (providers 5→8, tests 322→251,
|
||||
commands 50+→33). Per user directive 2026-06-27: wait for tier2 branch
|
||||
to stabilize before touching `docs/Readme.md`.
|
||||
- **Styleguide §10 Anti-OOP self-contradiction (C16)** and
|
||||
**`type_aliases.md` line 19 table (C17)** — both deferred per user
|
||||
directive (they describe code state that only exists post-merge of the
|
||||
tier2 taxonomy branches; fixing them now would make master's docs
|
||||
describe code master doesn't have).
|
||||
- **`RAGChunk.id` field in `guide_rag.md` (C6)** — same branch-sensitivity
|
||||
reason; deferred.
|
||||
- **Building the "repeated `.from_dict()` in same expression" enforcement.**
|
||||
`audit_imports.py` already covers it per §17.9c. No new script needed.
|
||||
- **Building `scripts/audit_optional_returns.py` baseline migration path.**
|
||||
The 3 `history.py` sites are simply added to the initial baseline JSON;
|
||||
no migration script is needed.
|
||||
- **Wire `--strict` mode of `audit_boundary_layer.py` into actual pre-commit
|
||||
hooks in the main repo's `.git/hooks/`.** Per C4 in the contradictions
|
||||
report, pre-commit enforcement is sandbox-only for now; main-repo wiring
|
||||
is a separate track.
|
||||
- **Touching any `src/*.py` source.** This track is pure audit +
|
||||
styleguide + tests. Zero `src/` edits.
|
||||
@@ -0,0 +1,64 @@
|
||||
# Track state for enforcement_gap_closure_20260627
|
||||
# Initialized by Tier 1 Orchestrator on 2026-06-27.
|
||||
# Implementation delegated to Tier 2 (autonomous) or Tier 3 worker dispatch.
|
||||
|
||||
[meta]
|
||||
track_id = "enforcement_gap_closure_20260627"
|
||||
name = "Enforcement Gap Closure (Boundary-Layer Audit + Optional[T] Audit Widening)"
|
||||
status = "active"
|
||||
current_phase = 0 # 0 = pre-Phase 1; bump to 1 when implementation starts
|
||||
last_updated = "2026-06-27"
|
||||
|
||||
[blocked_by]
|
||||
# None. This track is parallel-safe against the running
|
||||
# tier2/post_module_taxonomy_de_cruft_20260627 branch (zero file overlap
|
||||
# verified by Tier 1 against ddcec7b0 + TRACK_COMPLETION file-level changes).
|
||||
|
||||
[blocks]
|
||||
# None. Follow-up tracks (history.py Optional migration, hot_reloader/
|
||||
# startup_profiler dict migration) are documented in metadata.json but not
|
||||
# formally tracked here.
|
||||
|
||||
[phases]
|
||||
# All 4 phases per plan.md. checkpointsha filled when the phase checkpoint
|
||||
# commit is made by the implementing Tier 2/Tier 3.
|
||||
phase_1 = { status = "pending", checkpointsha = "", name = "Boundary-Layer Audit Script (script + allowlist + 10 tests)" }
|
||||
phase_2 = { status = "pending", checkpointsha = "", name = "Optional[T] Audit Rename + Widening (rename + 5 tests + baseline JSON)" }
|
||||
phase_3 = { status = "pending", checkpointsha = "", name = "Styleguide Doc Reconciliation (python.md s17 + cross-ref sweep)" }
|
||||
phase_4 = { status = "pending", checkpointsha = "", name = "End-of-Track Report + State Update + User Sign-off" }
|
||||
|
||||
[tasks]
|
||||
# Phase 1: boundary-layer audit script + allowlist + tests
|
||||
t1_1 = { status = "pending", commit_sha = "", description = "Write 10 failing tests in tests/test_audit_boundary_layer.py (Red phase)" }
|
||||
t1_2 = { status = "pending", commit_sha = "", description = "Implement scripts/audit_boundary_layer.py per spec FR1 (finder + allowlist + strict + json + --show-allowlist + --no-allowlist + --src)" }
|
||||
t1_3 = { status = "pending", commit_sha = "", description = "Write scripts/boundary_layer_allowlist.toml with ~14 boundary files + reasons" }
|
||||
t1_4 = { status = "pending", commit_sha = "", description = "Run tests/test_audit_boundary_layer.py -v (Green phase); verify all 10 pass" }
|
||||
# Phase 2: Optional audit rename + widening
|
||||
t2_1 = { status = "pending", commit_sha = "", description = "Write 5 failing tests in tests/test_audit_optional_returns.py (Red phase)" }
|
||||
t2_2 = { status = "pending", commit_sha = "", description = "git mv audit_optional_in_3_files.py -> audit_optional_returns.py + widen glob to all src/*.py + add --src flag + create audit_optional_returns.baseline.json with 3 history.py residuals" }
|
||||
t2_3 = { status = "pending", commit_sha = "", description = "Run tests/test_audit_optional_returns.py -v (Green phase); verify all 5 pass" }
|
||||
# Phase 3: styleguide doc reconciliation
|
||||
t3_1 = { status = "pending", commit_sha = "", description = "Edit conductor/code_styleguides/python.md s17 inventory table (lines 449-456) + s17.8 enforcement section (lines 357-362) per spec FR4" }
|
||||
t3_2 = { status = "pending", commit_sha = "", description = "Cross-reference sweep for audit_optional_in_3_files.py in conductor/ + docs/ (update enforcement references; preserve historical)" }
|
||||
# Phase 4: end-of-track
|
||||
t4_1 = { status = "pending", commit_sha = "", description = "Run the 7-audit strict suite (verify all pass; the 2 boundary + 3 Optional residuals baselined)" }
|
||||
t4_2 = { status = "pending", commit_sha = "", description = "Write docs/reports/TRACK_COMPLETION_enforcement_gap_closure_20260627.md per spec G8" }
|
||||
t4_3 = { status = "pending", commit_sha = "", description = "Update conductor/tracks.md + conductor/chronology.md + state.toml -> status='completed'" }
|
||||
t4_4 = { status = "pending", commit_sha = "", description = "Conductor - User Manual Verification (PAUSE for user sign-off)" }
|
||||
|
||||
[verification]
|
||||
# Filled as phases complete.
|
||||
phase_1_complete = false
|
||||
phase_2_complete = false
|
||||
phase_3_complete = false
|
||||
phase_4_complete = false
|
||||
all_7_audit_gates_strict_pass = false
|
||||
contradictions_closed_c1_c2_c3_partial_c18_partial_c21 = false
|
||||
|
||||
[scope_summary]
|
||||
# Populated by Tier 1; static scope summary for re-warm after compaction.
|
||||
new_files_count = 7
|
||||
modified_files_count = 5
|
||||
deleted_files_count = 1 # via git mv (audit_optional_in_3_files.py -> audit_optional_returns.py)
|
||||
parallel_safe_against_post_module_taxonomy_de_cruft = true
|
||||
parallel_safety_evidence = "Tier 1 verified zero file overlap against ddcec7b0 + TRACK_COMPLETION_post_module_taxonomy_de_cruft_20260627.md file-level changes table on 2026-06-27"
|
||||
@@ -0,0 +1,256 @@
|
||||
# Tier 2 Startup Brief: module_taxonomy_refactor_20260627
|
||||
|
||||
## Context
|
||||
|
||||
The user reported `models.py` is a "dumping ground" (1044 lines, 36 classes, 5+ unrelated domains). They want a clean taxonomy. Per their principle: **unify unless there's a good reason (import load times, definition pollution)**. No sub-directories. Prefix naming.
|
||||
|
||||
## MANDATORY Pre-Action Reading (per agent protocol)
|
||||
|
||||
1. `AGENTS.md` (project root) — operating rules, especially "File Size and Naming Convention" HARD RULE
|
||||
2. `conductor/workflow.md` — the workflow
|
||||
3. `conductor/edit_workflow.md` — the edit workflow
|
||||
4. `conductor/code_styleguides/data_oriented_design.md` — "Prefer Fewer Types" principle
|
||||
5. `conductor/code_styleguides/error_handling.md` — the `Result[T]` convention (Rule #0: read first)
|
||||
6. `conductor/code_styleguides/type_aliases.md` — the 10 TypeAliases convention
|
||||
7. `conductor/code_styleguides/code_path_audit.md` — code path audit styleguide
|
||||
8. `docs/reports/FOLLOWUP_module_taxonomy_20260627.md` — the audit that motivated this track
|
||||
9. `conductor/tracks/cruft_elimination_20260627/SPEC_CORRECTION_phase_2.md` — the related spec correction
|
||||
10. `src/models.py` — the 1044-line file to split (read in full)
|
||||
|
||||
**First commit of this track must include** `TIER-2 READ <list> before module_taxonomy_refactor_20260627` in the message.
|
||||
|
||||
## The Decision Rule (the user's principle)
|
||||
|
||||
**Split a file only if ONE of:**
|
||||
- Import load time: the file has heavy imports (vendored SDKs, ML models) that some code paths don't need
|
||||
- Definition pollution: the file mixes 3+ unrelated domains with 30+ classes/functions
|
||||
|
||||
**Otherwise: keep in a single file.** Move imports around, but don't fragment.
|
||||
|
||||
**No sub-directories.** All files at `src/` flat with prefix naming.
|
||||
|
||||
## The 3 Refactors (only 3 justified)
|
||||
|
||||
### Refactor 1: MERGE 5 ImGui LEAKS into `gui_2.py`
|
||||
|
||||
**Justification:** User explicit directive: "all ImGui rendering should be in `gui_2.py`. Only exception: `imgui_scopes.py`." Clear violation of the GUI boundary.
|
||||
|
||||
| File | Lines | Content | Destination |
|
||||
|---|---:|---|---|
|
||||
| `src/bg_shader.py` | 66 | ImGui background shader | `src/gui_2.py` |
|
||||
| `src/shaders.py` | 33 | ImGui shader code | `src/gui_2.py` |
|
||||
| `src/command_palette.py` | 165 | ImGui command palette UI | `src/gui_2.py` |
|
||||
| `src/diff_viewer.py` | 164 | ImGui diff viewer UI | `src/gui_2.py` |
|
||||
| `src/patch_modal.py` | 102 | ImGui patch modal UI | `src/gui_2.py` |
|
||||
|
||||
**Verification:** `git grep -l "imgui_bundle\|from imgui\\." -- 'src/*.py'` returns ONLY `gui_2.py` + `imgui_scopes.py`.
|
||||
|
||||
### Refactor 2: MERGE 2 vendor files into `ai_client.py`
|
||||
|
||||
**Justification:** User explicit directive: "vendor_capabilities.py and vendor_state.py are related to ai_client.py... they're the ai vendoring layer."
|
||||
|
||||
| File | Lines | Content | Destination |
|
||||
|---|---:|---|---|
|
||||
| `src/vendor_capabilities.py` | 85 | Vendor capability flags | `src/ai_client.py` |
|
||||
| `src/vendor_state.py` | 78 | Vendor state telemetry | `src/ai_client.py` |
|
||||
|
||||
**Growth:** `ai_client.py` 3147 → ~3310 lines. Justified: unified vendor layer, no fragmentation.
|
||||
|
||||
### Refactor 3: SPLIT `models.py` (the only justified split)
|
||||
|
||||
**Justification:** 5+ unrelated domains, 36 classes, 1044 lines. **Clear definition pollution** (the user's threshold: "3+ unrelated domains with 30+ classes").
|
||||
|
||||
**The new taxonomy:**
|
||||
|
||||
| New file | What it gets | Lines (est.) |
|
||||
|---|---|---:|
|
||||
| `src/mma.py` | MMA Core: ThinkingSegment, Ticket, Track, WorkerContext, TrackState | ~250 |
|
||||
| `src/project.py` | ProjectContext + 5 sub + config I/O + parse_history_entries | ~200 |
|
||||
| `src/project_files.py` | FileItem, ContextPreset, ContextFileEntry, NamedViewPreset, Preset | ~150 |
|
||||
|
||||
**6+ classes merge into existing sub-system files (NOT new files):**
|
||||
|
||||
| Class from `models.py` | Destination |
|
||||
|---|---|
|
||||
| `Persona` | `src/personas.py` (93 lines, exists) |
|
||||
| `Tool`, `ToolPreset` | `src/tool_presets.py` (123 lines, exists) |
|
||||
| `BiasProfile` | `src/tool_bias.py` (63 lines, exists) |
|
||||
| `TextEditorConfig`, `ExternalEditorConfig` | `src/external_editor.py` (129 lines, exists) |
|
||||
| `MCPServerConfig`, `MCPConfiguration`, `VectorStoreConfig`, `RAGConfig`, `load_mcp_config` | `src/mcp_client.py` (1803 lines, exists) |
|
||||
| `WorkspaceProfile` | `src/workspace_manager.py` (73 lines, exists) |
|
||||
|
||||
**`src/models.py` reduced:**
|
||||
- ~30 lines: Pydantic proxy helpers (`_create_generate_request`, `_create_confirm_request`, `__getattr__`)
|
||||
- OR delete the file entirely if it becomes essentially empty (it's not a "system" file; just a temporary holder)
|
||||
|
||||
## The Bonus Refactor: DELETE `AGENT_TOOL_NAMES` (redundant)
|
||||
|
||||
**User caught this:** "isn't AGENT_TOOL_NAMES a redundant thing that's directly associated with the mcp_client.py?"
|
||||
|
||||
YES. The existing test `test_tool_names_subset_of_models_agent_tool_names` literally asserts:
|
||||
```python
|
||||
native_names = mcp_tool_specs.tool_names()
|
||||
agent_names = set(models.AGENT_TOOL_NAMES)
|
||||
assert not missing_in_agent, f"Native tools not in AGENT_TOOL_NAMES: {missing_in_agent}"
|
||||
```
|
||||
|
||||
So `AGENT_TOOL_NAMES` is just a hardcoded snapshot of `mcp_tool_specs.tool_names()`. **DELETE it, not move it.**
|
||||
|
||||
**8 consumer sites to update:**
|
||||
- `src/app_controller.py:2110, 2972, 3273` (3 sites)
|
||||
- `tests/test_arch_boundary_phase2.py:23, 29, 31, 32, 33` (5 sites)
|
||||
|
||||
**Pattern:** `from src.models import AGENT_TOOL_NAMES; for tool in AGENT_TOOL_NAMES: ...` → `from src import mcp_tool_specs; for tool in mcp_tool_specs.tool_names(): ...`
|
||||
|
||||
## Net scope
|
||||
|
||||
- 7 files deleted (5 ImGui + 2 vendor)
|
||||
- 3 new files (mma.py, project.py, project_files.py)
|
||||
- 10 files modified (7 sub-system merges + ai_client.py + gui_2.py + app_controller.py)
|
||||
- 1 file potentially deleted (models.py)
|
||||
- Net: 65 → 61 files (or 60 if models.py is eliminated)
|
||||
- 22 atomic commits
|
||||
|
||||
## Coordination with `cruft_elimination_20260627`
|
||||
|
||||
The `cruft_elimination_20260627` track has a Phase 2 commit that put `ProjectContext` in `models.py` (the wrong location per this track). **DO NOT** merge that `cruft` commit until this refactor is ready. The refactor moves `ProjectContext` to `project.py` as part of Phase 3.
|
||||
|
||||
## Pre-flight verification
|
||||
|
||||
```bash
|
||||
# Verify the current state of src/
|
||||
ls src/*.py | wc -l
|
||||
# Expect: 65
|
||||
|
||||
# Verify models.py is 1044 lines
|
||||
wc -l src/models.py
|
||||
# Expect: 1044
|
||||
|
||||
# Verify ImGui LEAKS exist
|
||||
ls src/bg_shader.py src/shaders.py src/command_palette.py src/diff_viewer.py src/patch_modal.py 2>&1 | grep -v "No such"
|
||||
# Expect: all 5 exist
|
||||
|
||||
# Verify vendor files exist
|
||||
ls src/vendor_capabilities.py src/vendor_state.py 2>&1 | grep -v "No such"
|
||||
# Expect: both exist
|
||||
|
||||
# Verify AGENT_TOOL_NAMES is referenced
|
||||
git grep "AGENT_TOOL_NAMES" HEAD -- 'src/*.py' 'tests/*.py' | wc -l
|
||||
# Expect: 8 hits (3 app_controller + 5 test_arch_boundary + 1 def + ... )
|
||||
|
||||
# Verify all 7 audit gates pass (baseline)
|
||||
uv run python scripts/audit_weak_types.py --strict
|
||||
uv run python scripts/generate_type_registry.py --check
|
||||
uv run python scripts/audit_main_thread_imports.py
|
||||
uv run python scripts/audit_no_models_config_io.py
|
||||
uv run python scripts/audit_code_path_audit_coverage.py --input-dir docs/reports/code_path_audit/latest --strict
|
||||
uv run python scripts/audit_exception_handling.py --strict
|
||||
uv run python scripts/audit_optional_in_3_files.py --strict
|
||||
# All exit 0
|
||||
```
|
||||
|
||||
## Post-track verification (after Phase 5)
|
||||
|
||||
```bash
|
||||
# VC1: ImGui imports limited to gui_2.py + imgui_scopes.py
|
||||
git grep -l "imgui_bundle\|from imgui\\." HEAD -- 'src/*.py'
|
||||
# Expect: gui_2.py, imgui_scopes.py
|
||||
|
||||
# VC2-3: ImGui LEAKS + vendor files deleted
|
||||
ls src/bg_shader.py src/shaders.py src/command_palette.py src/diff_viewer.py src/patch_modal.py src/vendor_capabilities.py src/vendor_state.py 2>&1 | grep -v "No such"
|
||||
# Expect: (no output)
|
||||
|
||||
# VC5-7: New files work
|
||||
uv run python -c "from src.mma import ThinkingSegment, Ticket, Track, WorkerContext, TrackState"
|
||||
uv run python -c "from src.project import ProjectContext, ProjectMeta, ProjectOutput, ProjectFiles, ProjectScreenshots, ProjectDiscussion, _clean_nones, load_config_from_disk, save_config_to_disk, parse_history_entries"
|
||||
uv run python -c "from src.project_files import FileItem, ContextPreset, ContextFileEntry, NamedViewPreset, Preset"
|
||||
# All succeed
|
||||
|
||||
# VC8: 6+ dataclasses in proper sub-system files
|
||||
uv run python -c "from src.personas import Persona; from src.tool_presets import Tool, ToolPreset; from src.tool_bias import BiasProfile; from src.external_editor import TextEditorConfig, ExternalEditorConfig; from src.mcp_client import MCPServerConfig, MCPConfiguration, VectorStoreConfig, RAGConfig, load_mcp_config; from src.workspace_manager import WorkspaceProfile"
|
||||
# Expect: no ImportError
|
||||
|
||||
# VC9: AGENT_TOOL_NAMES deleted
|
||||
git grep "AGENT_TOOL_NAMES" HEAD -- 'src/*.py' 'tests/*.py' | wc -l
|
||||
# Expect: 0
|
||||
|
||||
# VC10: models.py reduced or eliminated
|
||||
ls src/models.py 2>&1
|
||||
# Expect: file not found (or <= 30 lines if kept)
|
||||
|
||||
# VC11-12: audit gates + batched suite
|
||||
# Same as current baseline
|
||||
```
|
||||
|
||||
## Per-phase patterns for Tier 3 workers
|
||||
|
||||
### Per-file atomic commits
|
||||
Each ImGui merge, each vendor merge, each models.py split, each AGENT_TOOL_NAMES site update is a separate commit. Per-file = atomic rollback.
|
||||
|
||||
### Pattern: move content + delete source
|
||||
|
||||
```bash
|
||||
# 1. Read source file
|
||||
cat src/bg_shader.py
|
||||
|
||||
# 2. Add to destination file (with region marker)
|
||||
manual-slop_edit_file gui_2.py
|
||||
# add at appropriate location:
|
||||
#region: Bg Shader (moved from src/bg_shader.py)
|
||||
# ... content ...
|
||||
#endregion
|
||||
|
||||
# 3. Update import sites across the codebase
|
||||
git grep "from src.bg_shader" -- 'src/*.py' 'tests/*.py'
|
||||
# Replace each with: from src.gui_2 import
|
||||
|
||||
# 4. Delete source file
|
||||
git rm src/bg_shader.py
|
||||
|
||||
# 5. Verify
|
||||
uv run python -m pytest tests/test_<affected>.py -v
|
||||
```
|
||||
|
||||
### Pattern: split models.py
|
||||
|
||||
```python
|
||||
# 1. Create new file (e.g., src/mma.py)
|
||||
manual-slop_edit_file mma.py
|
||||
# Add the moved classes with proper imports
|
||||
|
||||
# 2. Update import sites
|
||||
git grep "from src.models import.*(ThinkingSegment|Ticket|Track|WorkerContext|TrackState)" -- 'src/*.py' 'tests/*.py'
|
||||
# Replace each with: from src.mma import
|
||||
|
||||
# 3. Remove from models.py
|
||||
manual-slop_edit_file models.py
|
||||
# Delete the moved class definitions
|
||||
|
||||
# 4. Verify
|
||||
uv run python -m pytest tests/test_mma_*.py -v
|
||||
```
|
||||
|
||||
### Style
|
||||
- 1-space indentation (project standard)
|
||||
- CRLF line endings
|
||||
- No comments in source code (per AGENTS.md)
|
||||
- Use `manual-slop_edit_file` for surgical edits
|
||||
- Per-phase regression-guard test runs after each phase
|
||||
|
||||
## Notes for Tier 2 reviewer
|
||||
|
||||
- The `cruft_elimination_20260627` track's Phase 2 commit put `ProjectContext` in `models.py`. Coordinate: that commit should NOT merge until this refactor is ready (or the cruft track should re-execute Phase 2 with the corrected file location per `SPEC_CORRECTION_phase_2.md`).
|
||||
- The `__getattr__` Pydantic lazy proxy in `models.py` is needed for circular import (src.ai_client imports ToolPreset/BiasProfile/Tool from src.models). After this refactor, the imports move to the new sub-system files (tool_presets.py, tool_bias.py), so the circular import is broken and the `__getattr__` may no longer be needed. Audit during execution.
|
||||
- The `models.py` docstring needs updating throughout the refactor to reflect the new scope.
|
||||
- If `models.py` becomes essentially empty after all moves, **delete the file entirely** (it's not a "system" file).
|
||||
|
||||
## See also
|
||||
|
||||
- `conductor/tracks/module_taxonomy_refactor_20260627/spec.md` — the spec (12 VCs)
|
||||
- `conductor/tracks/module_taxonomy_refactor_20260627/plan.md` — the 5-phase plan (22 atomic commits)
|
||||
- `conductor/tracks/module_taxonomy_refactor_20260627/metadata.json` — the metadata
|
||||
- `conductor/tracks/module_taxonomy_refactor_20260627/state.toml` — the state
|
||||
- `docs/reports/FOLLOWUP_module_taxonomy_20260627.md` — the audit
|
||||
- `conductor/tracks/cruft_elimination_20260627/SPEC_CORRECTION_phase_2.md` — the related spec correction
|
||||
- `AGENTS.md` — "File Size and Naming Convention" HARD RULE
|
||||
- `conductor/code_styleguides/data_oriented_design.md` — "Prefer Fewer Types" principle
|
||||
@@ -0,0 +1,78 @@
|
||||
{
|
||||
"track_id": "module_taxonomy_refactor_20260627",
|
||||
"name": "Module Taxonomy Refactor",
|
||||
"status": "active",
|
||||
"type": "cleanup",
|
||||
"date_created": "2026-06-27",
|
||||
"created_by": "tier1-orchestrator",
|
||||
"blocks": [],
|
||||
"blocked_by": {
|
||||
"cruft_elimination_20260627": "pending (the cruft track has a ProjectContext-in-models.py commit that needs to be coordinated)"
|
||||
},
|
||||
"scope": {
|
||||
"new_files": [
|
||||
"src/mma.py",
|
||||
"src/project.py",
|
||||
"src/project_files.py",
|
||||
"conductor/tracks/module_taxonomy_refactor_20260627/TIER2_STARTUP.md"
|
||||
],
|
||||
"modified_files": [
|
||||
"src/gui_2.py",
|
||||
"src/ai_client.py",
|
||||
"src/personas.py",
|
||||
"src/tool_presets.py",
|
||||
"src/tool_bias.py",
|
||||
"src/external_editor.py",
|
||||
"src/mcp_client.py",
|
||||
"src/workspace_manager.py",
|
||||
"src/app_controller.py",
|
||||
"tests/test_arch_boundary_phase2.py"
|
||||
],
|
||||
"deleted_files": [
|
||||
"src/bg_shader.py",
|
||||
"src/shaders.py",
|
||||
"src/command_palette.py",
|
||||
"src/diff_viewer.py",
|
||||
"src/patch_modal.py",
|
||||
"src/vendor_capabilities.py",
|
||||
"src/vendor_state.py"
|
||||
],
|
||||
"potentially_deleted_files": [
|
||||
"src/models.py"
|
||||
]
|
||||
},
|
||||
"verification_criteria": [
|
||||
"ImGui imports limited to gui_2.py + imgui_scopes.py",
|
||||
"5 ImGui LEAK files deleted (bg_shader, shaders, command_palette, diff_viewer, patch_modal)",
|
||||
"2 vendor files deleted (vendor_capabilities, vendor_state); symbols now in ai_client.py",
|
||||
"src/mma.py exists with MMA Core + TrackState",
|
||||
"src/project.py exists with ProjectContext + sub + config IO",
|
||||
"src/project_files.py exists with file-related dataclasses",
|
||||
"6+ dataclasses in proper sub-system files (Persona/Tool/Editor/MCP/Workspace)",
|
||||
"AGENT_TOOL_NAMES deleted; 8 consumer sites use mcp_tool_specs.tool_names()",
|
||||
"src/models.py reduced to <=30 lines (or eliminated)",
|
||||
"All 7 audit gates pass --strict (no regression)",
|
||||
"10/11 batched test tiers pass (RAG flake acceptable)"
|
||||
],
|
||||
"estimated_effort": {
|
||||
"method": "scope (per workflow.md \u00a7Tier 1 Track Initialization Rules). NO day estimates.",
|
||||
"scope": "1 source file split into 3 (mma.py, project.py, project_files.py) + 7 files deleted (5 ImGui + 2 vendor) + 7 files modified (ai_client.py, gui_2.py, 5 sub-system files) + 8 import sites updated for AGENT_TOOL_NAMES; 22 atomic commits total"
|
||||
},
|
||||
"risk_register": [
|
||||
"R1 (low): ImGui LEAKS move breaks existing tests (e.g., command_palette is referenced in commands.py) - mitigated by running full affected test set after each move; revert + fix on regression",
|
||||
"R2 (medium): Vendor merge into ai_client.py creates circular imports (PROVIDERS lazy proxy is the workaround) - mitigated by the lazy import pattern; verify by running full test suite after merge",
|
||||
"R3 (high): models.py split breaks 136 import sites - mitigated by per-file move with regression-guard tests after each; update imports systematically",
|
||||
"R4 (medium): 6+ 'merge into existing sub-system files' moves break those files' existing tests - mitigated by running affected test file after each merge",
|
||||
"R5 (low): AGENT_TOOL_NAMES deletion breaks test_arch_boundary_phase2.py - mitigated by updating the test to use mcp_tool_specs.tool_names(); cross-check that the test's expected tool names are in the registry",
|
||||
"R6 (high): The ProjectContext Phase 2 commit (in cruft_elimination_20260627) put ProjectContext in models.py; the new track moves it to project.py - needs to coordinate with the cruft track; the cruft track should NOT merge its ProjectContext-in-models.py commit until this refactor is ready",
|
||||
"R7 (low): The _create_generate_request etc. Pydantic proxies in models.py are used by api_hooks.py; if we move them to api_hooks.py we create a different topology - mitigated by auditing the consumers; if all in api_hooks.py, move them; if not, keep in models.py or move to a new api_models.py"
|
||||
],
|
||||
"out_of_scope": [
|
||||
"Renaming existing files for prefix consistency (multi_agent_conductor.py -> mma_conductor.py, etc.) - deferred to follow-up",
|
||||
"Refactoring aggregate.py (513 lines), app_controller.py (4869 lines), gui_2.py (7773 lines) - out of scope; these have natural boundaries",
|
||||
"Modifications to mcp_client.py other than merging the config dataclasses",
|
||||
"New src/<thing>.py files beyond the 3 justified ones (mma.py, project.py, project_files.py)",
|
||||
"The RAG test pre-existing flake (per docs/reports/SSDL_CAMPAIGN_ABORTED_20260624.md Out of Scope)",
|
||||
"Any Tier 2 spec rewrites (per the user's earlier 'don't fuck with commits' directive)"
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,194 @@
|
||||
# Plan: module_taxonomy_refactor_20260627
|
||||
|
||||
5 phases, 12-15 tasks, 12+ atomic commits. Per-task TDD red-first. Tier 3 workers execute; Tier 2 reviews per phase.
|
||||
|
||||
## Phase 0: Pre-flight + TIER2_STARTUP (Tier 1, 0 commits, 1 file)
|
||||
|
||||
- [x] **Task 0.1** [Tier 1]: Create `conductor/tracks/module_taxonomy_refactor_20260627/TIER2_STARTUP.md` with:
|
||||
- Decision rule (user's principle): split ONLY for import load times or definition pollution
|
||||
- The 3 refactors (merge ImGui LEAKS, merge vendor files, split models.py)
|
||||
- 8 AGENT_TOOL_NAMES consumer sites
|
||||
- 5 ImGui LEAK files
|
||||
- 6+ sub-system merge destinations
|
||||
- MANDATORY Pre-Action Reading list
|
||||
- [x] **NOTE:** This task is done in the planning phase; no commit needed (TIER2_STARTUP.md is committed with the track artifacts in a single commit at the end)
|
||||
|
||||
## Phase 1: MERGE ImGui LEAKS into `gui_2.py` (5 commits, 1 per file)
|
||||
|
||||
**Focus:** 5 ImGui-using files that violate the "ImGui belongs in `gui_2.py`" boundary. Each is a separate commit for atomic rollback.
|
||||
|
||||
- [x] **Task 1.1** [Tier 3]: Move `src/bg_shader.py` (66 lines) → `src/gui_2.py` (add as section "Bg Shader (moved from src/bg_shader.py)")
|
||||
- HOW: `manual-slop_edit_file` to append to gui_2.py; `git mv` to delete bg_shader.py
|
||||
- SAFETY: Run `tests/test_imgui_scopes.py` + any tests that import from `src.bg_shader`
|
||||
- [x] **COMMIT 1.1:** `refactor(gui_2): merge bg_shader into gui_2; git rm src/bg_shader.py` (Tier 3)
|
||||
- [x] **Task 1.2-1.5** [Tier 3]: Same pattern for `shaders.py`, `command_palette.py`, `diff_viewer.py`, `patch_modal.py`
|
||||
- [x] **COMMITS 1.2-1.5:** One per file
|
||||
- [x] **VERIFICATION:** `git grep -l "imgui_bundle\|from imgui\\." -- 'src/*.py'` returns ONLY `gui_2.py` + `imgui_scopes.py`
|
||||
|
||||
## Phase 2: MERGE vendor files into `ai_client.py` (2 commits, 1 per file)
|
||||
|
||||
**Focus:** 2 vendor files that should be unified with `ai_client.py` per user directive.
|
||||
|
||||
- [x] **Task 2.1** [Tier 3]: Move `src/vendor_capabilities.py` (85 lines) → `src/ai_client.py` (add as section "Vendor Capabilities (moved from src/vendor_capabilities.py)")
|
||||
- HOW: `manual-slop_edit_file` to append to ai_client.py; `git mv` to delete vendor_capabilities.py
|
||||
- SAFETY: Run `tests/test_provider_state_migration.py` + any tests that import from `src.vendor_capabilities`
|
||||
- [x] **COMMIT 2.1:** `refactor(ai_client): merge vendor_capabilities into ai_client; git rm src/vendor_capabilities.py` (Tier 3)
|
||||
- [x] **Task 2.2** [Tier 3]: Same for `src/vendor_state.py` (78 lines)
|
||||
- [x] **COMMIT 2.2:** `refactor(ai_client): merge vendor_state into ai_client; git rm src/vendor_state.py` (Tier 3)
|
||||
|
||||
## Phase 3: SPLIT `models.py` (8 commits, 3 new files + 6 merges + 1 reduce)
|
||||
|
||||
**Focus:** `models.py` is the only file with clear definition pollution (5+ domains, 36 classes, 1044 lines). Split into `mma.py` + `project.py` + `project_files.py`; merge other classes into existing sub-system files; reduce `models.py`.
|
||||
|
||||
### Phase 3a: Create new files (3 commits)
|
||||
|
||||
- [x] **Task 3.1** [Tier 3]: Create `src/mma.py` with `ThinkingSegment`, `Ticket`, `Track`, `WorkerContext`, `TrackState` (moved from `models.py`)
|
||||
- HOW: `manual-slop_edit_file` to write the new file
|
||||
- Update imports in 5 files: `multi_agent_conductor.py`, `dag_engine.py`, `orchestrator_pm.py`, `conductor_tech_lead.py`, `mma_prompts.py`
|
||||
- SAFETY: Run `tests/test_mma_*.py` + `tests/test_orchestration_logic.py` + `tests/test_dag_engine.py` + `tests/test_conductor_engine_v2.py`
|
||||
- [x] **COMMIT 3.1:** `refactor(mma): create mma.py with MMA Core + TrackState (split from models.py)` (Tier 3)
|
||||
- [x] **Task 3.2** [Tier 3]: Create `src/project.py` with `ProjectContext` + 5 sub-dataclasses + config I/O (`_clean_nones`, `load_config_from_disk`, `save_config_to_disk`, `parse_history_entries`)
|
||||
- HOW: `manual-slop_edit_file` to write the new file
|
||||
- Update imports in `src/project_manager.py` (and any other consumer)
|
||||
- SAFETY: Run `tests/test_project_manager_*.py` + `tests/test_project_context_20260627.py` (new file from cruft track)
|
||||
- [x] **COMMIT 3.2:** `refactor(project): create project.py with ProjectContext + sub + config IO (split from models.py)` (Tier 3)
|
||||
- [x] **Task 3.3** [Tier 3]: Create `src/project_files.py` with `FileItem`, `ContextPreset`, `ContextFileEntry`, `NamedViewPreset`, `Preset`
|
||||
- HOW: `manual-slop_edit_file` to write the new file
|
||||
- Update imports in `src/aggregate.py`, `src/context_presets.py`, `src/gui_2.py`, `src/app_controller.py`
|
||||
- SAFETY: Run `tests/test_context_composition_*.py` + `tests/test_view_presets.py` + `tests/test_custom_slices_*.py`
|
||||
- [x] **COMMIT 3.3:** `refactor(project_files): create project_files.py (split from models.py)` (Tier 3)
|
||||
|
||||
### Phase 3b: Merge other classes into existing sub-system files (6 commits, 1 per destination)
|
||||
|
||||
- [x] **Task 3.4** [Tier 3]: Move `Persona` from `models.py` → `src/personas.py` (existing 93-line file)
|
||||
- HOW: `manual-slop_edit_file` to add Persona dataclass to personas.py; `manual-slop_edit_file` to remove from models.py
|
||||
- Update imports: `from src.models import Persona` → `from src.personas import Persona`
|
||||
- SAFETY: Run `tests/test_personas_*.py` + `tests/test_arch_boundary_*.py` (if Persona is tested there)
|
||||
- [x] **COMMIT 3.4:** `refactor(personas): move Persona dataclass from models.py to personas.py` (Tier 3)
|
||||
- [x] **Task 3.5** [Tier 3]: Move `Tool`, `ToolPreset` → `src/tool_presets.py` (existing 123-line file)
|
||||
- [x] **Task 3.6** [Tier 3]: Move `BiasProfile` → `src/tool_bias.py` (existing 63-line file)
|
||||
- [x] **Task 3.7** [Tier 3]: Move `TextEditorConfig`, `ExternalEditorConfig` → `src/external_editor.py` (existing 129-line file)
|
||||
- [x] **Task 3.8** [Tier 3]: Move `MCPServerConfig`, `MCPConfiguration`, `VectorStoreConfig`, `RAGConfig`, `load_mcp_config` → `src/mcp_client.py` (existing 1803-line file)
|
||||
- [x] **Task 3.9** [Tier 3]: Move `WorkspaceProfile` → `src/workspace_manager.py` (existing 73-line file)
|
||||
- [x] **COMMITS 3.5-3.9:** One per merge
|
||||
|
||||
### Phase 3c: Reduce `models.py` (1 commit)
|
||||
|
||||
- [x] **Task 3.10** [Tier 3]: After all moves, `src/models.py` should be ~30 lines (Pydantic proxies + AGENT_TOOL_NAMES)
|
||||
- HOW: `manual-slop_edit_file` to remove all moved classes; keep only the Pydantic proxy helpers
|
||||
- If `models.py` becomes empty, **delete the file entirely** (it's not a "system" file)
|
||||
- [x] **COMMIT 3.10:** `refactor(models): reduce to Pydantic proxy helpers only (or delete entirely if empty)` (Tier 3)
|
||||
|
||||
## Phase 4: DELETE `AGENT_TOOL_NAMES` (1 commit)
|
||||
|
||||
**Focus:** `AGENT_TOOL_NAMES` is redundant (verified by `test_tool_names_subset_of_models_agent_tool_names` which asserts `tool_names() ⊆ AGENT_TOOL_NAMES`). Derive at consumer sites.
|
||||
|
||||
- [x] **Task 4.1** [Tier 3]: Update 8 consumer sites to use `mcp_tool_specs.tool_names()` instead of `AGENT_TOOL_NAMES`:
|
||||
- `src/app_controller.py:2110, 2972, 3273` (3 sites)
|
||||
- `tests/test_arch_boundary_phase2.py:23, 29, 31, 32, 33` (5 sites)
|
||||
- HOW: `manual-slop_edit_file` per site
|
||||
- SAFETY: Run the affected tests + the full batched suite
|
||||
- [x] **Task 4.2** [Tier 3]: Delete `AGENT_TOOL_NAMES` constant from `src/models.py` (if not already removed in Phase 3c)
|
||||
- [x] **Task 4.3** [Tier 3]: DELETE or CONVERT `test_tool_names_subset_of_models_agent_tool_names` test
|
||||
- DELETE: it's a tautology once AGENT_TOOL_NAMES is derived
|
||||
- OR CONVERT to: `assert mcp_tool_specs.tool_names() == {expected canonical tools}`
|
||||
- [x] **COMMIT 4.1:** `refactor(mcp_tool_specs): delete redundant AGENT_TOOL_NAMES; use tool_names() at consumer sites` (Tier 3)
|
||||
|
||||
## Phase 5: Verification + end-of-track (2 commits, no code changes)
|
||||
|
||||
**Focus:** Run all 12 VCs; write `TRACK_COMPLETION`; update `state.toml` + `tracks.md`.
|
||||
|
||||
- [x] **Task 5.1** [Tier 2]:
|
||||
- Run all 12 VCs (see spec.md §Verification Criteria)
|
||||
- Re-measure: `wc -l src/models.py` should be ≤30 (or file should not exist)
|
||||
- Run all 7 audit gates
|
||||
- Run the full batched test suite
|
||||
- Document the result in `docs/reports/TRACK_COMPLETION_module_taxonomy_refactor_20260627.md`
|
||||
- [x] **COMMIT 5.1:** `conductor(state): module_taxonomy_refactor_20260627 SHIPPED` (Tier 2)
|
||||
- [x] **COMMIT 5.2:** `docs(reports): TRACK_COMPLETION_module_taxonomy_refactor_20260627` (Tier 2)
|
||||
- [x] **COMMIT 5.3:** `conductor(tracks): add module_taxonomy_refactor_20260627 row` (Tier 2)
|
||||
|
||||
## Commit Log (Expected, 12-15 atomic commits)
|
||||
|
||||
1. (Phase 0) `conductor(track): module_taxonomy_refactor_20260627 track artifacts` (Tier 1) — spec + plan + metadata + state + TIER2_STARTUP
|
||||
2. (Phase 1) `refactor(gui_2): merge bg_shader; git rm src/bg_shader.py` (Tier 3)
|
||||
3. (Phase 1) `refactor(gui_2): merge shaders; git rm src/shaders.py` (Tier 3)
|
||||
4. (Phase 1) `refactor(gui_2): merge command_palette; git rm src/command_palette.py` (Tier 3)
|
||||
5. (Phase 1) `refactor(gui_2): merge diff_viewer; git rm src/diff_viewer.py` (Tier 3)
|
||||
6. (Phase 1) `refactor(gui_2): merge patch_modal; git rm src/patch_modal.py` (Tier 3)
|
||||
7. (Phase 2) `refactor(ai_client): merge vendor_capabilities; git rm src/vendor_capabilities.py` (Tier 3)
|
||||
8. (Phase 2) `refactor(ai_client): merge vendor_state; git rm src/vendor_state.py` (Tier 3)
|
||||
9. (Phase 3a) `refactor(mma): create mma.py with MMA Core + TrackState (split from models.py)` (Tier 3)
|
||||
10. (Phase 3a) `refactor(project): create project.py with ProjectContext + sub + config IO (split from models.py)` (Tier 3)
|
||||
11. (Phase 3a) `refactor(project_files): create project_files.py (split from models.py)` (Tier 3)
|
||||
12. (Phase 3b) `refactor(personas): move Persona dataclass from models.py to personas.py` (Tier 3)
|
||||
13. (Phase 3b) `refactor(tool_presets): move Tool + ToolPreset from models.py to tool_presets.py` (Tier 3)
|
||||
14. (Phase 3b) `refactor(tool_bias): move BiasProfile from models.py to tool_bias.py` (Tier 3)
|
||||
15. (Phase 3b) `refactor(external_editor): move TextEditorConfig + ExternalEditorConfig from models.py to external_editor.py` (Tier 3)
|
||||
16. (Phase 3b) `refactor(mcp_client): move MCP config dataclasses from models.py to mcp_client.py` (Tier 3)
|
||||
17. (Phase 3b) `refactor(workspace_manager): move WorkspaceProfile from models.py to workspace_manager.py` (Tier 3)
|
||||
18. (Phase 3c) `refactor(models): reduce to Pydantic proxy helpers only (or delete entirely if empty)` (Tier 3)
|
||||
19. (Phase 4) `refactor(mcp_tool_specs): delete redundant AGENT_TOOL_NAMES; use tool_names() at consumer sites` (Tier 3)
|
||||
20. (Phase 5) `conductor(state): module_taxonomy_refactor_20260627 SHIPPED` (Tier 2)
|
||||
21. (Phase 5) `docs(reports): TRACK_COMPLETION_module_taxonomy_refactor_20260627` (Tier 2)
|
||||
22. (Phase 5) `conductor(tracks): add module_taxonomy_refactor_20260627 row` (Tier 2)
|
||||
|
||||
Plus per-task plan-update commits per the workflow.
|
||||
|
||||
## Verification Commands (run at end of each phase + Phase 5)
|
||||
|
||||
```bash
|
||||
# VC1: ImGui imports limited to gui_2.py + imgui_scopes.py
|
||||
git grep -l "imgui_bundle\|from imgui\\." HEAD -- 'src/*.py'
|
||||
|
||||
# VC2: 5 ImGui files deleted
|
||||
ls src/bg_shader.py src/shaders.py src/command_palette.py src/diff_viewer.py src/patch_modal.py 2>&1 | grep -v "No such file"
|
||||
|
||||
# VC3: 2 vendor files deleted
|
||||
ls src/vendor_capabilities.py src/vendor_state.py 2>&1 | grep -v "No such file"
|
||||
|
||||
# VC5-7: New files work
|
||||
uv run python -c "from src.mma import ThinkingSegment, Ticket, Track, WorkerContext, TrackState"
|
||||
uv run python -c "from src.project import ProjectContext, ProjectMeta, ProjectOutput, ProjectFiles, ProjectScreenshots, ProjectDiscussion"
|
||||
uv run python -c "from src.project_files import FileItem, ContextPreset, ContextFileEntry, NamedViewPreset, Preset"
|
||||
|
||||
# VC8: 6+ dataclasses in proper sub-system files
|
||||
uv run python -c "from src.personas import Persona; from src.tool_presets import Tool, ToolPreset; from src.tool_bias import BiasProfile; from src.external_editor import TextEditorConfig, ExternalEditorConfig; from src.mcp_client import MCPServerConfig, MCPConfiguration, VectorStoreConfig, RAGConfig, load_mcp_config; from src.workspace_manager import WorkspaceProfile"
|
||||
|
||||
# VC9: AGENT_TOOL_NAMES deleted
|
||||
git grep "AGENT_TOOL_NAMES" HEAD -- 'src/*.py' 'tests/*.py' | wc -l
|
||||
# Expect: 0
|
||||
|
||||
# VC10: models.py reduced
|
||||
Get-Item src/models.py 2>&1 | Select-Object Length
|
||||
# Expect: file not found OR <= 30 lines
|
||||
|
||||
# VC11: 7 audit gates pass
|
||||
uv run python scripts/audit_weak_types.py --strict
|
||||
uv run python scripts/generate_type_registry.py --check
|
||||
uv run python scripts/audit_main_thread_imports.py
|
||||
uv run python scripts/audit_no_models_config_io.py
|
||||
uv run python scripts/audit_code_path_audit_coverage.py --input-dir docs/reports/code_path_audit/latest --strict
|
||||
uv run python scripts/audit_exception_handling.py --strict
|
||||
uv run python scripts/audit_optional_in_3_files.py --strict
|
||||
# All exit 0
|
||||
|
||||
# VC12: 10/11 batched tiers pass
|
||||
uv run python scripts/run_tests_batched.py
|
||||
# Expect: 10/11 PASS (RAG flake acceptable)
|
||||
```
|
||||
|
||||
## Notes for Tier 3 workers
|
||||
|
||||
- **Per-file atomic commits**: each ImGui merge, each vendor merge, each models.py split, each AGENT_TOOL_NAMES site update is a separate commit
|
||||
- **Pattern consistency**: use `git mv` for renames; for merges, append content to the destination file, then `git rm` the source
|
||||
- **Import updates**: use `manual-slop_edit_file` to update import statements; for `from src.bg_shader import X` → `from src.gui_2 import X` patterns
|
||||
- **Indentation**: 1-space per level
|
||||
- **No comments** in source code (per AGENTS.md)
|
||||
- **Per-phase regression-guard test runs**: after each phase, run the full batched test suite. If a phase causes a regression, REVERT the phase commit and investigate (don't try to fix forward)
|
||||
|
||||
## Notes for Tier 2 reviewer
|
||||
|
||||
- The `cruft_elimination_20260627` track has a `ProjectContext` commit that put `ProjectContext` in `models.py` (the wrong location). This refactor track moves `ProjectContext` to `project.py`. Coordinate with the cruft track: the `cruft` track should NOT merge its `ProjectContext`-in-`models.py` commit until this refactor is ready.
|
||||
- The `__getattr__` Pydantic lazy proxy in `models.py` is needed because `src.ai_client` imports `ToolPreset`/`BiasProfile`/`Tool` from `models.py`, creating a circular import. After this refactor, the imports move to the new sub-system files (`tool_presets.py`, `tool_bias.py`), so the circular import is broken and the `__getattr__` may no longer be needed. Audit during execution.
|
||||
- The `models.py` docstring needs updating throughout the refactor to reflect the new scope.
|
||||
@@ -0,0 +1,224 @@
|
||||
# Track Specification: module_taxonomy_refactor_20260627
|
||||
|
||||
## Overview
|
||||
|
||||
The user-reported `models.py` is a "dumping ground" (1044 lines, 36 classes, 5+ unrelated domains). This track cleans it up PLUS addresses 5 ImGui LEAKS that violate the "ImGui belongs in `gui_2.py`" boundary PLUS unifies 2 vendor files with `ai_client.py`.
|
||||
|
||||
Per the user's principle: **unify unless there's a good reason (import load times, definition pollution)**. No sub-directories. Prefix naming convention.
|
||||
|
||||
## Current State Audit (master `5380b715`, measured 2026-06-27)
|
||||
|
||||
| Metric | Value |
|
||||
|---|---:|
|
||||
| `src/` file count | 65 |
|
||||
| `src/models.py` line count | 1044 |
|
||||
| `src/models.py` class/function count | 36 |
|
||||
| `src/models.py` regions | 13 (Constants, Config Utilities, History Utilities, Pydantic Models, MMA Core, State & Config, Tool Models, UI/Editor, Persona, Workspace, MCP Config, Project Context, ...more) |
|
||||
| ImGui-using files outside `gui_2.py` | 5 (`bg_shader.py`, `shaders.py`, `command_palette.py`, `diff_viewer.py`, `patch_modal.py`) |
|
||||
| Vendor files separate from `ai_client.py` | 2 (`vendor_capabilities.py`, `vendor_state.py`) |
|
||||
| `AGENT_TOOL_NAMES` consumers | 8 (3 in `app_controller.py`, 5 in `tests/test_arch_boundary_phase2.py`) |
|
||||
| `mcp_tool_specs.tool_names()` test | EXISTS (asserts `tool_names() ⊆ AGENT_TOOL_NAMES` — proves it's redundant) |
|
||||
|
||||
## Goals
|
||||
|
||||
| ID | Goal | Acceptance |
|
||||
|---|---|---|
|
||||
| G1 | **MERGE 5 ImGui LEAKS into `gui_2.py`** | `git grep -l "imgui_bundle\|from imgui\\." -- 'src/*.py'` returns ONLY `gui_2.py` + `imgui_scopes.py` |
|
||||
| G2 | **MERGE 2 vendor files into `ai_client.py`** | `ls src/{vendor_capabilities,vendor_state}.py` returns not-found; `python -c "from src.ai_client import ..."` imports the merged symbols |
|
||||
| G3 | **SPLIT `models.py`** into `mma.py` + `project.py` + `project_files.py` | `ls src/mma.py src/project.py src/project_files.py` all exist; `python -c "from src.mma import ThinkingSegment, Ticket, Track, WorkerContext, TrackState"` works |
|
||||
| G4 | **MERGE** 6+ other `models.py` classes into existing sub-system files | `Persona` in `personas.py`; `Tool`/`ToolPreset` in `tool_presets.py`; `BiasProfile` in `tool_bias.py`; `TextEditorConfig`/`ExternalEditorConfig` in `external_editor.py`; `MCPServerConfig`+etc in `mcp_client.py`; `WorkspaceProfile` in `workspace_manager.py` |
|
||||
| G5 | **DELETE `AGENT_TOOL_NAMES`** (redundant with `mcp_tool_specs.tool_names()`) | `git grep "AGENT_TOOL_NAMES" -- 'src/*.py'` returns 0 hits; 8 consumer sites updated to use `list(mcp_tool_specs.tool_names())` |
|
||||
| G6 | **`src/models.py` reduced to ≤30 lines** (or eliminated) | `wc -l src/models.py` returns ≤30 |
|
||||
| G7 | All 7 audit gates pass `--strict` | unchanged from baseline |
|
||||
| G8 | All batched test tiers pass (10/11 baseline + RAG flake) | unchanged from baseline |
|
||||
|
||||
## Non-Goals
|
||||
|
||||
- Renaming existing files for prefix consistency (`multi_agent_conductor.py` → `mma_conductor.py`, etc.) — deferred to follow-up; current names are clear enough
|
||||
- Refactoring `aggregate.py` (513 lines), `app_controller.py` (4869 lines), `gui_2.py` (7773 lines) — out of scope; these have natural boundaries; the user doesn't want more splitting without good reason
|
||||
- Modifications to `mcp_client.py` other than merging the config dataclasses — the merge itself is the change
|
||||
- New `src/<thing>.py` files (per AGENTS.md hard rule) — the 3 new files (`mma.py`, `project.py`, `project_files.py`) are justified by the `models.py` split (definition pollution)
|
||||
|
||||
## Functional Requirements
|
||||
|
||||
### FR1: MERGE ImGui LEAKS into `gui_2.py`
|
||||
|
||||
For each of these 5 files, move the content into `gui_2.py` in a clearly-marked section, then `git rm` the original:
|
||||
|
||||
```python
|
||||
# In gui_2.py, add at the appropriate location:
|
||||
|
||||
#region: Bg Shader (moved from src/bg_shader.py)
|
||||
# ... (content of src/bg_shader.py)
|
||||
#endregion
|
||||
|
||||
#region: Shaders (moved from src/shaders.py)
|
||||
# ... (content of src/shaders.py)
|
||||
#endregion
|
||||
|
||||
#region: Command Palette (moved from src/command_palette.py)
|
||||
# ... (content of src/command_palette.py)
|
||||
#endregion
|
||||
|
||||
#region: Diff Viewer (moved from src/diff_viewer.py)
|
||||
# ... (content of src/diff_viewer.py)
|
||||
#endregion
|
||||
|
||||
#region: Patch Modal (moved from src/patch_modal.py)
|
||||
# ... (content of src/patch_modal.py)
|
||||
#endregion
|
||||
```
|
||||
|
||||
**Imports to update across the codebase:**
|
||||
- `from src.bg_shader import X` → `from src.gui_2 import X`
|
||||
- `from src.shaders import X` → `from src.gui_2 import X`
|
||||
- (etc. for all 5 files)
|
||||
|
||||
### FR2: MERGE vendor files into `ai_client.py`
|
||||
|
||||
```python
|
||||
# In ai_client.py, add at the appropriate location:
|
||||
|
||||
#region: Vendor Capabilities (moved from src/vendor_capabilities.py)
|
||||
# ... (content of src/vendor_capabilities.py)
|
||||
#endregion
|
||||
|
||||
#region: Vendor State (moved from src/vendor_state.py)
|
||||
# ... (content of src/vendor_state.py)
|
||||
#endregion
|
||||
```
|
||||
|
||||
**Imports to update:**
|
||||
- `from src.vendor_capabilities import X` → `from src.ai_client import X`
|
||||
- `from src.vendor_state import X` → `from src.ai_client import X`
|
||||
|
||||
### FR3: SPLIT `models.py`
|
||||
|
||||
**Phase 1: Create `src/mma.py`** with the MMA Core + TrackState:
|
||||
- ThinkingSegment
|
||||
- Ticket
|
||||
- Track
|
||||
- WorkerContext
|
||||
- TrackState
|
||||
- Top-level docstring explaining MMA scope
|
||||
|
||||
**Phase 2: Create `src/project.py`** with the project config:
|
||||
- ProjectContext + 5 sub-dataclasses (ProjectMeta, ProjectOutput, ProjectFiles, ProjectScreenshots, ProjectDiscussion)
|
||||
- Config I/O helpers: `_clean_nones`, `load_config_from_disk`, `save_config_to_disk`, `parse_history_entries`
|
||||
- Top-level docstring explaining project config scope
|
||||
|
||||
**Phase 3: Create `src/project_files.py`** with the file-related dataclasses:
|
||||
- FileItem
|
||||
- ContextPreset
|
||||
- ContextFileEntry
|
||||
- NamedViewPreset
|
||||
- Preset
|
||||
- Top-level docstring explaining file-related project state scope
|
||||
|
||||
### FR4: MERGE other `models.py` classes into existing sub-system files
|
||||
|
||||
| Class from `models.py` | Destination (existing file) | New section name |
|
||||
|---|---|---|
|
||||
| `Persona` | `src/personas.py` | "Persona Dataclass" |
|
||||
| `Tool`, `ToolPreset` | `src/tool_presets.py` | "Tool + ToolPreset Dataclasses" |
|
||||
| `BiasProfile` | `src/tool_bias.py` | "BiasProfile Dataclass" |
|
||||
| `TextEditorConfig`, `ExternalEditorConfig` | `src/external_editor.py` | "Editor Config Dataclasses" |
|
||||
| `MCPServerConfig`, `MCPConfiguration`, `VectorStoreConfig`, `RAGConfig`, `load_mcp_config` | `src/mcp_client.py` | "MCP Config Dataclasses" |
|
||||
| `WorkspaceProfile` | `src/workspace_manager.py` | "WorkspaceProfile Dataclass" |
|
||||
|
||||
### FR5: DELETE `AGENT_TOOL_NAMES` (redundant)
|
||||
|
||||
```python
|
||||
# 8 consumer site updates:
|
||||
# Before:
|
||||
from src.models import AGENT_TOOL_NAMES
|
||||
for tool in AGENT_TOOL_NAMES:
|
||||
...
|
||||
|
||||
# After:
|
||||
from src import mcp_tool_specs
|
||||
for tool in mcp_tool_specs.tool_names():
|
||||
...
|
||||
```
|
||||
|
||||
**Consumer sites (8):**
|
||||
- `src/app_controller.py:2110, 2972, 3273` (3 sites)
|
||||
- `tests/test_arch_boundary_phase2.py:23, 29, 31, 32, 33` (5 sites)
|
||||
|
||||
**Test simplification:** `test_tool_names_subset_of_models_agent_tool_names` becomes either:
|
||||
- DELETE (it's a tautology once `AGENT_TOOL_NAMES` is derived from `tool_names()`)
|
||||
- OR convert to a positive assertion: `assert mcp_tool_specs.tool_names() == {expected canonical tools}`
|
||||
|
||||
### FR6: REDUCE `src/models.py` to ~30 lines (or eliminate)
|
||||
|
||||
After all moves, `src/models.py` contains:
|
||||
- `_create_generate_request`, `_create_confirm_request`, `__getattr__` (Pydantic lazy proxies for the API)
|
||||
- OR these move to `src/api_hooks.py` (if API-specific)
|
||||
- Top-level docstring
|
||||
|
||||
If `models.py` becomes essentially empty after these moves, **delete the file entirely** (it's not a "system" file; `models.py` is just a temporary holder).
|
||||
|
||||
## Non-Functional Requirements
|
||||
|
||||
- NFR1: 1-space indentation (per `conductor/workflow.md`)
|
||||
- NFR2: CRLF line endings on Windows
|
||||
- NFR3: No comments in source code (per AGENTS.md "No comments in source code")
|
||||
- NFR4: Per-task atomic commits with git notes
|
||||
- NFR5: No new pip dependencies
|
||||
- NFR6: `Result[T]` returns for fallible fns (per `error_handling.md`)
|
||||
- NFR7: No new `src/<thing>.py` files UNLESS justified by definition pollution (per AGENTS.md hard rule)
|
||||
|
||||
## Architecture Reference
|
||||
|
||||
- `AGENTS.md` — "File Size and Naming Convention" HARD RULE
|
||||
- `conductor/code_styleguides/data_oriented_design.md` — "Prefer Fewer Types" principle
|
||||
- `conductor/code_styleguides/error_handling.md` — the `Result[T]` convention
|
||||
- `conductor/code_styleguides/type_aliases.md` — the 10 TypeAliases convention
|
||||
- `conductor/tracks/cruft_elimination_20260627/SPEC_CORRECTION_phase_2.md` — the related spec correction (the original Phase 2 spec was wrong to put ProjectContext in `models.py`; this track fixes that)
|
||||
- `docs/reports/FOLLOWUP_module_taxonomy_20260627.md` — the previous followup report (this track supersedes it with concrete execution)
|
||||
|
||||
## Out of Scope
|
||||
|
||||
- Renaming existing files for prefix consistency (`multi_agent_conductor.py` → `mma_conductor.py`, etc.) — deferred to follow-up
|
||||
- Refactoring `aggregate.py` (513 lines), `app_controller.py` (4869 lines), `gui_2.py` (7773 lines) — out of scope; these have natural boundaries
|
||||
- Modifications to `mcp_client.py` other than merging the config dataclasses
|
||||
- New `src/<thing>.py` files beyond the 3 justified ones (`mma.py`, `project.py`, `project_files.py`)
|
||||
- The RAG test pre-existing flake (per `docs/reports/SSDL_CAMPAIGN_ABORTED_20260624.md` "Out of Scope")
|
||||
- Any Tier 2 spec rewrites (per the user's earlier "don't fuck with commits" directive)
|
||||
|
||||
## Verification Criteria (Definition of Done)
|
||||
|
||||
| # | Criterion | Verification |
|
||||
|---|---|---|
|
||||
| VC1 | ImGui imports limited to `gui_2.py` + `imgui_scopes.py` | `git grep -l "imgui_bundle\|from imgui\\." -- 'src/*.py'` returns 2 files |
|
||||
| VC2 | `src/bg_shader.py`, `src/shaders.py`, `src/command_palette.py`, `src/diff_viewer.py`, `src/patch_modal.py` deleted | `ls src/{bg_shader,shaders,command_palette,diff_viewer,patch_modal}.py` returns not-found |
|
||||
| VC3 | `src/vendor_capabilities.py`, `src/vendor_state.py` deleted | `ls src/{vendor_capabilities,vendor_state}.py` returns not-found |
|
||||
| VC4 | Vendor symbols importable from `src.ai_client` | `python -c "from src.ai_client import PROVIDER_CAPABILITIES, get_vendor_state"` works |
|
||||
| VC5 | `src/mma.py` exists with MMA Core + TrackState | `python -c "from src.mma import ThinkingSegment, Ticket, Track, WorkerContext, TrackState"` works |
|
||||
| VC6 | `src/project.py` exists with ProjectContext + sub + config I/O | `python -c "from src.project import ProjectContext, ProjectMeta, ProjectOutput, ProjectFiles, ProjectScreenshots, ProjectDiscussion, _clean_nones, load_config_from_disk, save_config_to_disk, parse_history_entries"` works |
|
||||
| VC7 | `src/project_files.py` exists with file-related dataclasses | `python -c "from src.project_files import FileItem, ContextPreset, ContextFileEntry, NamedViewPreset, Preset"` works |
|
||||
| VC8 | Persona/Tool/Editor/MCP/Workspace dataclasses in their proper sub-system files | `python -c "from src.personas import Persona; from src.tool_presets import Tool, ToolPreset; from src.tool_bias import BiasProfile; from src.external_editor import TextEditorConfig, ExternalEditorConfig; from src.mcp_client import MCPServerConfig, MCPConfiguration, VectorStoreConfig, RAGConfig, load_mcp_config; from src.workspace_manager import WorkspaceProfile"` works |
|
||||
| VC9 | `AGENT_TOOL_NAMES` deleted; all 8 consumer sites use `mcp_tool_specs.tool_names()` | `git grep "AGENT_TOOL_NAMES" -- 'src/*.py' 'tests/*.py'` returns 0 hits |
|
||||
| VC10 | `src/models.py` reduced to ≤30 lines (or eliminated entirely) | `wc -l src/models.py` returns ≤30; OR `ls src/models.py` returns not-found |
|
||||
| VC11 | All 7 audit gates pass `--strict` | unchanged from baseline |
|
||||
| VC12 | 10/11 batched test tiers pass (RAG flake acceptable) | unchanged from baseline |
|
||||
|
||||
## Risks
|
||||
|
||||
| # | Risk | Likelihood | Mitigation |
|
||||
|---|---|---|---|
|
||||
| R1 | ImGui LEAKS move breaks existing tests (e.g., `command_palette` is referenced in commands.py) | low | Run full affected test set after each move; revert + fix on regression |
|
||||
| R2 | Vendor merge into `ai_client.py` creates circular imports (PROVIDERS lazy proxy is the workaround) | medium | The lazy import pattern (`__getattr__`) handles this; verify by running the full test suite after merge |
|
||||
| R3 | `models.py` split breaks 136 import sites | high | Per-file move with regression-guard tests after each; update imports systematically |
|
||||
| R4 | The 6+ "merge into existing sub-system files" moves break those files' existing tests | medium | Run the affected test file after each merge |
|
||||
| R5 | `AGENT_TOOL_NAMES` deletion breaks `test_arch_boundary_phase2.py` | low | Update the test to use `mcp_tool_specs.tool_names()`; cross-check that the test's expected tool names are in the registry |
|
||||
| R6 | The `ProjectContext` Phase 2 commit (in `cruft_elimination_20260627`) put `ProjectContext` in `models.py`; the new track moves it to `project.py` — needs to coordinate with the cruft track | high | The cruft track should NOT merge its `models.py` `ProjectContext` commit; this refactor track handles the move |
|
||||
| R7 | The `_create_generate_request` etc. Pydantic proxies in `models.py` are used by `api_hooks.py`; if we move them to `api_hooks.py` we create a different topology | low | Audit the consumers; if they're all in `api_hooks.py`, move them; if not, keep in `models.py` or move to a new `api_models.py` |
|
||||
|
||||
## See also
|
||||
|
||||
- `docs/reports/FOLLOWUP_module_taxonomy_20260627.md` — the previous followup report (this spec supersedes it)
|
||||
- `conductor/tracks/cruft_elimination_20260627/SPEC_CORRECTION_phase_2.md` — the related spec correction
|
||||
- `conductor/tracks/cruft_elimination_20260627/spec.md` — the parent spec (which is currently in flux)
|
||||
- `AGENTS.md` — "File Size and Naming Convention" HARD RULE
|
||||
- `conductor/code_styleguides/data_oriented_design.md` — "Prefer Fewer Types" principle
|
||||
@@ -0,0 +1,62 @@
|
||||
# Track state for module_taxonomy_refactor_20260627
|
||||
# Updated by Tier 2 Tech Lead as tasks complete
|
||||
|
||||
[meta]
|
||||
track_id = "module_taxonomy_refactor_20260627"
|
||||
name = "Module Taxonomy Refactor"
|
||||
status = "active"
|
||||
current_phase = 0
|
||||
last_updated = "2026-06-27"
|
||||
|
||||
[blocked_by]
|
||||
cruft_elimination_20260627 = "pending (the cruft track has a ProjectContext-in-models.py commit that needs to be coordinated)"
|
||||
|
||||
[blocks]
|
||||
|
||||
[phases]
|
||||
phase_0 = { status = "pending", checkpointsha = "", name = "Pre-flight + TIER2_STARTUP" }
|
||||
phase_1 = { status = "pending", checkpointsha = "", name = "MERGE ImGui LEAKS into gui_2.py (5 commits)" }
|
||||
phase_2 = { status = "pending", checkpointsha = "", name = "MERGE vendor files into ai_client.py (2 commits)" }
|
||||
phase_3 = { status = "pending", checkpointsha = "", name = "SPLIT models.py into mma.py + project.py + project_files.py + 6 sub-system merges (10 commits)" }
|
||||
phase_4 = { status = "pending", checkpointsha = "", name = "DELETE AGENT_TOOL_NAMES (1 commit)" }
|
||||
phase_5 = { status = "pending", checkpointsha = "", name = "Verification + end-of-track report" }
|
||||
|
||||
[tasks]
|
||||
t0_1 = { status = "pending", commit_sha = "", description = "Create TIER2_STARTUP.md with decision rule + 3 refactors + 8 AGENT_TOOL_NAMES consumers" }
|
||||
t1_1 = { status = "pending", commit_sha = "", description = "Move src/bg_shader.py to src/gui_2.py" }
|
||||
t1_2 = { status = "pending", commit_sha = "", description = "Move src/shaders.py to src/gui_2.py" }
|
||||
t1_3 = { status = "pending", commit_sha = "", description = "Move src/command_palette.py to src/gui_2.py" }
|
||||
t1_4 = { status = "pending", commit_sha = "", description = "Move src/diff_viewer.py to src/gui_2.py" }
|
||||
t1_5 = { status = "pending", commit_sha = "", description = "Move src/patch_modal.py to src/gui_2.py" }
|
||||
t2_1 = { status = "pending", commit_sha = "", description = "Move src/vendor_capabilities.py to src/ai_client.py" }
|
||||
t2_2 = { status = "pending", commit_sha = "", description = "Move src/vendor_state.py to src/ai_client.py" }
|
||||
t3_1 = { status = "pending", commit_sha = "", description = "Create src/mma.py with MMA Core + TrackState (split from models.py)" }
|
||||
t3_2 = { status = "pending", commit_sha = "", description = "Create src/project.py with ProjectContext + sub + config IO (split from models.py)" }
|
||||
t3_3 = { status = "pending", commit_sha = "", description = "Create src/project_files.py (split from models.py)" }
|
||||
t3_4 = { status = "pending", commit_sha = "", description = "Move Persona from models.py to personas.py" }
|
||||
t3_5 = { status = "pending", commit_sha = "", description = "Move Tool + ToolPreset from models.py to tool_presets.py" }
|
||||
t3_6 = { status = "pending", commit_sha = "", description = "Move BiasProfile from models.py to tool_bias.py" }
|
||||
t3_7 = { status = "pending", commit_sha = "", description = "Move TextEditorConfig + ExternalEditorConfig from models.py to external_editor.py" }
|
||||
t3_8 = { status = "pending", commit_sha = "", description = "Move MCP config dataclasses from models.py to mcp_client.py" }
|
||||
t3_9 = { status = "pending", commit_sha = "", description = "Move WorkspaceProfile from models.py to workspace_manager.py" }
|
||||
t3_10 = { status = "pending", commit_sha = "", description = "Reduce models.py to Pydantic proxy helpers only (or delete entirely if empty)" }
|
||||
t4_1 = { status = "pending", commit_sha = "", description = "Update 8 consumer sites to use mcp_tool_specs.tool_names() instead of AGENT_TOOL_NAMES" }
|
||||
t4_2 = { status = "pending", commit_sha = "", description = "Delete AGENT_TOOL_NAMES constant from src/models.py" }
|
||||
t4_3 = { status = "pending", commit_sha = "", description = "DELETE or CONVERT test_tool_names_subset_of_models_agent_tool_names test" }
|
||||
t5_1 = { status = "pending", commit_sha = "", description = "Run all 12 VCs; write TRACK_COMPLETION; update state.toml + tracks.md" }
|
||||
|
||||
[verification]
|
||||
phase_0_complete = false
|
||||
phase_1_complete = false
|
||||
phase_2_complete = false
|
||||
phase_3_complete = false
|
||||
phase_4_complete = false
|
||||
phase_5_complete = false
|
||||
|
||||
[track_specific]
|
||||
file_change_summary = { files_deleted = 7, files_created = 4, files_modified = 10, potentially_deleted = 1 }
|
||||
net_files_change = "-4 files (65 -> 61, with potential additional -1 if models.py is eliminated)"
|
||||
im_gui_leak_count = 5
|
||||
vendor_files_to_merge = 2
|
||||
models_py_split_targets = 3
|
||||
agent_tool_names_consumers = 8
|
||||
@@ -0,0 +1,107 @@
|
||||
{
|
||||
"track_id": "test_engine_integration_20260627",
|
||||
"name": "ImGui Test Engine Integration (Bridge via API Hooks)",
|
||||
"status": "active",
|
||||
"branch": "master",
|
||||
"created": "2026-06-27",
|
||||
"owner": "Tier 1 (initialized); implementation delegated to Tier 2/3.",
|
||||
"blocked_by": [],
|
||||
"blocks": ["test_engine_docking_tests (Track 2)", "test_engine_capture_regression (Track 3)"],
|
||||
"scope": {
|
||||
"new_files": [
|
||||
"tests/test_test_engine_smoke.py",
|
||||
"docs/reports/TRACK_COMPLETION_test_engine_integration_20260627.md"
|
||||
],
|
||||
"modified_files": [
|
||||
"sloppy.py (add --enable-test-engine CLI flag)",
|
||||
"src/app_controller.py (add test_engine_enabled field)",
|
||||
"src/gui_2.py (enable engine in App.run + _register_imgui_tests method)",
|
||||
"src/api_hooks.py (4 new /api/test_engine/* endpoints)",
|
||||
"src/api_hook_client.py (4 new client methods)",
|
||||
"tests/conftest.py (pass --enable-test-engine in live_gui fixture)",
|
||||
"conductor/tracks.md (add row)",
|
||||
"conductor/chronology.md (prepend row)"
|
||||
],
|
||||
"deleted_files": []
|
||||
},
|
||||
"estimated_effort": {
|
||||
"method": "scope (per workflow.md Tier 1 Track Initialization Rules. NO day estimates.)",
|
||||
"phase_1": "4 tasks: 1 failing test + 1 CLI flag + 1 engine enable + 1 manual verification",
|
||||
"phase_2": "4 tasks: 1 failing tests + 4 endpoints + 4 client methods + green verification",
|
||||
"phase_3": "2 tasks: 1 conftest update + 1 full smoke test verification",
|
||||
"phase_4": "3 tasks: 1 end-of-track report + 1 state update + 1 user sign-off"
|
||||
},
|
||||
"verification_criteria": [
|
||||
"G1: sloppy.py accepts --enable-test-engine; when set, runner_params.use_imgui_test_engine = True + callbacks.register_tests assigned",
|
||||
"G2: App._register_imgui_tests exists + registers at least 1 smoke test via imgui.test_engine.register_test",
|
||||
"G3: HookServer has 4 new /api/test_engine/* endpoints (queue, status, results, abort)",
|
||||
"G4: ApiHookClient has 4 new methods (queue_test, get_test_status, get_test_results, wait_for_test_results)",
|
||||
"G5: live_gui fixture passes --enable-test-engine in subprocess args",
|
||||
"G6: tests/test_test_engine_smoke.py has >=3 tests; all pass (engine enabled + queue+run smoke + results shape)",
|
||||
"G7: docs/reports/TRACK_COMPLETION_test_engine_integration_20260627.md exists; documents threading model verification + Track 2 handoff",
|
||||
"VC_parallel_safe": "ZERO file overlap with tier2/post_module_taxonomy_de_cruft_20260627 (touching sloppy.py, gui_2.py:641-700, api_hooks.py, api_hook_client.py, conftest.py — none of which Tier 2 touches) or enforcement_gap_closure_20260627 (touching scripts/audit_*, python.md — zero overlap)"
|
||||
],
|
||||
"regressions_and_pre_existing_failures": [],
|
||||
"pre_existing_failures_remaining": [],
|
||||
"deferred_to_followup_tracks": [
|
||||
{
|
||||
"title": "Track 2: test_engine_docking_tests",
|
||||
"description": "Migrate docking/focus/panel tests (test_workspace_profiles_restoration, test_auto_switch_sim, etc.) to use ctx.dock_into, ctx.window_focus, ctx.window_resize. The bridge built in this track enables it.",
|
||||
"track_status": "planned (Track 2 of 3)"
|
||||
},
|
||||
{
|
||||
"title": "Track 3: test_engine_capture_regression",
|
||||
"description": "Visual regression via ctx.capture_screenshot_window + baseline PNG diff. The capture API is available but not wired in this track.",
|
||||
"track_status": "planned (Track 3 of 3)"
|
||||
},
|
||||
{
|
||||
"title": "Headless test execution",
|
||||
"description": "The test engine requires a live GLFW window. Headless mode (no window) is a future research item; the engine's scenario thread drives the actual render loop.",
|
||||
"track_status": "not yet initialized; research item"
|
||||
},
|
||||
{
|
||||
"title": "Interactive test engine panel",
|
||||
"description": "show_test_engine_windows(engine, True) opens the engine's debug UI. Not shown by default; can be added as a debug toggle in a follow-up.",
|
||||
"track_status": "not yet initialized"
|
||||
}
|
||||
],
|
||||
"risk_register": [
|
||||
{
|
||||
"id": "R1",
|
||||
"description": "GIL-transfer crash: the test engine's scenario thread calls Python test_func from a different thread; if the GIL transfer mechanism in hello_imgui/immapp doesn't work with the app's existing thread layout, the app crashes",
|
||||
"likelihood": "medium",
|
||||
"impact": "hard blocker; the entire test engine approach is invalid if the threading model doesn't work",
|
||||
"mitigation": "Phase 1 Task 1.4 is a manual verification checkpoint that catches this before any further work. If it crashes, STOP and report to user. The demo_testengine.py proves the mechanism works for simple apps; the risk is specific to this app's thread layout (AppController, SyncEventQueue, etc.)"
|
||||
},
|
||||
{
|
||||
"id": "R2",
|
||||
"description": "Label path mismatch: the smoke test's ctx.set_ref('###manual slop') + ctx.item_click('**/Session') may not match the actual label tree",
|
||||
"likelihood": "high",
|
||||
"impact": "smoke test fails with 'item not found'; not a crash, just a wrong path",
|
||||
"mitigation": "Use imgui.show_id_stack_tool_window() or ctx.window_info() to find the correct labels during implementation. The label tree is deterministic (same build, same layout). Once found, the path is stable."
|
||||
},
|
||||
{
|
||||
"id": "R3",
|
||||
"description": "Engine overhead degrades live_gui test performance",
|
||||
"likelihood": "low",
|
||||
"impact": "live_gui tests take longer; batch run exceeds timeout",
|
||||
"mitigation": "The engine is idle when no tests are queued (sub-ms per-frame overhead). The existing fps_idling settings are unchanged. If measurable, the --enable-test-engine flag can be made conditional (only passed when running test_test_engine_* files)."
|
||||
},
|
||||
{
|
||||
"id": "R4",
|
||||
"description": "test_func accesses App state from the scenario thread, causing a race with the GUI render thread",
|
||||
"likelihood": "medium",
|
||||
"impact": "intermittent test failures or state corruption",
|
||||
"mitigation": "The spec FR2 + plan Task 1.3 explicitly document: test_func must NOT directly mutate App/AppController state; it must use ctx.* primitives (which post simulated input to the GUI thread). Reading via ctx.item_info / ctx.window_info is safe (C++ accessors). CHECK() runs on the scenario thread but only writes to the engine's C++ result log (thread-safe)."
|
||||
}
|
||||
],
|
||||
"campaign": {
|
||||
"name": "Test Engine Campaign (3 tracks)",
|
||||
"tracks": [
|
||||
"test_engine_integration_20260627 (THIS TRACK; bridge + smoke test)",
|
||||
"test_engine_docking_tests (Track 2; migrate docking/focus/panel tests)",
|
||||
"test_engine_capture_regression (Track 3; visual regression via screenshot capture)"
|
||||
],
|
||||
"campaign_rationale": "The test engine enables high-fidelity simulation of docking, focus, panel visibility, drag-and-drop, and keyboard input that the current Hook API cannot express. The campaign is split into 3 tracks to isolate risk: Track 1 proves the threading model + bridge work; Track 2 migrates the high-value docking tests; Track 3 adds visual regression. Each track is independently shippable."
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,163 @@
|
||||
# Plan: ImGui Test Engine Integration (Bridge via API Hooks)
|
||||
|
||||
Track: `test_engine_integration_20260627`
|
||||
Branch: master (parallel-safe; touches `sloppy.py`, `src/gui_2.py`, `src/app_controller.py`, `src/api_hooks.py`, `src/api_hook_client.py`, `tests/conftest.py`, new `tests/test_test_engine_smoke.py` — zero overlap with the running tier2 taxonomy branch or the enforcement_gap_closure track)
|
||||
Spec: `conductor/tracks/test_engine_integration_20260627/spec.md`
|
||||
|
||||
All Python edits use 1-space indentation. No comments in body. CRLF preserved.
|
||||
|
||||
---
|
||||
|
||||
## Phase 1: Enable the Test Engine in the App
|
||||
|
||||
Focus: Add `--enable-test-engine` CLI flag, set `runner_params.use_imgui_test_engine`, add the `register_tests` callback with a placeholder smoke test.
|
||||
|
||||
- [ ] Task 1.1: Write failing test for `--enable-test-engine` flag + engine activation
|
||||
- **WHERE:** `tests/test_test_engine_smoke.py` (NEW file)
|
||||
- **WHAT:** Test 1: `test_engine_enabled` — start `live_gui` (which will pass `--enable-test-engine`), verify the engine is active by calling `client.get_test_status()` (new method, implemented in Phase 3) and asserting `queue_empty == True` (engine is running, no tests queued). This test will FAIL before Phase 1 + Phase 3 land (the endpoint doesn't exist yet).
|
||||
- **HOW:** Use the `live_gui` fixture. Call `client.get_test_status()`. Assert the response has a `queue_empty` field. (The method is added in Phase 3; the test is written first per TDD.)
|
||||
- **SAFETY:** No `live_gui` state mutation; just a GET request.
|
||||
- **COMMIT:** `test(smoke): add failing test for test engine activation`
|
||||
- **GIT NOTE:** Red-phase test for the `--enable-test-engine` flag + engine activation.
|
||||
|
||||
- [ ] Task 1.2: Add `--enable-test-engine` CLI flag to `sloppy.py` + `AppController`
|
||||
- **WHERE:** `sloppy.py:35` (add arg), `src/app_controller.py:1042` (add `test_engine_enabled` field)
|
||||
- **WHAT:**
|
||||
1. `sloppy.py`: add `parser.add_argument("--enable-test-engine", action="store_true", help="Enable Dear ImGui Test Engine for automated UI testing")` after the `--enable-test-hooks` line.
|
||||
2. `src/app_controller.py:1042`: add `self.test_engine_enabled: bool = ("--enable-test-engine" in sys.argv)` after the `test_hooks_enabled` line.
|
||||
- **HOW:** Use `manual-slop_edit_file` MCP tool. 1-space indent.
|
||||
- **SAFETY:** The flag is opt-in; normal runs are unaffected.
|
||||
- **COMMIT:** `feat(cli): add --enable-test-engine flag`
|
||||
- **GIT NOTE:** CLI flag for test engine; mirrors the --enable-test-hooks pattern at app_controller.py:1042.
|
||||
|
||||
- [ ] Task 1.3: Enable the engine in `App.run()` + add `_register_imgui_tests` callback
|
||||
- **WHERE:** `src/gui_2.py:641` (after `RunnerParams()` construction) + `src/gui_2.py:~700` (new `_register_imgui_tests` method)
|
||||
- **WHAT:**
|
||||
1. In `App.run()` between line 641 (`self.runner_params = _hi.RunnerParams()`) and line 684 (`callbacks.show_gui = ...`), add:
|
||||
```python
|
||||
if getattr(self.controller, "test_engine_enabled", False):
|
||||
self.runner_params.use_imgui_test_engine = True
|
||||
self.runner_params.callbacks.register_tests = self._register_imgui_tests
|
||||
```
|
||||
2. Add `_register_imgui_tests(self)` method on `App` (after `_post_init`, ~line 700):
|
||||
```python
|
||||
def _register_imgui_tests(self) -> None:
|
||||
from imgui_bundle import hello_imgui
|
||||
from imgui_bundle.imgui import test_engine
|
||||
engine = hello_imgui.get_imgui_test_engine()
|
||||
test = test_engine.register_test(engine, "Smoke Tests", "Tab Switch")
|
||||
def smoke_func(ctx) -> None:
|
||||
from imgui_bundle.imgui.test_engine_checks import CHECK
|
||||
ctx.set_ref("###manual slop")
|
||||
ctx.item_click("**/Session")
|
||||
CHECK(True)
|
||||
test.test_func = smoke_func
|
||||
```
|
||||
The exact `set_ref` + `item_click` targets are determined during implementation by inspecting the running GUI's label tree. The smoke test should click a harmless tab (e.g., switch to "Session" tab) and `CHECK(True)` as a placeholder assertion. The real assertion (verify the tab actually switched) is added once the label path is confirmed.
|
||||
- **HOW:** Use `manual-slop_edit_file` / `manual-slop_py_update_definition` MCP tool. 1-space indent.
|
||||
- **SAFETY:** Guarded by `test_engine_enabled`; normal runs skip this entirely. The `register_tests` callback is only called by `hello_imgui` when `use_imgui_test_engine = True`.
|
||||
- **COMMIT:** `feat(gui): enable test engine + register smoke test via callbacks.register_tests`
|
||||
- **GIT NOTE:** Activates the test engine when --enable-test-engine is set; registers a placeholder smoke test.
|
||||
|
||||
- [ ] Task 1.4: Verify the engine activates (manual)
|
||||
- **WHAT:** Run `uv run python sloppy.py --enable-test-hooks --enable-test-engine` locally. Verify the app starts without crashing (the GIL-transfer mechanism works). Verify `hello_imgui.get_imgui_test_engine()` returns a non-None engine. This is a manual checkpoint before proceeding to Phase 2.
|
||||
- **COMMIT:** (no commit; manual verification checkpoint)
|
||||
- **GIT NOTE:** Manual verification that the engine + GIL transfer works with the app's existing thread layout.
|
||||
|
||||
---
|
||||
|
||||
## Phase 2: Build the API Hooks Bridge
|
||||
|
||||
Focus: Add the 4 `/api/test_engine/*` endpoints to `HookServer` + the 4 methods to `ApiHookClient`.
|
||||
|
||||
- [ ] Task 2.1: Write failing tests for the 4 new `ApiHookClient` methods
|
||||
- **WHERE:** `tests/test_test_engine_smoke.py` (append to the file from Task 1.1)
|
||||
- **WHAT:** 2 more tests:
|
||||
- `test_queue_and_run_smoke_test`: queue the smoke test via `client.queue_test("Smoke Tests", "Tab Switch")`, poll via `client.wait_for_test_results(timeout=30)`, assert `results["count_success"] >= 1` and `results["count_tested"] >= 1`.
|
||||
- `test_engine_results_shape`: call `client.get_test_results()`, assert the response dict has keys `count_tested`, `count_success`, `count_in_queue`.
|
||||
- **HOW:** Use `live_gui` fixture. These tests fail until Phase 2 + Phase 3 land (the client methods + endpoints don't exist yet).
|
||||
- **SAFETY:** The smoke test queues a harmless tab-switch; no destructive state change.
|
||||
- **COMMIT:** `test(smoke): add failing tests for queue_test + wait_for_test_results + get_test_results`
|
||||
- **GIT NOTE:** Red-phase tests for the 4 new ApiHookClient methods.
|
||||
|
||||
- [ ] Task 2.2: Add the 4 `/api/test_engine/*` endpoints to `HookServer`
|
||||
- **WHERE:** `src/api_hooks.py` — `do_GET` (line 157) + `do_POST` (line 490)
|
||||
- **WHAT:** Add 4 new `elif` branches:
|
||||
1. `do_GET`: `elif self.path == "/api/test_engine/status":` — lazy-import `hello_imgui` + `test_engine`; get engine via `hello_imgui.get_imgui_test_engine()`; call `test_engine.is_test_queue_empty(engine)`; respond `{"queue_empty": bool}`.
|
||||
2. `do_GET`: `elif self.path == "/api/test_engine/results":` — get engine; create `TestEngineResultSummary()`; call `test_engine.get_result_summary(engine, out_results)`; respond `{"count_tested": N, "count_success": N, "count_in_queue": N}`.
|
||||
3. `do_POST`: `elif self.path == "/api/test_engine/queue":` — body `{"group": "...", "name": "..."}`; get engine; find test via `test_engine.find_test_by_name(engine, group, name)`; if found, `test_engine.queue_test(engine, test)`; respond `{"status": "queued"}` or `{"error": "test not found"}` (404).
|
||||
4. `do_POST`: `elif self.path == "/api/test_engine/abort":` — get engine; `test_engine.abort_current_test(engine)`; respond `{"status": "aborted"}`.
|
||||
- **HOW:** Follow the existing endpoint pattern (lines 499-505 for POST, lines 231-241 for GET). Use `_get_app_attr(app, "controller")` to check `test_engine_enabled`; if not enabled, respond 503. Use `json.dumps(...)` for the response body. 1-space indent.
|
||||
- **SAFETY:** The endpoints run on the HTTP handler thread. `hello_imgui.get_imgui_test_engine()` is a C++ accessor (thread-safe). `queue_test` / `is_test_queue_empty` / `get_result_summary` are thread-safe C++ engine operations (the engine is designed for cross-thread test scheduling). `abort_current_test` is also thread-safe.
|
||||
- **COMMIT:** `feat(api_hooks): add /api/test_engine/* bridge endpoints`
|
||||
- **GIT NOTE:** 4 new endpoints: queue, status, results, abort; bridge the test process to the engine via HTTP.
|
||||
|
||||
- [ ] Task 2.3: Add the 4 new methods to `ApiHookClient`
|
||||
- **WHERE:** `src/api_hook_client.py` (after the existing methods, ~line 500)
|
||||
- **WHAT:** 4 new methods:
|
||||
1. `queue_test(self, group: str, name: str) -> dict` — POST `/api/test_engine/queue` with `{"group": group, "name": name}`; return the response dict.
|
||||
2. `get_test_status(self) -> dict` — GET `/api/test_engine/status`; return `{"queue_empty": bool}`.
|
||||
3. `get_test_results(self) -> dict` — GET `/api/test_engine/results`; return `{"count_tested": N, "count_success": N, "count_in_queue": N}`.
|
||||
4. `wait_for_test_results(self, timeout: float = 30.0) -> dict` — poll `get_test_status()` every 0.5s until `queue_empty == True` or timeout; then return `get_test_results()`. On timeout, return the last results (with a `timed_out: True` field).
|
||||
- **HOW:** Follow the existing method pattern (e.g., `get_status` at line 105, `push_event` at line 156). Use `requests.get/post` + retry. 1-space indent.
|
||||
- **SAFETY:** Pure HTTP client; no thread safety concerns.
|
||||
- **COMMIT:** `feat(api_hook_client): add queue_test + get_test_status + get_test_results + wait_for_test_results`
|
||||
- **GIT NOTE:** 4 new client methods mirroring the 4 new endpoints; wait_for_test_results replaces time.sleep+get_value polling.
|
||||
|
||||
- [ ] Task 2.4: Run Phase 2 tests (Green phase)
|
||||
- **WHAT:** `uv run pytest tests/test_test_engine_smoke.py -v --timeout=60`. All 3 tests must pass. If the smoke test (test_queue_and_run_smoke_test) fails, the most likely cause is the `set_ref` / `item_click` label path being wrong — debug by using `imgui.show_id_stack_tool_window()` or `ctx.window_info("manual slop")` to find the correct label. If the GIL transfer fails, the app will crash — that's a hard blocker; report to user.
|
||||
- **COMMIT:** `conductor(state): Phase 2 green-phase verification` (or skip if no code changes)
|
||||
- **GIT NOTE:** Green-phase verification for the 4 new endpoints + 4 new client methods.
|
||||
|
||||
---
|
||||
|
||||
## Phase 3: Live_gui Fixture + Full Smoke Test
|
||||
|
||||
Focus: Pass `--enable-test-engine` in the `live_gui` fixture + verify the full bridge works end-to-end.
|
||||
|
||||
- [ ] Task 3.1: Update `live_gui` fixture to pass `--enable-test-engine`
|
||||
- **WHERE:** `tests/conftest.py:792`
|
||||
- **WHAT:** Change `gui_args = ["uv", "run", "python", "-u", gui_script, "--enable-test-hooks"]` to include `"--enable-test-engine"`:
|
||||
```python
|
||||
gui_args = ["uv", "run", "python", "-u", gui_script, "--enable-test-hooks", "--enable-test-engine"]
|
||||
```
|
||||
- **HOW:** `manual-slop_edit_file` MCP tool. 1-space indent.
|
||||
- **SAFETY:** The engine is idle when no tests are queued. Existing `live_gui` tests that don't use the test engine are unaffected (the engine adds sub-ms per-frame overhead).
|
||||
- **COMMIT:** `test(conftest): pass --enable-test-engine in live_gui fixture`
|
||||
- **GIT NOTE:** Engine activates on every live_gui run; idle when no tests queued.
|
||||
|
||||
- [ ] Task 3.2: Run the full smoke test suite (Green phase)
|
||||
- **WHAT:** `uv run pytest tests/test_test_engine_smoke.py -v --timeout=60`. All 3 tests pass. Then run a small batch of existing `live_gui` tests to verify no regression: `uv run pytest tests/test_workspace_profiles_restoration.py tests/test_undo_redo_lifecycle.py -v --timeout=120`.
|
||||
- **COMMIT:** `conductor(state): Phase 3 green-phase verification`
|
||||
- **GIT NOTE:** Full bridge verified: pytest → HTTP → HookServer → engine → scenario thread → ctx.item_click → GUI thread → CHECK → results → HTTP → pytest assert.
|
||||
|
||||
---
|
||||
|
||||
## Phase 4: End-of-Track Report + State Update
|
||||
|
||||
- [ ] Task 4.1: Write end-of-track report
|
||||
- **WHERE:** `docs/reports/TRACK_COMPLETION_test_engine_integration_20260627.md` (NEW file)
|
||||
- **WHAT:** Report following the precedent:
|
||||
- TL;DR
|
||||
- Phase summary (each phase + commits + status)
|
||||
- Verification Criteria status (mapped to spec G1-G7)
|
||||
- Threading model verification (did the GIL transfer work? any crashes? any state-access issues from the scenario thread?)
|
||||
- The 4 new endpoints + 4 new client methods documented
|
||||
- The smoke test result
|
||||
- Handoff to Track 2 (docking test migration) — what's now possible that wasn't before
|
||||
- Known limitations (engine requires a live window; not headless; the interactive panel is not shown)
|
||||
- **COMMIT:** `docs(reports): TRACK_COMPLETION_test_engine_integration_20260627`
|
||||
- **GIT NOTE:** End-of-track report; documents the bridge + threading model verification + Track 2 handoff.
|
||||
|
||||
- [ ] Task 4.2: Update `conductor/tracks.md` + `conductor/chronology.md` + `state.toml`
|
||||
- **WHAT:**
|
||||
1. `state.toml`: mark all phases "completed" with checkpoint SHA; `status = "completed"`.
|
||||
2. `conductor/tracks.md`: add row for this track (status "shipped").
|
||||
3. `conductor/chronology.md`: prepend row for `2026-06-27 | test_engine_integration_20260627 | shipped | ...`.
|
||||
- **COMMIT:** `conductor(state): test_engine_integration_20260627 SHIPPED + TRACK_COMPLETION`
|
||||
- **GIT NOTE:** Track state + chronology + tracks.md closed out.
|
||||
|
||||
- [ ] Task 4.3: Conductor - User Manual Verification
|
||||
- **WHAT:** Present the results: the smoke test pass, the threading model verification, the 4 new endpoints, the 4 new client methods. PAUSE for user sign-off.
|
||||
- **COMMIT:** (no commit; user-confirmation gate)
|
||||
- **GIT NOTE:** User sign-off record.
|
||||
@@ -0,0 +1,187 @@
|
||||
# Track Specification: ImGui Test Engine Integration (Bridge via API Hooks)
|
||||
|
||||
## Overview
|
||||
|
||||
Integrate the Dear ImGui Test Engine (`imgui_bundle.imgui.test_engine`) into Manual Slop's test infrastructure to enable high-fidelity simulation of user interactions — docking, window focus, panel visibility, drag-and-drop, keyboard input — that the current Hook API cannot express.
|
||||
|
||||
**The design principle:** the API hooks layer (`HookServer` on :8999 + `ApiHookClient`) remains the **single communication boundary** between the test process (pytest) and the GUI subprocess. The test engine is integrated *behind* the API hooks, not alongside them. New `/api/test_engine/*` endpoints bridge the test process to the engine's `queue_test` / `get_result_summary` API. The engine's `test_func` closures run on the engine's scenario thread (GIL-transferred by `hello_imgui`/`immapp`); they use `ctx.item_click("**/Label")`, `ctx.dock_into(src, dst, dir)`, `ctx.window_focus(ref)` etc. to post simulated input events to the GUI render thread. The existing `_pending_gui_tasks` queue and the engine's input simulation are two separate event injection paths into the same GUI thread; they compose without conflict.
|
||||
|
||||
This is **Track 1 of 3** in the test engine campaign. Track 1 = enable the engine + build the bridge + smoke test. Track 2 (follow-up) = migrate docking/focus/panel tests. Track 3 (follow-up) = visual regression via screenshot capture.
|
||||
|
||||
## Current State Audit (as of master `77b70226`)
|
||||
|
||||
### Already Implemented (DO NOT re-implement)
|
||||
|
||||
- **`imgui_bundle` v1.92.5** (pinned in `pyproject.toml:7`) ships the test engine compiled into the nanobind binary. Verified: `from imgui_bundle import imgui; imgui.test_engine.TestEngine` is a live class; `imgui.test_engine.register_test`, `imgui.test_engine.queue_test`, `imgui.test_engine.get_result_summary`, `imgui.test_engine.TestContext` with `dock_into`, `window_focus`, `item_click`, `capture_screenshot_window`, etc. are all present (verified via `dir()` enumeration — ~95 `TestContext` methods + ~35 module-level functions). The `.pyi` stub at `.venv/Lib/site-packages/imgui_bundle/imgui/test_engine.pyi` documents the full API.
|
||||
|
||||
- **`hello_imgui.RunnerParams.use_imgui_test_engine: bool = False`** (`.venv/Lib/site-packages/imgui_bundle/hello_imgui.pyi:2969`) — the flag that enables the engine. When `True`, `hello_imgui`/`immapp` compiles the engine into the runner and provides the GIL-transfer mechanism for the scenario thread. The engine is **already compiled into the wheel** (the C++ build flag `-DHELLOIMGUI_WITH_TEST_ENGINE=ON` was set for the published wheel); the Python-side flag just activates it.
|
||||
|
||||
- **`hello_imgui.get_imgui_test_engine()`** (`.venv/Lib/site-packages/imgui_bundle/hello_imgui.pyi:3355`) — returns the live `TestEngine` instance after `use_imgui_test_engine = True`. Verified callable.
|
||||
|
||||
- **`RunnerCallbacks.register_tests: VoidFunction`** (`.venv/Lib/site-packages/imgui_bundle/hello_imgui.pyi:1809`) — the callback that `hello_imgui` invokes at startup to let the app register tests via `imgui.test_engine.register_test(engine, group, name)`. The demo at `.venv/Lib/site-packages/imgui_bundle/demos_python/demos_immapp/demo_testengine.py` shows the full pattern.
|
||||
|
||||
- **`imgui_bundle.imgui.test_engine_checks.CHECK(result: bool)`** — the assertion primitive that emits pass/fail to the engine's result log with file:line traceback. Verified importable.
|
||||
|
||||
- **The app already uses `hello_imgui.RunnerParams` + `immapp.run()`** — the exact integration path the test engine requires:
|
||||
- `src/gui_2.py:641`: `self.runner_params = _hi.RunnerParams()`
|
||||
- `src/gui_2.py:684-688`: `self.runner_params.callbacks.show_gui/show_menus/load_additional_fonts/setup_imgui_style/post_init` are set
|
||||
- `src/gui_2.py:1486`: `immapp.run(app.runner_params, ...)` — the main loop entry point
|
||||
- The GIL-transfer mechanism is built into `immapp.run` when `use_imgui_test_engine = True`; no additional threading code is needed on the Python side.
|
||||
|
||||
- **`HookServer`** (`src/api_hooks.py:857`) — the HTTP server on `127.0.0.1:8999`, started when `--enable-test-hooks` is passed. The `do_GET` method (line 157) and `do_POST` method (line 490) use a flat `if/elif self.path == "/api/..."` dispatch. The server holds `self.app` (the `App` instance) and accesses it via `_get_app_attr(app, ...)` helpers. The `_pending_gui_tasks` queue (`app_controller.py:900`) + `_pending_gui_tasks_lock` (`app_controller.py:822`) + `_process_pending_gui_tasks()` (`app_controller.py:1844`, called per-frame from `gui_2.py:1759`) is the existing thread-safe command queue from HTTP handler thread → main render thread.
|
||||
|
||||
- **`ApiHookClient`** (`src/api_hook_client.py`) — the Python client with retry logic, health-check polling, `wait_for_server(timeout)`, `push_event(action, payload)`, `get_value(item)`, `set_value(item, value)`, `click(item)`, `wait_for_event(event_type, timeout)`, etc. Used by all `live_gui` tests.
|
||||
|
||||
- **`live_gui` fixture** (`tests/conftest.py:641`) — session-scoped; spawns `sloppy.py --enable-test-hooks --config=<temp>` as a subprocess; polls `http://127.0.0.1:8999/status` until ready; yields a `_LiveGuiHandle` with `.client` (an `ApiHookClient`), `.process`, `.workspace`. The fixture's subprocess args are at `conftest.py:792`: `gui_args = ["uv", "run", "python", "-u", gui_script, "--enable-test-hooks"]`.
|
||||
|
||||
- **`sloppy.py`** (79 lines) — the entry point. CLI flags at lines 31-36: `--headless`, `--web-host`, `--web-port`, `--enable-test-hooks`, `--config`. The `else` branch at line 75 (the normal GUI mode) calls `from src.gui_2 import main; main()`.
|
||||
|
||||
- **`AppController.test_hooks_enabled`** (`src/app_controller.py:1042`) — set via `"--enable-test-hooks" in sys.argv` or `SLOP_TEST_HOOKS=1` env var. Same pattern works for `--enable-test-engine`.
|
||||
|
||||
### Gaps to Fill (This Track's Scope)
|
||||
|
||||
- **GAP-1: The test engine is not enabled.** `runner_params.use_imgui_test_engine` is never set to `True`. No `callbacks.register_tests` callback exists. The engine's scenario thread + GIL-transfer mechanism are dormant.
|
||||
|
||||
- **GAP-2: No `/api/test_engine/*` bridge endpoints.** The `HookServer` has no way for the test process to queue a test, poll results, or abort a running test. The test engine API (`queue_test`, `get_result_summary`, `is_test_queue_empty`, `abort_current_test`) is only accessible from inside the GUI process — not from the HTTP boundary.
|
||||
|
||||
- **GAP-3: No `ApiHookClient` methods for test engine operations.** The client has `click`, `set_value`, `push_event`, `wait_for_event` — but no `queue_test`, `wait_for_test_results`, `get_test_results`.
|
||||
|
||||
- **GAP-4: `live_gui` fixture doesn't pass `--enable-test-engine`.** The subprocess at `conftest.py:792` only passes `--enable-test-hooks`. Without the engine flag, the engine won't activate even after GAP-1 is fixed.
|
||||
|
||||
- **GAP-5: No smoke test proving the end-to-end threading model works.** The test engine's scenario thread + GIL transfer is the highest-risk piece. A minimal smoke test (register a trivial test that clicks a known button + asserts a state change, queue it via the API, poll for results, assert pass) is needed to prove the bridge works before Track 2 migrates real tests.
|
||||
|
||||
### Architecture: Why the API hooks + test engine compose
|
||||
|
||||
```
|
||||
pytest test process
|
||||
└── ApiHookClient (HTTP :8999) ← single communication boundary (KEPT)
|
||||
└── HookServer.do_POST ← new /api/test_engine/* endpoints
|
||||
└── imgui.test_engine.queue_test(engine, test) ← schedules on engine
|
||||
└── TestContext.test_func(ctx) ← runs on engine scenario thread
|
||||
└── ctx.item_click("**/Label") ← posts simulated input to GUI thread
|
||||
└── GUI render thread processes the simulated event
|
||||
└── _process_pending_gui_tasks() still runs per-frame
|
||||
(existing queue; unaffected; two separate injection paths)
|
||||
```
|
||||
|
||||
The test engine's `test_func` runs on its own thread (the scenario thread). The `ctx.*` primitives post simulated input events to the ImGui input queue on the GUI render thread. This is the same destination as real user input and the same destination as `_pending_gui_tasks` — but a different injection mechanism. The two paths are independent; they don't share state, locks, or queues. The test engine doesn't touch `_pending_gui_tasks` and vice versa.
|
||||
|
||||
The GIL-transfer caveat (documented at the top of `test_engine.pyi`) is handled by `hello_imgui`/`immapp` when `use_imgui_test_engine = True` — the C++ layer transfers the GIL between the main thread and the scenario thread. No additional Python-side threading code is needed. The `test_func` callback runs with the GIL held; it can safely call `ctx.*` primitives (which are C++ nanobind calls that release the GIL during the simulated input wait).
|
||||
|
||||
## Goals
|
||||
|
||||
- **G1.** `sloppy.py` accepts `--enable-test-engine` CLI flag; when set, `App.run()` sets `runner_params.use_imgui_test_engine = True` + assigns `runner_params.callbacks.register_tests` to a method that registers tests.
|
||||
|
||||
- **G2.** `App` has a `_register_imgui_tests(self)` method (called by `hello_imgui` at startup via the `register_tests` callback) that registers at least one smoke test ("Smoke Tests", "Click Increment Button") via `imgui.test_engine.register_test(engine, group, name)`. The smoke test's `test_func(ctx)` calls `ctx.set_ref("...")` + `ctx.item_click("**/...")` + `CHECK(...)`.
|
||||
|
||||
- **G3.** `HookServer` (in `src/api_hooks.py`) has 4 new endpoints:
|
||||
- `POST /api/test_engine/queue` — body `{"group": "...", "name": "..."}`; finds the test by group+name via `imgui.test_engine.find_test_by_name(engine, group, name)`; calls `queue_test(engine, test)`; responds `{"status": "queued"}`.
|
||||
- `GET /api/test_engine/status` — calls `is_test_queue_empty(engine)`; responds `{"queue_empty": true/false}`.
|
||||
- `GET /api/test_engine/results` — calls `get_result_summary(engine, out_results)`; responds `{"count_tested": N, "count_success": N, "count_in_queue": N}`.
|
||||
- `POST /api/test_engine/abort` — calls `abort_current_test(engine)`; responds `{"status": "aborted"}`.
|
||||
|
||||
- **G4.** `ApiHookClient` (in `src/api_hook_client.py`) has 4 new methods:
|
||||
- `queue_test(group: str, name: str) -> dict` — POST to `/api/test_engine/queue`.
|
||||
- `get_test_status() -> dict` — GET `/api/test_engine/status`.
|
||||
- `get_test_results() -> dict` — GET `/api/test_engine/results`.
|
||||
- `wait_for_test_results(timeout: float = 30.0) -> dict` — polls `get_test_status()` until `queue_empty == True` or timeout; then returns `get_test_results()`.
|
||||
|
||||
- **G5.** The `live_gui` fixture passes `--enable-test-engine` in addition to `--enable-test-hooks` in the subprocess args (`conftest.py:792`). The engine activates on every `live_gui` test run.
|
||||
|
||||
- **G6.** A smoke test in `tests/test_test_engine_smoke.py` that:
|
||||
1. Uses the `live_gui` fixture.
|
||||
2. Queues the smoke test via `client.queue_test("Smoke Tests", "Click Increment Button")`.
|
||||
3. Polls via `client.wait_for_test_results(timeout=30)`.
|
||||
4. Asserts `results["count_success"] >= 1` and `results["count_tested"] >= 1`.
|
||||
This proves the full bridge works: pytest → HTTP → HookServer → engine → scenario thread → `ctx.item_click` → GUI thread → state change → `CHECK` → result log → `get_result_summary` → HTTP → pytest assert.
|
||||
|
||||
- **G7.** End-of-track report at `docs/reports/TRACK_COMPLETION_test_engine_integration_20260627.md` documenting: what shipped, the threading model verification, any GIL-transfer issues encountered, and the handoff to Track 2 (docking test migration).
|
||||
|
||||
## Functional Requirements
|
||||
|
||||
### FR1: `--enable-test-engine` CLI flag
|
||||
|
||||
- `sloppy.py`: add `parser.add_argument("--enable-test-engine", action="store_true", help="Enable the Dear ImGui Test Engine for automated UI testing")` alongside the existing `--enable-test-hooks` flag (line 35).
|
||||
- `src/app_controller.py`: add `self.test_engine_enabled: bool = ("--enable-test-engine" in sys.argv)` near line 1042 (same pattern as `test_hooks_enabled`).
|
||||
- `src/gui_2.py` `App.run()` (line 619): between the `RunnerParams()` construction (line 641) and the `callbacks.show_gui = ...` assignments (line 684), add:
|
||||
```python
|
||||
if getattr(self.controller, "test_engine_enabled", False):
|
||||
self.runner_params.use_imgui_test_engine = True
|
||||
self.runner_params.callbacks.register_tests = self._register_imgui_tests
|
||||
```
|
||||
This is guarded by the flag so normal runs are unaffected.
|
||||
|
||||
### FR2: `App._register_imgui_tests(self)` method
|
||||
|
||||
- New method on `App` in `src/gui_2.py` (near the other callback registrations, ~line 700):
|
||||
```python
|
||||
def _register_imgui_tests(self) -> None:
|
||||
"""Called by hello_imgui at startup to register ImGui Test Engine tests.
|
||||
Reads the live engine via hello_imgui.get_imgui_test_engine().
|
||||
[C: src/gui_2.py:App.run (via callbacks.register_tests)]
|
||||
"""
|
||||
from imgui_bundle import hello_imgui
|
||||
from imgui_bundle.imgui import test_engine
|
||||
engine = hello_imgui.get_imgui_test_engine()
|
||||
# Smoke test: click a known button and verify state change
|
||||
test = test_engine.register_test(engine, "Smoke Tests", "Click Increment Button")
|
||||
def smoke_func(ctx) -> None:
|
||||
from imgui_bundle.imgui.test_engine_checks import CHECK
|
||||
ctx.set_ref("...") # TODO: set to a known window
|
||||
ctx.item_click("**/...") # TODO: click a known button
|
||||
CHECK(True) # TODO: verify state change
|
||||
test.test_func = smoke_func
|
||||
```
|
||||
The exact button + state to click + verify is determined during implementation by inspecting the running GUI's item tree (use `ctx.window_info` / `imgui.show_id_stack_tool_window` to find labels). The smoke test should click something harmless (e.g., a tab switch, a checkbox toggle) and verify the state changed.
|
||||
|
||||
### FR3: `/api/test_engine/*` endpoints in `HookServer`
|
||||
|
||||
- In `src/api_hooks.py` `do_POST` (line 490): add 2 new `elif` branches for `POST /api/test_engine/queue` and `POST /api/test_engine/abort`.
|
||||
- In `src/api_hooks.py` `do_GET` (line 157): add 2 new `elif` branches for `GET /api/test_engine/status` and `GET /api/test_engine/results`.
|
||||
- All 4 endpoints guard on `test_engine_enabled` — if the engine is not active, respond `{"error": "test engine not enabled", "enabled": false}` with HTTP 503.
|
||||
- The engine instance is obtained via `hello_imgui.get_imgui_test_engine()` inside the handler (lazy import; the handler runs on the HTTP thread, but `get_imgui_test_engine()` is a C++ accessor that returns a pointer — safe to call from any thread).
|
||||
|
||||
### FR4: `ApiHookClient` methods
|
||||
|
||||
- In `src/api_hook_client.py`: add 4 methods per G4. Follow the existing method pattern (e.g., `get_status`, `push_event`): construct the URL, `requests.post/get`, retry on connection error, parse JSON, return the dict.
|
||||
|
||||
### FR5: `live_gui` fixture update
|
||||
|
||||
- In `tests/conftest.py:792`: change `gui_args` to include `"--enable-test-engine"` when the fixture spawns the subprocess. The flag flows through to `AppController.test_engine_enabled` → `App.run()` → `runner_params.use_imgui_test_engine = True`.
|
||||
|
||||
### FR6: Smoke test
|
||||
|
||||
- `tests/test_test_engine_smoke.py` (NEW) — 2-3 tests:
|
||||
- `test_engine_enabled`: `client.get_value("test_engine_enabled")` returns True (or verify via a new gettable field).
|
||||
- `test_queue_and_run_smoke_test`: queue the smoke test, poll for results, assert success.
|
||||
- `test_engine_results_shape`: `get_test_results()` returns the expected dict shape.
|
||||
|
||||
## Non-Functional Requirements
|
||||
|
||||
- **1-space indentation** for all Python code.
|
||||
- **No comments in body** per AGENTS.md.
|
||||
- **CRLF line endings** preserved.
|
||||
- **Atomic per-task commits.**
|
||||
- **Thread safety:** the `test_func` runs on the engine scenario thread. It must NOT directly mutate `App` or `AppController` state — it must use `ctx.*` primitives (which post simulated input to the GUI thread). Reading state via `hello_imgui.get_imgui_test_engine()` or engine queries (`ctx.item_info`, `ctx.window_info`) is safe. The `CHECK()` assertion runs on the scenario thread but only writes to the engine's result log (thread-safe C++ structure).
|
||||
- **No `live_gui` regression:** the `--enable-test-engine` flag must not affect normal GUI behavior when `live_gui` tests are NOT using the engine. The engine's scenario thread is idle when no tests are queued. The `show_test_engine_windows` panel is NOT shown by default (only via explicit call).
|
||||
- **Performance:** the engine adds a per-frame overhead when active. The `fps_idling` settings in `runner_params` remain unchanged. The engine's overhead is sub-millisecond per frame when no tests are running.
|
||||
|
||||
## Architecture Reference
|
||||
|
||||
- **`docs/guide_testing.md`** — the `live_gui` fixture, the structural testing contract, the Puppeteer pattern.
|
||||
- **`docs/guide_api_hooks.md`** — the Hook API surface, the `/api/ask` protocol, the `ApiHookClient` method reference.
|
||||
- **`docs/guide_gui_2.md`** — the `App` class lifecycle, the `runner_params` construction, the `callbacks` system.
|
||||
- **`.venv/Lib/site-packages/imgui_bundle/demos_python/demos_immapp/demo_testengine.py`** — the canonical demo for the test engine integration pattern (register_tests callback + test_func closures + CHECK).
|
||||
- **`.venv/Lib/site-packages/imgui_bundle/imgui/test_engine.pyi`** — the full API stub (2644 lines). Key sections: `TestContext` methods (lines 1445-2096), module-level functions (lines 433-500, 2639+), `TestEngineResultSummary` (3 fields: count_tested, count_success, count_in_queue).
|
||||
- **`.venv/Lib/site-packages/imgui_bundle/imgui/test_engine_checks.py`** — the `CHECK(result: bool)` assertion primitive.
|
||||
- **`conductor/workflow.md`** "Live_gui Test Fragility" + "Async Setters Need Poll-For-State" — the existing patterns for `live_gui` tests; the test engine's `wait_for_test_results` replaces `time.sleep` + `get_value` polling with a single engine-side poll.
|
||||
|
||||
## Out of Scope
|
||||
|
||||
- **Migrating existing `live_gui` tests to the test engine.** That's Track 2 (`test_engine_docking_tests_<date>`). This track only builds the bridge + proves it works with 1 smoke test.
|
||||
- **Visual regression via screenshot capture.** That's Track 3 (`test_engine_capture_regression_<date>`). The `ctx.capture_screenshot_window` API is available but not wired in this track.
|
||||
- **Headless test execution (no GUI window).** The test engine requires a live GLFW window (the scenario thread drives the actual ImGui render loop). Headless mode is a future research item, not this track.
|
||||
- **The test engine's interactive UI panel (`show_test_engine_windows`).** Not shown by default. Can be added as a debug toggle in a follow-up.
|
||||
- **Test engine license audit.** Per the stub: "free for individuals, educational, open-source, and small businesses. Paid for larger businesses." This project is personal-use; no audit needed. Flagged for awareness only.
|
||||
- **CI wiring of the test engine.** The `live_gui` fixture already runs in CI via the batched runner. The `--enable-test-engine` flag is additive. No CI config changes needed.
|
||||
- **Touching `src/models.py` or any taxonomy files.** Zero overlap with the running `tier2/post_module_taxonomy_de_cruft_20260627` branch or the `enforcement_gap_closure_20260627` track.
|
||||
@@ -0,0 +1,64 @@
|
||||
# Track state for test_engine_integration_20260627
|
||||
# Initialized by Tier 1 Orchestrator on 2026-06-27.
|
||||
# Implementation delegated to Tier 2 (autonomous) or Tier 3 worker dispatch.
|
||||
# This is Track 1 of 3 in the Test Engine Campaign.
|
||||
|
||||
[meta]
|
||||
track_id = "test_engine_integration_20260627"
|
||||
name = "ImGui Test Engine Integration (Bridge via API Hooks)"
|
||||
status = "active"
|
||||
current_phase = 0
|
||||
last_updated = "2026-06-27"
|
||||
|
||||
[blocked_by]
|
||||
# None. Parallel-safe against tier2/post_module_taxonomy_de_cruft_20260627
|
||||
# (zero file overlap: this track touches sloppy.py, gui_2.py:641-700,
|
||||
# api_hooks.py, api_hook_client.py, conftest.py — none of which Tier 2 touches)
|
||||
# and enforcement_gap_closure_20260627 (scripts/audit_*, python.md — zero overlap).
|
||||
|
||||
[blocks]
|
||||
test_engine_docking_tests = "planned (Track 2 of 3 campaign)"
|
||||
test_engine_capture_regression = "planned (Track 3 of 3 campaign)"
|
||||
|
||||
[phases]
|
||||
phase_1 = { status = "pending", checkpointsha = "", name = "Enable the Test Engine in the App (CLI flag + runner_params + register_tests callback)" }
|
||||
phase_2 = { status = "pending", checkpointsha = "", name = "Build the API Hooks Bridge (4 endpoints + 4 client methods)" }
|
||||
phase_3 = { status = "pending", checkpointsha = "", name = "Live_gui Fixture + Full Smoke Test" }
|
||||
phase_4 = { status = "pending", checkpointsha = "", name = "End-of-Track Report + State Update + User Sign-off" }
|
||||
|
||||
[tasks]
|
||||
# Phase 1: enable the engine
|
||||
t1_1 = { status = "pending", commit_sha = "", description = "Write failing test for --enable-test-engine flag + engine activation (Red phase)" }
|
||||
t1_2 = { status = "pending", commit_sha = "", description = "Add --enable-test-engine CLI flag to sloppy.py + test_engine_enabled field to AppController" }
|
||||
t1_3 = { status = "pending", commit_sha = "", description = "Enable engine in App.run() (runner_params.use_imgui_test_engine = True + callbacks.register_tests = self._register_imgui_tests) + add _register_imgui_tests method with smoke test" }
|
||||
t1_4 = { status = "pending", commit_sha = "", description = "Manual verification: run sloppy.py --enable-test-engine locally; confirm engine activates + no GIL-transfer crash" }
|
||||
# Phase 2: build the bridge
|
||||
t2_1 = { status = "pending", commit_sha = "", description = "Write failing tests for queue_test + wait_for_test_results + get_test_results (Red phase)" }
|
||||
t2_2 = { status = "pending", commit_sha = "", description = "Add 4 /api/test_engine/* endpoints to HookServer (queue, status, results, abort)" }
|
||||
t2_3 = { status = "pending", commit_sha = "", description = "Add 4 new methods to ApiHookClient (queue_test, get_test_status, get_test_results, wait_for_test_results)" }
|
||||
t2_4 = { status = "pending", commit_sha = "", description = "Run Phase 2 tests (Green phase); verify all 3 smoke tests pass" }
|
||||
# Phase 3: live_gui fixture + full smoke test
|
||||
t3_1 = { status = "pending", commit_sha = "", description = "Update live_gui fixture (conftest.py:792) to pass --enable-test-engine" }
|
||||
t3_2 = { status = "pending", commit_sha = "", description = "Run full smoke test + regression batch (Green phase)" }
|
||||
# Phase 4: end-of-track
|
||||
t4_1 = { status = "pending", commit_sha = "", description = "Write docs/reports/TRACK_COMPLETION_test_engine_integration_20260627.md" }
|
||||
t4_2 = { status = "pending", commit_sha = "", description = "Update conductor/tracks.md + chronology.md + state.toml -> status='completed'" }
|
||||
t4_3 = { status = "pending", commit_sha = "", description = "Conductor - User Manual Verification (PAUSE for user sign-off)" }
|
||||
|
||||
[verification]
|
||||
phase_1_complete = false
|
||||
phase_2_complete = false
|
||||
phase_3_complete = false
|
||||
phase_4_complete = false
|
||||
engine_activates_without_crash = false
|
||||
smoke_test_passes = false
|
||||
no_live_gui_regression = false
|
||||
|
||||
[campaign_context]
|
||||
# This is Track 1 of 3. The campaign enables high-fidelity UI simulation via the
|
||||
# Dear ImGui Test Engine, bridged through the existing API hooks layer.
|
||||
campaign_name = "Test Engine Campaign"
|
||||
track_1 = "test_engine_integration_20260627 (THIS; bridge + smoke test)"
|
||||
track_2 = "test_engine_docking_tests (migrate docking/focus/panel tests)"
|
||||
track_3 = "test_engine_capture_regression (visual regression via screenshot capture)"
|
||||
key_risk = "R1: GIL-transfer crash if the app's thread layout doesn't work with the engine's scenario thread (mitigated by Phase 1 Task 1.4 manual checkpoint)"
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user