diff --git a/docs/superpowers/specs/2026-06-02-clean-install-test-design.md b/docs/superpowers/specs/2026-06-02-clean-install-test-design.md new file mode 100644 index 00000000..4cb99895 --- /dev/null +++ b/docs/superpowers/specs/2026-06-02-clean-install-test-design.md @@ -0,0 +1,195 @@ +# Clean Install Test + +**Date:** 2026-06-02 +**Status:** Draft (pending review) + +--- + +## Context & Motivation + +The user wants a "clean install" test that verifies Manual Slop works correctly when installed from scratch in an isolated environment. The test should: + +1. Clone the repo to a temp directory (no shared state with the source) +2. Install dependencies via `uv sync` +3. Launch `sloppy.py --enable-test-hooks` +4. Verify the Hook API responds (smoke test that the app is functional) + +This is a defense against: +- Repository changes that only work on the developer's machine +- Dependency drift (works on dev, fails on fresh install) +- Build/launch issues that only appear in clean environments + +The target is the user's private Gitea server: `https://git.cozyair.dev/ed/manual_slop`. This is intentionally NOT a public GitHub URL — the test must work in the user's private infrastructure. 
+ +--- + +## Scope + +### In Scope + +- `tests/test_clean_install.py` — Opt-in pytest test +- `pyproject.toml` update: add `clean_install` marker +- Gating via `RUN_CLEAN_INSTALL_TEST=1` env var + +### Out of Scope + +- Auto-clone in CI (the user can opt in via a future CI workflow) +- Continuous monitoring +- Cloning from a specific branch/tag (uses HEAD of main by default) + +--- + +## Design + +### Test File Structure + +```python +# tests/test_clean_install.py +import os +import shutil +import subprocess +import time +from pathlib import Path + +import pytest +import requests + + +REPO_URL = "https://git.cozyair.dev/ed/manual_slop" +STARTUP_TIMEOUT_SECONDS = 30 +READINESS_POLL_INTERVAL = 0.5 + + +@pytest.mark.clean_install +def test_clean_install_runs_with_hooks(tmp_path): + """Clone the repo, install deps, launch sloppy.py, verify Hook API.""" + if os.environ.get("RUN_CLEAN_INSTALL_TEST") != "1": + pytest.skip("Set RUN_CLEAN_INSTALL_TEST=1 to enable") + + clone_dir = tmp_path / "manual_slop" + + # 1. Clone + result = subprocess.run( + ["git", "clone", REPO_URL, str(clone_dir)], + capture_output=True, text=True, timeout=60, + ) + assert result.returncode == 0, f"Clone failed: {result.stderr}" + + # 2. Install deps + result = subprocess.run( + ["uv", "sync"], + cwd=str(clone_dir), + capture_output=True, text=True, timeout=180, + ) + assert result.returncode == 0, f"uv sync failed: {result.stderr}" + + # 3. Launch sloppy.py with hooks + process = subprocess.Popen( + ["uv", "run", "sloppy.py", "--enable-test-hooks"], + cwd=str(clone_dir), + stdout=subprocess.PIPE, stderr=subprocess.PIPE, + creationflags=subprocess.CREATE_NEW_PROCESS_GROUP if os.name == 'nt' else 0, + ) + + try: + # 4. Poll /status endpoint + start = time.time() + ready = False + while time.time() - start < STARTUP_TIMEOUT_SECONDS: + if process.poll() is not None: + pytest.fail(f"Process exited early. 
stderr: {process.stderr.read()[:2000]}") + try: + response = requests.get( + "http://127.0.0.1:8999/status", + timeout=1.0, + ) + if response.status_code == 200: + payload = response.json() + if payload.get("status") == "running": + ready = True + break + except (requests.ConnectionError, requests.Timeout): + pass + time.sleep(READINESS_POLL_INTERVAL) + + assert ready, f"Hook server did not respond within {STARTUP_TIMEOUT_SECONDS}s" + + # 5. Exercise a second read endpoint beyond /status (GET /api/mma_status) + response = requests.get( + "http://127.0.0.1:8999/api/mma_status", + timeout=5.0, + ) + assert response.status_code == 200 + # The mma_status endpoint returns a dict; verify it has expected keys + data = response.json() + assert "status" in data or "mma_state" in data + + finally: + # 6. Cleanup + if os.name == 'nt': + subprocess.run( + ["taskkill", "/F", "/T", "/PID", str(process.pid)], + capture_output=True, + ) + else: + process.terminate() + try: + process.wait(timeout=5) + except subprocess.TimeoutExpired: + process.kill() +``` + +### `pyproject.toml` Update + +```toml +[tool.pytest.ini_options] +markers = [ + "integration: integration tests requiring live GUI", + "strict: tests that require strict mode", + "clean_install: clean install verification (opt-in via RUN_CLEAN_INSTALL_TEST=1)", +] +``` + +### Running the Test + +**Default (skip):** +```bash +uv run pytest tests/test_clean_install.py -v +# SKIPPED: Set RUN_CLEAN_INSTALL_TEST=1 to enable +``` + +**Opt-in:** +```bash +RUN_CLEAN_INSTALL_TEST=1 uv run pytest tests/test_clean_install.py -v +``` + +**Just the clean_install marker:** +```bash +RUN_CLEAN_INSTALL_TEST=1 uv run pytest -m clean_install -v +``` + +--- + +## File Structure + +- `tests/test_clean_install.py` — NEW +- `pyproject.toml` — MODIFY: add `clean_install` marker + +--- + +## Acceptance Criteria + +- `RUN_CLEAN_INSTALL_TEST=1 uv run pytest tests/test_clean_install.py -v` passes when run in an environment with network access to `git.cozyair.dev` 
+- Without the env var, the test skips (no network access required) +- The test takes 30-90 seconds to run (clone + install + launch) +- A failure in any step (clone, sync, launch, hook response) results in a clear error message +- Process cleanup is robust (no orphaned processes on Windows or Unix) + +--- + +## Risks + +1. **Network dependency:** The test requires network access to `git.cozyair.dev`. In CI environments without that access, the test will fail. Mitigation: the env var gating makes this opt-in. +2. **Clone target is private:** Unlike GitHub, the URL is on a private Gitea server. Test failure on a public CI would leak the existence of the private repo. Mitigation: only run on private infrastructure; the test is opt-in. +3. **Port conflicts:** The test uses port 8999. If another process is using it, the test will fail. Mitigation: the polling loop detects early exit and reports the port-in-use error. +4. **`uv sync` is slow:** On a fresh machine, `uv sync` can take 30-60 seconds. The test budget is 180 seconds which should be sufficient. diff --git a/docs/superpowers/specs/2026-06-02-command-palette-design.md b/docs/superpowers/specs/2026-06-02-command-palette-design.md new file mode 100644 index 00000000..2749f23c --- /dev/null +++ b/docs/superpowers/specs/2026-06-02-command-palette-design.md @@ -0,0 +1,222 @@ +# Command Palette Implementation & Tests + +**Date:** 2026-06-02 +**Status:** Draft (pending review) +**Parent Track:** `command_palette_and_performance_20260602` (continuing Phase 2 + adding Phase 3) +**Spec:** `conductor/tracks/command_palette_and_performance_20260602/spec.md` (existing) + +--- + +## Context & Motivation + +A `command_palette_and_performance_20260602` track was started in early June 2026. **Phase 1 (Async Context Preview)** is complete. **Phase 2 (Command Palette)** is unstarted — no `src/command_palette.py`, no `src/commands.py`, no test files. The user reports the palette doesn't pop up on `Ctrl+Shift+P`. 
+ +The existing spec says `Ctrl+P`; this design uses `Ctrl+Shift+P` (per the user's expectation and VSCode convention documented in `docs/guide_command_palette.md`). + +This design finishes Phase 2 of the existing track and adds Phase 3 (tests). + +--- + +## Scope + +### In Scope + +- `src/command_palette.py` — Module-level: `Command` dataclass, `CommandRegistry`, `fuzzy_match()`, `render_palette_modal(app)`, `render_everything_modal(app)` +- `src/commands.py` — Static command definitions (~30-50 commands across categories) +- `src/gui_2.py` — `self.show_command_palette: bool` in `App.__init__`, `_render_command_palette(self)` thin wrapper, `Ctrl+Shift+P` keyboard handler +- `tests/test_command_palette.py` — Unit tests for fuzzy matcher, command registry, mode detection +- `tests/test_command_palette_sim.py` — Integration tests via `live_gui` + +### Out of Scope + +- Async context preview (Phase 1; already complete) +- The "Everything" mode async search worker (mentioned in the existing spec; defer to a follow-up track) +- Visual theming of the palette (use existing ImGui style) + +--- + +## Design + +### Data Model: `Command` + +```python +@dataclass +class Command: + id: str # Unique identifier + title: str # Display name + category: str # Category for grouping + shortcut: Optional[str] # Optional default shortcut (e.g., "Ctrl+S") + description: str = "" # Optional help text + enabled_when: Optional[str] = None # Optional condition expression + action: Callable = None # Function to execute when selected +``` + +### Command Registry + +```python +# src/commands.py +from src.command_palette import Command, CommandRegistry + +registry = CommandRegistry() + +@registry.register +def save_file(app: App) -> None: + """Save File — File category, Ctrl+S""" + # ... 
call app's save logic +``` + +**Registration patterns:** +- Decorator: `@registry.register` for top-level functions +- Explicit: `registry.register(Command(id=..., title=..., action=...))` for closures or classes + +### Fuzzy Matcher + +Implemented in `src/command_palette.py` as a pure function: + +```python +def fuzzy_match(query: str, candidates: List[Command], top_n: int = 20) -> List[ScoredCommand]: + """ + Returns the top_n candidates matching query, ranked by score. + + Algorithm: + 1. Subsequence check: query chars must appear in title, in order + 2. Score calculation: + - Exact prefix match: +1.0 + - Word boundary match: +0.5 + - Contiguous match: +0.3 + - Character distance penalty: -0.1 per gap + 3. Sort by score descending + 4. Return top_n + """ +``` + +### Modal Rendering + +The palette is a centered ImGui modal. Module-level function (per delegation pattern): + +```python +# src/command_palette.py +def render_palette_modal(app: App) -> None: + """Render the Command Palette modal. Called from gui_2.py when app.show_command_palette is True.""" + if not app.show_command_palette: + return + imgui.set_next_window_position(...) # Centered + imgui.set_next_window_size(...) 
+ if imgui.begin("Command Palette##palette", closable=True): + # Search input + # Fuzzy-matched results list + # Keyboard navigation + imgui.end() +``` + +### Keyboard Handler + +In `gui_2.py`'s main event loop: + +```python +io = imgui.get_io() +if io.key_ctrl and io.key_shift and imgui.is_key_pressed(imgui.Key.p): + app.show_command_palette = not app.show_command_palette +``` + +--- + +## File Structure + +- `src/command_palette.py` — NEW: Command, CommandRegistry, fuzzy_match, render_palette_modal +- `src/commands.py` — NEW: Static command definitions and registry +- `src/gui_2.py` — MODIFY: add `self.show_command_palette`, add `_render_command_palette` wrapper, add Ctrl+Shift+P handler, register the palette module +- `tests/test_command_palette.py` — NEW: Unit tests +- `tests/test_command_palette_sim.py` — NEW: Integration tests via live_gui + +--- + +## Tests + +### Unit Tests (`tests/test_command_palette.py`) + +```python +def test_fuzzy_match_prefix_ranks_first(): + from src.command_palette import fuzzy_match + candidates = [ + Command(id="find", title="Find in Selection"), + Command(id="fold", title="Fold All"), + Command(id="config", title="Configure Settings"), + ] + results = fuzzy_match("fin", candidates) + assert results[0].command.id == "find" + assert results[0].score > 0.5 + +def test_fuzzy_match_rejects_no_match(): + from src.command_palette import fuzzy_match + candidates = [Command(id="x", title="foo bar")] + results = fuzzy_match("xyz", candidates) + assert len(results) == 0 + +def test_command_registry_register_and_list(): + from src.command_palette import CommandRegistry + from src.commands import registry + assert "save_file" in registry.all() + # All commands have id, title, category + for cmd in registry.all(): + assert cmd.id and cmd.title and cmd.category + +def test_command_registry_duplicate_raises(): + from src.command_palette import CommandRegistry, Command + reg = CommandRegistry() + reg.register(Command(id="x", title="X", 
category="test")) + with pytest.raises(ValueError): + reg.register(Command(id="x", title="X", category="test")) +``` + +### Integration Tests (`tests/test_command_palette_sim.py`) + +```python +def test_ctrl_shift_p_opens_palette(live_gui): + client = live_gui[1] + # Press Ctrl+Shift+P + client.press_key_combo("Ctrl+Shift+P") + # Verify the palette is visible + state = client.get_window_state("command_palette") + assert state["visible"] == True + +def test_palette_filters_as_user_types(live_gui): + client = live_gui[1] + client.press_key_combo("Ctrl+Shift+P") + client.type_in_palette("save") + results = client.get_palette_results() + assert any("Save" in r.title for r in results) + # Other commands not shown + assert not any("Compress" in r.title for r in results) + +def test_palette_executes_command_on_enter(live_gui): + client = live_gui[1] + client.press_key_combo("Ctrl+Shift+P") + client.type_in_palette("Reset") + client.press_key("Down") + client.press_key("Enter") + # Verify the reset command was executed (check via Hook API) + state = client.get_session_state() + assert state.get("discussion_history", []) == [] +``` + +--- + +## Acceptance Criteria + +- `Ctrl+Shift+P` opens the palette (verified via `live_gui` test) +- Typing in the palette filters results via fuzzy match +- Selecting a command (Enter key) executes it and closes the palette +- Escape closes the palette without executing +- All unit tests pass +- All integration tests pass +- The palette respects the existing theme (dark/light/nerv) +- No new lint errors + +--- + +## Risks + +1. **Keyboard handler conflicts:** The Ctrl+Shift+P combo might be intercepted by other subsystems. Mitigation: check for other handlers in the codebase first; if conflicts, document them. +2. **Pyodide build dependencies:** The imgui-bundle web backend (for Track 4) has a different architecture than this track. The two are independent but should be aware of each other. +3. 
**Test flakiness:** `live_gui` tests can be flaky if the GUI doesn't initialize in time. Mitigation: the standard 15-second readiness polling is sufficient. diff --git a/docs/superpowers/specs/2026-06-02-docker-web-frontend-design.md b/docs/superpowers/specs/2026-06-02-docker-web-frontend-design.md new file mode 100644 index 00000000..ccb8b05d --- /dev/null +++ b/docs/superpowers/specs/2026-06-02-docker-web-frontend-design.md @@ -0,0 +1,288 @@ +# Docker Container & Web-Hosted ImGui Frontend + +**Date:** 2026-06-02 +**Status:** Draft (pending review) +**Reference:** https://imgui-bundle.pages.dev/explorer/ — imgui-bundle web backend via Hello ImGui + +--- + +## Context & Motivation + +The user wants to deploy Manual Slop on Unraid (a home server OS) and access the GUI via a web browser. The goal is for agents to operate on projects hosted on the home server, with the user monitoring/controlling via web browser. + +Current state: +- `sloppy.py` is a desktop GUI (ImGui via imgui-bundle + Python) +- `src/api_hooks.py` provides a FastAPI/Uvicorn headless service on `:8999` for external automation +- The app is Windows-oriented (PowerShell subprocesses, `pywin32` for window frame) + +Target state: +- Docker container with the full app +- Web browser shows the ImGui GUI in real-time +- Agents can interact via the existing Hook API on `:8999` +- The user's Unraid server can host multiple project directories + +imgui-bundle's web backend ([reference](https://imgui-bundle.pages.dev/explorer/)) uses a server-side render with a client-side WebGL display. The Hello ImGui runner pairs a Python render loop with a JavaScript WebGL canvas via WebSocket. 
+ +--- + +## Scope + +### In Scope + +- `Dockerfile` — Container build for the Manual Slop app +- `docker-compose.yml` — Multi-container deployment for Unraid +- `scripts/docker_build.sh` — Build helper +- `scripts/docker_run.sh` — Run helper with env var wiring +- `docs/guide_docker_deployment.md` — Unraid setup guide +- `tests/test_docker_build.py` — Opt-in Docker build test + +### Out of Scope + +- Migrating `sloppy.py` to use the imgui-bundle web backend (the web backend is an alternative to the desktop backend; switching is a significant refactor and may be deferred) +- Multi-user authentication (single-user deployment) +- Cloud-specific deployment (AWS, GCP) — Unraid is the target +- TLS termination (assumed handled by a reverse proxy like Traefik or Caddy) + +--- + +## Design + +### Architecture: V2 — Server-side Python + WebGL client (via WebSocket) + +``` +┌─────────────────────────────────────────────┐ +│ Docker Container (unraid:manual_slop:latest) │ +│ │ +│ ┌────────────────────────────────────┐ │ +│ │ Python app │ │ +│ │ - ImGui renders to framebuffer │ │ +│ │ - Hello ImGui web backend: │ │ +│ │ - Python: render loop │ │ +│ │ - WebSocket: frame deltas │ │ +│ │ - HTTP: serves JS client │ │ +│ │ - HookServer on :8999 │ │ +│ └────────────────────────────────────┘ │ +│ │ +│ Exposed ports: │ +│ - 8080: Web client (HTTP + WS) │ +│ - 8999: Hook API │ +│ │ +│ Volumes: │ +│ - /projects: project workspaces │ +│ - /config: app state, presets, personas │ +└─────────────────────────────────────────────┘ + ↑ ↑ + │ Browser (Chrome, Firefox) │ Agent (curl, scripts) + │ WebSocket for live frames │ HTTP for state +``` + +### Dockerfile + +```dockerfile +FROM python:3.11-slim + +# System deps +RUN apt-get update && apt-get install -y --no-install-recommends \ + git curl ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +# Install uv +RUN pip install uv + +# App setup +WORKDIR /app +COPY pyproject.toml uv.lock ./ +RUN uv sync --frozen + +COPY . . 
+ +# Volumes +RUN mkdir -p /projects /config +VOLUME ["/projects", "/config"] + +# Expose +EXPOSE 8080 8999 + +# Health check +HEALTHCHECK --interval=30s --timeout=5s --start-period=30s --retries=3 \ + CMD curl -f http://127.0.0.1:8999/status || exit 1 + +# Entrypoint +ENTRYPOINT ["uv", "run", "sloppy.py", "--enable-test-hooks", "--web-host=0.0.0.0", "--web-port=8080"] +``` + +### `docker-compose.yml` + +```yaml +version: '3.8' + +services: + manual_slop: + build: . + image: manual_slop:latest + container_name: manual_slop + ports: + - "8999:8999" # Hook API (host) + - "8080:8080" # Web client (host) + volumes: + - /mnt/user/projects:/projects:rw # Unraid project share + - /mnt/user/appdata/manual_slop:/config:rw # App state + environment: + - GEMINI_API_KEY=${GEMINI_API_KEY} + - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY} + - DEEPSEEK_API_KEY=${DEEPSEEK_API_KEY} + - MINIMAX_API_KEY=${MINIMAX_API_KEY} + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://127.0.0.1:8999/status"] + interval: 30s + timeout: 5s + retries: 3 + start_period: 30s +``` + +### Entry Point Changes (`sloppy.py`) + +The current `sloppy.py` launches the desktop GUI. For web mode, we need: + +```python +# In sloppy.py +import argparse + +parser = argparse.ArgumentParser() +# ... existing args ... +parser.add_argument("--web-host", default=None, help="Enable web mode and bind to this host") +parser.add_argument("--web-port", type=int, default=8080, help="Web mode port") +args = parser.parse_args() + +if args.web_host is not None: + from imgui_bundle import hello_imgui + runner_params = hello_imgui.RunnerParams() + runner_params.app_window_params.borderless = False + runner_params.imgui_window_params.default_imgui_window_type = ... # web backend + hello_imgui.run(runner_params) +else: + # Existing desktop launch + ... +``` + +The imgui-bundle web backend is selected by the Hello ImGui runner. 
The exact config is per the [imgui-bundle explorer docs](https://imgui-bundle.pages.dev/explorer/). + +### `docs/guide_docker_deployment.md` + +A complete Unraid setup guide: + +- Prerequisites (Unraid version, Docker template) +- Building the image +- Configuring volumes and env vars +- Accessing the web client (URL, browser requirements) +- Agent interaction examples (curl, Python script) +- Backup and restore of /config +- Updating the image + +### `tests/test_docker_build.py` + +```python +import os +import subprocess +import time + +import pytest +import requests + +IMAGE_NAME = "manual_slop:test" +CONTAINER_NAME = "manual_slop_test" +WEB_PORT = 8080 +HOOK_PORT = 8999 + + +@pytest.mark.docker +def test_docker_container_starts_and_serves(tmp_path): + """Build the Docker image, run the container, verify web client + hook API.""" + if os.environ.get("RUN_DOCKER_TEST") != "1": + pytest.skip("Set RUN_DOCKER_TEST=1 to enable") + + if not _docker_available(): + pytest.skip("Docker not available") + + # Build + result = subprocess.run( + ["docker", "build", "-t", IMAGE_NAME, "."], + capture_output=True, text=True, timeout=300, + ) + assert result.returncode == 0, f"Docker build failed: {result.stderr}" + + # Run + subprocess.run(["docker", "rm", "-f", CONTAINER_NAME], capture_output=True) + result = subprocess.run([ + "docker", "run", "-d", + "--name", CONTAINER_NAME, + "-p", f"{WEB_PORT}:8080", + "-p", f"{HOOK_PORT}:8999", + IMAGE_NAME, + ], capture_output=True, text=True, timeout=30) + assert result.returncode == 0, f"Docker run failed: {result.stderr}" + + try: + # Wait for hook API + start = time.time() + ready = False + while time.time() - start < 60: + try: + r = requests.get(f"http://127.0.0.1:{HOOK_PORT}/status", timeout=1) + if r.status_code == 200: + ready = True + break + except (requests.ConnectionError, requests.Timeout): + pass + time.sleep(1) + + assert ready, "Container did not start hook API within 60s" + + # Verify web client is served + r = 
requests.get(f"http://127.0.0.1:{WEB_PORT}/", timeout=5) + assert r.status_code == 200 + assert b"<html" in r.content.lower() + + finally: + subprocess.run(["docker", "rm", "-f", CONTAINER_NAME], capture_output=True) + + +def _docker_available() -> bool: + result = subprocess.run(["docker", "version"], capture_output=True) + return result.returncode == 0 +``` + +--- + +## File Structure + +- `Dockerfile` — NEW +- `docker-compose.yml` — NEW +- `scripts/docker_build.sh` — NEW +- `scripts/docker_run.sh` — NEW +- `docs/guide_docker_deployment.md` — NEW +- `tests/test_docker_build.py` — NEW +- `sloppy.py` — MODIFY: add `--web-host` and `--web-port` args + +--- + +## Acceptance Criteria + +- `docker build -t manual_slop:latest .` succeeds on a clean machine +- `docker compose up` starts the container, and `:8999/status` returns 200 within 60s +- `curl http://localhost:8080/` returns the web client HTML +- An agent can `curl http://localhost:8999/api/mma_status` and get a valid response +- The user can navigate to the web UI in a browser and see the ImGui panels +- File operations on `/projects` persist across container restarts +- Env vars for API keys are not committed to the image (use runtime env) + +--- + +## Risks + +1. **imgui-bundle web backend maturity:** The web backend is less battle-tested than the desktop backend. There may be rendering quirks, input latency, or unsupported features. Mitigation: this is experimental; expect to iterate. +2. **Headless rendering in container:** Some ImGui features (e.g., font hinting) may need extra config for headless rendering. Mitigation: test early in development; fall back to Xvfb + noVNC if web backend is too immature. +3. **WebSocket bandwidth:** Streaming frame deltas requires consistent network. On flaky networks, the user experience degrades. Mitigation: implement client-side prediction or reduce frame rate. +4. **Container size:** Python + uv + all deps can produce a 1-2GB image. Mitigation: use multi-stage builds; pin Python deps for reproducibility. +5. **Unraid-specific quirks:** Unraid uses a specific Docker storage driver and may have path mapping edge cases. 
Mitigation: test on the actual Unraid deployment; document the path mapping clearly. diff --git a/docs/superpowers/specs/2026-06-02-test-consolidation-design.md b/docs/superpowers/specs/2026-06-02-test-consolidation-design.md new file mode 100644 index 00000000..aeb5ebba --- /dev/null +++ b/docs/superpowers/specs/2026-06-02-test-consolidation-design.md @@ -0,0 +1,220 @@ +# Test Consolidation & TOML Sandboxing Enforcement + +**Date:** 2026-06-02 +**Status:** Draft (pending review) + +--- + +## Context & Motivation + +The Manual Slop test suite has grown to ~258 test files. Many tests read or write project TOML files (manual_slop.toml, config.toml, credentials.toml, presets.toml, etc.) for fixtures. The pattern is inconsistent: + +- Some tests use `tmp_path` + `monkeypatch` (good — isolated) +- Some tests use real `./` paths (bad — pollutes user config) +- Some tests use mock paths at module level (good — fast) + +The user wants to: +1. Audit tests for real-TOML usage +2. Migrate offenders to sandboxed variants +3. Consolidate similar tests where it improves clarity +4. Enforce the rule going forward + +The `isolate_workspace` autouse fixture in `tests/conftest.py` (added in the May 2026 docs refresh work) is the foundation for the migration pattern. 
+ +--- + +## Scope + +### In Scope + +- Audit all `tests/*.py` for direct path references to `./` TOML files +- Migrate offenders to use `tmp_path` + `monkeypatch` (or `isolate_workspace`) +- Consolidate similar tests where it improves clarity (judgment call) +- Add a `tests/conftest.py` autouse fixture that prevents regression +- Add a `scripts/check_test_toml_paths.py` script for CI/pre-commit +- Add tests for the enforcement mechanism itself + +### Out of Scope + +- Rewriting tests for clarity (only consolidation where it improves maintainability) +- Adding new tests +- Changing the test runner (pytest stays) +- Coverage tooling changes + +--- + +## Design + +### Phase 1: Audit + +A script that greps `tests/*.py` for problematic patterns: + +```python +# scripts/check_test_toml_paths.py +import re +from pathlib import Path + +PROBLEMATIC_PATTERNS = [ + r'Path\("(?:manual_slop|config|credentials|presets|personas|tool_presets|workspace_profiles)\.toml"\)', + r'open\(["\'](?:manual_slop|config|credentials|presets|personas|tool_presets|workspace_profiles)\.toml["\']', + r'["\']\.{1,2}/(?:manual_slop|config|credentials|presets|personas|tool_presets|workspace_profiles)\.toml["\']', +] + +def find_violations(tests_dir: Path) -> List[Tuple[Path, int, str]]: + """Returns list of (file, line, pattern) for each violation.""" + ... +``` + +Run this script as the first step. Output a report grouped by file. + +### Phase 2: Migrate Offenders + +For each violation, refactor the test to use the sandboxed pattern: + +**Before (real TOML):** +```python +def test_load_presets(): + path = Path("presets.toml") # Real file! 
+ if path.exists(): + data = tomllib.loads(path.read_text()) + assert data is not None +``` + +**After (sandboxed):** +```python +def test_load_presets(tmp_path): + path = tmp_path / "presets.toml" + path.write_text("[presets.test]\nkey = 'value'\n") + # Patch the path module to point to tmp_path + monkeypatch = pytest.MonkeyPatch() + monkeypatch.setattr("src.paths.get_global_presets_path", lambda: path) + data = tomllib.loads(path.read_text()) + assert data["presets"]["test"]["key"] == "value" + monkeypatch.undo() +``` + +Or use the `isolate_workspace` autouse fixture (already in conftest.py) which redirects all path resolution to `tmp_path`. + +### Phase 3: Consolidate (Judgment Call) + +Examples of consolidation opportunities (NOT a forced refactor): + +| Current | Proposed | Rationale | +|---|---|---| +| `test_ai_settings_layout.py` + `test_sim_ai_settings.py` | `test_ai_settings.py` with parametrize | Tests cover same surface | +| `test_*_provider.py` (5+ files) | `test_providers.py` parametrized | Each provider test has same shape | +| `test_*_preset*.py` (3 files) | `test_presets.py` with class organization | Settings/presets/tools all CRUD TOML | +| `test_*_screenshot*.py` | `test_screenshots.py` | Currently fragmented | + +Each consolidation is reviewed case-by-case. **Test count is not a goal; test clarity is.** Don't merge tests that test different things just to reduce file count. + +### Phase 4: Enforce + +**4a. 
Autouse fixture** in `tests/conftest.py`: + +```python +@pytest.fixture(autouse=True) +def enforce_no_real_toml(monkeypatch, tmp_path): + """Prevents any test from reading ./<name>.toml by detecting file existence + and asserting the path is inside tmp_path or explicitly monkeypatched.""" + + real_toml_paths = [ + Path("manual_slop.toml"), + Path("config.toml"), + Path("credentials.toml"), + Path("presets.toml"), + Path("personas.toml"), + Path("tool_presets.toml"), + Path("workspace_profiles.toml"), + ] + + # If any real TOML exists in the cwd, save it for restoration + snapshots = {} + for p in real_toml_paths: + if p.exists(): + snapshots[p] = p.read_bytes() + p.unlink() # Remove to prevent test from reading + yield # Run the test + # Restore after test + for p, content in snapshots.items(): + p.write_bytes(content) +``` + +This is **strict** — any test that tries to read a real TOML will get FileNotFoundError. Tests must use `tmp_path` or `monkeypatch`. + +If this is too aggressive, a softer alternative: + +```python +@pytest.fixture(autouse=True) +def warn_on_real_toml(): + """Warns if a test reads a real TOML. Does not fail by default; + set ENFORCE_NO_REAL_TOML=1 to convert warnings to failures.""" + ... +``` + +**4b. CI script** `scripts/check_test_toml_paths.py` — runs on every commit: + +```python +# Greps for direct ./<name>.toml references +# Exits non-zero if any found +# Output: "test_foo.py:42: Path('presets.toml') — direct reference to real TOML" +``` + +Add to `conductor/...` workflow or as a pre-commit hook (out of scope for this track — just provide the script). 
+ +### Phase 5: Test the Enforcer + +`tests/test_enforce_no_real_toml.py` — meta-test: + +```python +def test_enforcer_catches_violation(tmp_path, monkeypatch): + """Verify the fixture prevents reading a real TOML.""" + # Create a real-looking TOML in cwd + real_path = Path("test_enforcer_temp.toml") + real_path.write_text("[test]\nkey='value'") + try: + # The fixture removes it; try to read it + with pytest.raises(FileNotFoundError): + real_path.read_text() + finally: + if real_path.exists(): + real_path.unlink() + +def test_enforcer_restores_real_tomls(tmp_path): + """Verify the fixture restores real TOMLs after the test.""" + real_path = Path("test_enforcer_temp2.toml") + original = b"[test]\nkey='original'" + real_path.write_bytes(original) + # The test runs (fixture activates) + assert real_path.exists() # The fixture restored it + assert real_path.read_bytes() == original + real_path.unlink() +``` + +--- + +## File Structure + +- `scripts/check_test_toml_paths.py` — NEW: greps for violations, exits non-zero +- `tests/conftest.py` — MODIFY: add `enforce_no_real_toml` autouse fixture (strict or warn-only) +- `tests/test_enforce_no_real_toml.py` — NEW: tests for the enforcer +- Various `tests/test_*.py` — MODIFY: migrate offenders to sandboxed pattern +- Various `tests/test_*.py` — MODIFY: consolidate where it improves clarity + +--- + +## Acceptance Criteria + +- All existing tests pass after migration +- `scripts/check_test_toml_paths.py` exits 0 on the test suite after migration +- The autouse fixture catches new violations in CI +- Test count is approximately the same after consolidation (slight decrease acceptable) +- No real TOML files in the user's project are touched by the test suite + +--- + +## Risks + +1. **Test breakage:** Migration may break tests that depend on real-file behavior. Mitigation: run full test suite after each migration batch. +2. **Performance:** The autouse fixture adds overhead to every test. 
Mitigation: keep it cheap (just snapshot/restore file existence). +3. **Coverage regression:** Removing real-file behavior may hide bugs. Mitigation: add explicit tests for the sandboxed path resolution.